pelias-schema 8.0.0 → 8.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,7 +55,6 @@
55
55
  "name_synonyms_multiplexer",
56
56
  "icu_folding",
57
57
  "remove_ordinals",
58
- "removeAllZeroNumericPrefix",
59
58
  "peliasOneEdgeGramFilter",
60
59
  "unique_only_same_position",
61
60
  "notnull",
@@ -74,7 +73,6 @@
74
73
  "trim",
75
74
  "icu_folding",
76
75
  "remove_ordinals",
77
- "removeAllZeroNumericPrefix",
78
76
  "unique_only_same_position",
79
77
  "notnull"
80
78
  ]
@@ -96,7 +94,6 @@
96
94
  "name_synonyms_multiplexer",
97
95
  "icu_folding",
98
96
  "remove_ordinals",
99
- "removeAllZeroNumericPrefix",
100
97
  "unique_only_same_position",
101
98
  "notnull",
102
99
  "flatten_graph"
@@ -252,11 +249,6 @@
252
249
  "min_gram": 1,
253
250
  "max_gram": 24
254
251
  },
255
- "removeAllZeroNumericPrefix": {
256
- "type": "pattern_replace",
257
- "pattern": "^(0*)",
258
- "replacement": ""
259
- },
260
252
  "remove_ordinals": {
261
253
  "type": "pattern_replace",
262
254
  "pattern": "(?i)((^| )((1)st?|(2)nd?|(3)rd?|([4-9])th?)|(([0-9]*)(1[0-9])th?)|(([0-9]*[02-9])((1)st?|(2)nd?|(3)rd?|([04-9])th?))($| ))",
package/test/settings.js CHANGED
@@ -1,6 +1,7 @@
1
- var path = require('path'),
1
+ const path = require('path'),
2
2
  settings = require('../settings'),
3
- fs = require('fs');
3
+ fs = require('fs'),
4
+ config = require('pelias-config').generate();
4
5
 
5
6
  module.exports.tests = {};
6
7
 
@@ -48,6 +49,20 @@ module.exports.tests.analysis = function(test, common) {
48
49
  });
49
50
  };
50
51
 
52
+ function mayBeAmpersandMapper() {
53
+ if (config.schema.icuTokenizer) {
54
+ return ['ampersand_mapper'];
55
+ }
56
+ return [];
57
+ }
58
+
59
+ function mayBeAmpersandReplacer() {
60
+ if (config.schema.icuTokenizer) {
61
+ return ['ampersand_replacer'];
62
+ }
63
+ return [];
64
+ }
65
+
51
66
  // -- analyzers --
52
67
 
53
68
  module.exports.tests.peliasAdminAnalyzer = function(test, common) {
@@ -57,13 +72,13 @@ module.exports.tests.peliasAdminAnalyzer = function(test, common) {
57
72
  var analyzer = s.analysis.analyzer.peliasAdmin;
58
73
  t.equal(analyzer.type, 'custom', 'custom analyzer');
59
74
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
60
- t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
75
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), 'punctuation', 'nfkc_normalizer'], 'character filters specified');
61
76
  t.true(Array.isArray(analyzer.filter), 'filters specified');
62
77
  t.end();
63
78
  });
64
79
  test('peliasAdmin token filters', function (t) {
65
80
  var analyzer = settings().analysis.analyzer.peliasAdmin;
66
- t.deepEqual(analyzer.filter, [
81
+ t.deepEqual(analyzer.filter, [...mayBeAmpersandReplacer(),
67
82
  "lowercase",
68
83
  "trim",
69
84
  "synonyms/custom_admin/multiword",
@@ -85,13 +100,14 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) {
85
100
  var analyzer = s.analysis.analyzer.peliasIndexOneEdgeGram;
86
101
  t.equal(analyzer.type, 'custom', 'custom analyzer');
87
102
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
88
- t.deepEqual(analyzer.char_filter, ["punctuation","nfkc_normalizer"], 'character filters specified');
103
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), "punctuation","nfkc_normalizer"], 'character filters specified');
89
104
  t.true(Array.isArray(analyzer.filter), 'filters specified');
90
105
  t.end();
91
106
  });
92
107
  test('peliasIndexOneEdgeGram token filters', function(t) {
93
108
  var analyzer = settings().analysis.analyzer.peliasIndexOneEdgeGram;
94
109
  t.deepEqual( analyzer.filter, [
110
+ ...mayBeAmpersandReplacer(),
95
111
  "lowercase",
96
112
  "trim",
97
113
  "synonyms/custom_name/multiword",
@@ -100,7 +116,6 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) {
100
116
  "name_synonyms_multiplexer",
101
117
  "icu_folding",
102
118
  "remove_ordinals",
103
- "removeAllZeroNumericPrefix",
104
119
  "peliasOneEdgeGramFilter",
105
120
  "unique_only_same_position",
106
121
  "notnull",
@@ -117,18 +132,18 @@ module.exports.tests.peliasQueryAnalyzer = function (test, common) {
117
132
  var analyzer = s.analysis.analyzer.peliasQuery;
118
133
  t.equal(analyzer.type, 'custom', 'custom analyzer');
119
134
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
120
- t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
135
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), 'punctuation', 'nfkc_normalizer'], 'character filters specified');
121
136
  t.true(Array.isArray(analyzer.filter), 'filters specified');
122
137
  t.end();
123
138
  });
124
139
  test('peliasQuery token filters', function (t) {
125
140
  var analyzer = settings().analysis.analyzer.peliasQuery;
126
141
  t.deepEqual(analyzer.filter, [
142
+ ...mayBeAmpersandReplacer(),
127
143
  'lowercase',
128
144
  'trim',
129
145
  'icu_folding',
130
146
  'remove_ordinals',
131
- 'removeAllZeroNumericPrefix',
132
147
  'unique_only_same_position',
133
148
  'notnull'
134
149
  ]);
@@ -143,13 +158,14 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) {
143
158
  var analyzer = s.analysis.analyzer.peliasPhrase;
144
159
  t.equal(analyzer.type, 'custom', 'custom analyzer');
145
160
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
146
- t.deepEqual(analyzer.char_filter, ["punctuation","nfkc_normalizer"], 'character filters specified');
161
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), "punctuation", "nfkc_normalizer"], 'character filters specified');
147
162
  t.true(Array.isArray(analyzer.filter), 'filters specified');
148
163
  t.end();
149
164
  });
150
165
  test('peliasPhrase token filters', function(t) {
151
166
  var analyzer = settings().analysis.analyzer.peliasPhrase;
152
167
  t.deepEqual( analyzer.filter, [
168
+ ...mayBeAmpersandReplacer(),
153
169
  "lowercase",
154
170
  "trim",
155
171
  "remove_duplicate_spaces",
@@ -159,7 +175,6 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) {
159
175
  "name_synonyms_multiplexer",
160
176
  "icu_folding",
161
177
  "remove_ordinals",
162
- "removeAllZeroNumericPrefix",
163
178
  "unique_only_same_position",
164
179
  "notnull",
165
180
  "flatten_graph"
@@ -236,13 +251,13 @@ module.exports.tests.peliasStreetAnalyzer = function(test, common) {
236
251
  var analyzer = s.analysis.analyzer.peliasStreet;
237
252
  t.equal(analyzer.type, 'custom', 'custom analyzer');
238
253
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
239
- t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
254
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), 'punctuation', 'nfkc_normalizer'], 'character filters specified');
240
255
  t.true(Array.isArray(analyzer.filter), 'filters specified');
241
256
  t.end();
242
257
  });
243
258
  test('peliasStreet token filters', function(t) {
244
259
  var analyzer = settings().analysis.analyzer.peliasStreet;
245
- t.deepEqual( analyzer.filter, [
260
+ t.deepEqual( analyzer.filter, [...mayBeAmpersandReplacer(),
246
261
  "lowercase",
247
262
  "trim",
248
263
  "remove_duplicate_spaces",
@@ -266,13 +281,13 @@ module.exports.tests.peliasIndexCountryAbbreviation = function (test, common) {
266
281
  var analyzer = s.analysis.analyzer.peliasIndexCountryAbbreviation;
267
282
  t.equal(analyzer.type, 'custom', 'custom analyzer');
268
283
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
269
- t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified');
284
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), 'punctuation', 'nfkc_normalizer'], 'character filters specified');
270
285
  t.true(Array.isArray(analyzer.filter), 'filters specified');
271
286
  t.end();
272
287
  });
273
288
  test('peliasIndexCountryAbbreviation token filters', function (t) {
274
289
  var analyzer = settings().analysis.analyzer.peliasIndexCountryAbbreviation;
275
- t.deepEqual(analyzer.filter, [
290
+ t.deepEqual(analyzer.filter, [...mayBeAmpersandReplacer(),
276
291
  "lowercase",
277
292
  "trim",
278
293
  "icu_folding",
@@ -292,13 +307,14 @@ module.exports.tests.peliasIndexCountryAbbreviationOneEdgeGramAnalyzer = functio
292
307
  var analyzer = s.analysis.analyzer.peliasIndexCountryAbbreviationOneEdgeGram;
293
308
  t.equal(analyzer.type, 'custom', 'custom analyzer');
294
309
  t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified');
295
- t.deepEqual(analyzer.char_filter, ["punctuation", "nfkc_normalizer"], 'character filters specified');
310
+ t.deepEqual(analyzer.char_filter, [...mayBeAmpersandMapper(), "punctuation", "nfkc_normalizer"], 'character filters specified');
296
311
  t.true(Array.isArray(analyzer.filter), 'filters specified');
297
312
  t.end();
298
313
  });
299
314
  test('peliasIndexCountryAbbreviationOneEdgeGram token filters', function (t) {
300
315
  var analyzer = settings().analysis.analyzer.peliasIndexCountryAbbreviationOneEdgeGram;
301
316
  t.deepEqual(analyzer.filter, [
317
+ ...mayBeAmpersandReplacer(),
302
318
  "lowercase",
303
319
  "trim",
304
320
  "icu_folding",
@@ -497,19 +513,6 @@ module.exports.tests.peliasOneEdgeGramFilter = function(test, common) {
497
513
  });
498
514
  };
499
515
 
500
- // this filter removed leading 0 characters. eg. 0001 -> 1
501
- module.exports.tests.removeAllZeroNumericPrefixFilter = function(test, common) {
502
- test('has removeAllZeroNumericPrefix filter', function(t) {
503
- var s = settings();
504
- t.equal(typeof s.analysis.filter.removeAllZeroNumericPrefix, 'object', 'there is a removeAllZeroNumericPrefix filter');
505
- var filter = s.analysis.filter.removeAllZeroNumericPrefix;
506
- t.equal(filter.type, 'pattern_replace');
507
- t.equal(filter.pattern, '^(0*)');
508
- t.equal(filter.replacement, '');
509
- t.end();
510
- });
511
- };
512
-
513
516
  // this filter provides synonyms for street suffixes
514
517
  // eg. road=>rd
515
518
  module.exports.tests.streetSynonymFilter = function(test, common) {