pelias-schema 7.0.0 → 7.2.0
Sign up to get free protection for your applications and to get access to all the features.
- package/.github/workflows/_integration_tests.yml +2 -2
- package/.github/workflows/_unit_tests.yml +2 -2
- package/.github/workflows/push.yml +1 -1
- package/integration/analyzer_peliasPhrase.js +2 -1
- package/package.json +1 -1
- package/punctuation.js +8 -13
- package/scripts/create_index.js +1 -1
- package/test/fixtures/expected.json +12 -2
- package/test/settings.js +1 -1
@@ -7,13 +7,13 @@ jobs:
|
|
7
7
|
matrix:
|
8
8
|
os:
|
9
9
|
- ${{ vars.UBUNTU_VERSION }}
|
10
|
-
node-version: [
|
10
|
+
node-version: [18.x, 20.x, 22.x]
|
11
11
|
es-version: [7.6.1]
|
12
12
|
jdk-version: [oraclejdk11]
|
13
13
|
steps:
|
14
14
|
- uses: actions/checkout@v2
|
15
15
|
- name: Install node.js ${{ matrix.node-version }}
|
16
|
-
uses: actions/setup-node@
|
16
|
+
uses: actions/setup-node@v4
|
17
17
|
with:
|
18
18
|
node-version: ${{ matrix.node-version }}
|
19
19
|
- name: Start elasticsearch ${{ matrix.es-version }} (${{ matrix.jdk-version }})
|
@@ -7,11 +7,11 @@ jobs:
|
|
7
7
|
matrix:
|
8
8
|
os:
|
9
9
|
- ${{ vars.UBUNTU_VERSION }}
|
10
|
-
node-version: [
|
10
|
+
node-version: [18.x, 20.x, 22.x]
|
11
11
|
steps:
|
12
12
|
- uses: actions/checkout@v2
|
13
13
|
- name: Install node.js ${{ matrix.node-version }}
|
14
|
-
uses: actions/setup-node@
|
14
|
+
uses: actions/setup-node@v4
|
15
15
|
with:
|
16
16
|
node-version: ${{ matrix.node-version }}
|
17
17
|
- name: Run unit tests
|
@@ -47,7 +47,8 @@ module.exports.tests.analyze = function(test, common){
|
|
47
47
|
// remove punctuation (handled by the char_filter)
|
48
48
|
assertAnalysis( 'punctuation', punctuation.all.join(''), ['0:&', '0:and', '0:und'] );
|
49
49
|
assertAnalysis( 'punctuation', 'Hawai‘i', ['hawaii'] );
|
50
|
-
|
50
|
+
assertAnalysis( 'punctuation - « in between', '«res»pub«lika»', ['respublika'] );
|
51
|
+
|
51
52
|
assertAnalysis( 'british_american_english', 'town theatre', ['0:town', '1:theatre', '1:theater'] );
|
52
53
|
assertAnalysis( 'british_american_english', 'town theater', ['0:town', '1:theater', '1:theatre'] );
|
53
54
|
|
package/package.json
CHANGED
package/punctuation.js
CHANGED
@@ -1,23 +1,18 @@
|
|
1
1
|
// These characters will be removed from ngrams/shingles
|
2
2
|
// @see: org/apache/lucene/analysis/cn/smart/stopwords.txt
|
3
3
|
|
4
|
-
module.exports.all = [
|
5
|
-
".","`","‘","-","_","=","?","'","|","\"","(",")","{","}","[","]","<",">","*",
|
6
|
-
"#","&","^","$","@","!","~",":",";","+","《","》","—","-",",","。",
|
7
|
-
"、", ":",";","!","·","?","„","“","”",")","(","【","】","[","]","●"
|
4
|
+
const all = [
|
5
|
+
".","`","‘","’","‛","-","_","=","?","'","|","\"","(",")","{","}","[","]","<",">","*",
|
6
|
+
"#","&","^","$","@","!","~",":",";","+","《","》","—","-",",","。","‹","›","⹂","〝","〞",
|
7
|
+
"、", ":",";","!","·","?","„","“","”","‟",")","(","【","】","[","]","●","«","»"
|
8
8
|
];
|
9
9
|
|
10
|
-
module.exports.allowed = [
|
10
|
+
const allowed = [
|
11
11
|
"-", // allow hyphens
|
12
12
|
"&" // allow ampersands
|
13
13
|
];
|
14
14
|
|
15
|
-
module.exports.blacklist = module.exports.all.slice();
|
16
|
-
|
17
15
|
// remove allowed chars from blacklist
|
18
|
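-
module.exports.allowed.forEach(function(item){
|
19
|
-
var index = module.exports.blacklist.indexOf(item);
|
20
|
-
if( index > -1 ){
|
21
|
-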
module.exports.blacklist.splice(index, 1);
|
22
|
-
}
|
23
|
-
});
|
16
|
+
const blacklist = all.filter(s => !allowed.includes(s));
|
17
|
+
|
18
|
+
module.exports = { all, allowed, blacklist };
|
package/scripts/create_index.js
CHANGED
@@ -2277,6 +2277,8 @@
|
|
2277
2277
|
".=>",
|
2278
2278
|
"`=>",
|
2279
2279
|
"‘=>",
|
2280
|
+
"’=>",
|
2281
|
+
"‛=>",
|
2280
2282
|
"_=>",
|
2281
2283
|
"==>",
|
2282
2284
|
"?=>",
|
@@ -2307,6 +2309,11 @@
|
|
2307
2309
|
"-=>",
|
2308
2310
|
",=>",
|
2309
2311
|
"。=>",
|
2312
|
+
"‹=>",
|
2313
|
+
"›=>",
|
2314
|
+
"⹂=>",
|
2315
|
+
"〝=>",
|
2316
|
+
"〞=>",
|
2310
2317
|
"、=>",
|
2311
2318
|
":=>",
|
2312
2319
|
";=>",
|
@@ -2316,13 +2323,16 @@
|
|
2316
2323
|
"„=>",
|
2317
2324
|
"“=>",
|
2318
2325
|
"”=>",
|
2326
|
+
"‟=>",
|
2319
2327
|
")=>",
|
2320
2328
|
"(=>",
|
2321
2329
|
"【=>",
|
2322
2330
|
"】=>",
|
2323
2331
|
"[=>",
|
2324
2332
|
"]=>",
|
2325
|
-
"●=>"
|
2333
|
+
"●=>",
|
2334
|
+
"«=>",
|
2335
|
+
"»=>"
|
2326
2336
|
]
|
2327
2337
|
},
|
2328
2338
|
"alphanumeric": {
|
@@ -3023,4 +3033,4 @@
|
|
3023
3033
|
},
|
3024
3034
|
"dynamic": "strict"
|
3025
3035
|
}
|
3026
|
-
}
|
3036
|
+
}
|
package/test/settings.js
CHANGED
@@ -591,7 +591,7 @@ module.exports.tests.punctuationCharFilter = function(test, common) {
|
|
591
591
|
var char_filter = s.analysis.char_filter.punctuation;
|
592
592
|
t.equal(char_filter.type, 'mapping');
|
593
593
|
t.true(Array.isArray(char_filter.mappings));
|
594
|
-
t.equal(char_filter.mappings.length, 49);
|
594
|
+
t.equal(char_filter.mappings.length, 59);
|
595
595
|
t.end();
|
596
596
|
});
|
597
597
|
};
|