ochre-sdk 0.22.4 → 0.22.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +172 -44
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2167,54 +2167,121 @@ const CTS_INCLUDES_STOP_WORDS = new Set([
|
|
|
2167
2167
|
"the",
|
|
2168
2168
|
"to"
|
|
2169
2169
|
]);
|
|
2170
|
+
const CTS_INCLUDES_TOKEN_WORD_REGEX = /^\p{L}+$/u;
|
|
2171
|
+
const CTS_INCLUDES_TOKEN_REGEX = /[\p{L}\p{N}*?]+/gu;
|
|
2170
2172
|
const CTS_INCLUDES_TOKEN_SPLIT_REGEX = /\W+/u;
|
|
2171
|
-
const
|
|
2172
|
-
title:
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2173
|
+
const CONTENT_TARGET_CONTENT_ELEMENT_PATHS = {
|
|
2174
|
+
title: [
|
|
2175
|
+
"identification",
|
|
2176
|
+
"label",
|
|
2177
|
+
"content"
|
|
2178
|
+
],
|
|
2179
|
+
description: ["description", "content"],
|
|
2180
|
+
image: [
|
|
2181
|
+
"image",
|
|
2182
|
+
"identification",
|
|
2183
|
+
"label",
|
|
2184
|
+
"content"
|
|
2185
|
+
],
|
|
2186
|
+
periods: [
|
|
2187
|
+
"periods",
|
|
2188
|
+
"period",
|
|
2189
|
+
"identification",
|
|
2190
|
+
"label",
|
|
2191
|
+
"content"
|
|
2192
|
+
],
|
|
2193
|
+
bibliography: [
|
|
2194
|
+
"bibliographies",
|
|
2195
|
+
"bibliography",
|
|
2196
|
+
"identification",
|
|
2197
|
+
"label",
|
|
2198
|
+
"content"
|
|
2199
|
+
]
|
|
2177
2200
|
};
|
|
2178
2201
|
function tokenizeIncludesSearchValue(params) {
|
|
2179
2202
|
const { value, isCaseSensitive } = params;
|
|
2180
|
-
const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).
|
|
2203
|
+
const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).match(CTS_INCLUDES_TOKEN_REGEX) ?? [];
|
|
2181
2204
|
const terms = [];
|
|
2182
2205
|
for (const term of rawTerms) {
|
|
2206
|
+
if (term.includes("*") || term.includes("?")) {
|
|
2207
|
+
if (term.replaceAll("*", "").replaceAll("?", "") !== "") terms.push(term);
|
|
2208
|
+
continue;
|
|
2209
|
+
}
|
|
2183
2210
|
const normalizedTerm = term.toLowerCase();
|
|
2184
2211
|
if (normalizedTerm !== "" && !CTS_INCLUDES_STOP_WORDS.has(normalizedTerm)) terms.push(term);
|
|
2185
2212
|
}
|
|
2186
2213
|
return terms;
|
|
2187
2214
|
}
|
|
2215
|
+
function tokenizeExactPhraseSearchValue(params) {
|
|
2216
|
+
const { value, isCaseSensitive } = params;
|
|
2217
|
+
const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).split(CTS_INCLUDES_TOKEN_SPLIT_REGEX);
|
|
2218
|
+
const terms = [];
|
|
2219
|
+
for (const term of rawTerms) if (term !== "") terms.push(term);
|
|
2220
|
+
return terms;
|
|
2221
|
+
}
|
|
2222
|
+
function hasWildcardCharacters(value) {
|
|
2223
|
+
return value.includes("*") || value.includes("?");
|
|
2224
|
+
}
|
|
2225
|
+
function getWildcardStrippedValue(value) {
|
|
2226
|
+
return value.replaceAll("*", "").replaceAll("?", "");
|
|
2227
|
+
}
|
|
2228
|
+
function shouldUseStemmedTextSearch(value) {
|
|
2229
|
+
const wildcardStrippedValue = getWildcardStrippedValue(value);
|
|
2230
|
+
return wildcardStrippedValue.length >= 3 && CTS_INCLUDES_TOKEN_WORD_REGEX.test(wildcardStrippedValue);
|
|
2231
|
+
}
|
|
2188
2232
|
function buildCtsMatchOptionsExpression(params) {
|
|
2189
|
-
const { isCaseSensitive } = params;
|
|
2190
|
-
|
|
2233
|
+
const { matchMode, isCaseSensitive, queryFamily, language, isWildcarded } = params;
|
|
2234
|
+
const { isStemmed } = params;
|
|
2235
|
+
const options = [
|
|
2191
2236
|
isCaseSensitive ? "case-sensitive" : "case-insensitive",
|
|
2192
|
-
"diacritic-insensitive",
|
|
2193
|
-
"punctuation-insensitive",
|
|
2194
|
-
"whitespace-insensitive"
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2237
|
+
matchMode === "exact" ? "diacritic-sensitive" : "diacritic-insensitive",
|
|
2238
|
+
matchMode === "exact" ? "punctuation-sensitive" : "punctuation-insensitive",
|
|
2239
|
+
matchMode === "exact" ? "whitespace-sensitive" : "whitespace-insensitive"
|
|
2240
|
+
];
|
|
2241
|
+
if (matchMode === "exact") options.push("unstemmed", "unwildcarded");
|
|
2242
|
+
else if (queryFamily === "text") {
|
|
2243
|
+
options.push(isStemmed ? "stemmed" : "unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
|
|
2244
|
+
if (isStemmed && language != null && language !== "") options.push(`lang=${language}`);
|
|
2245
|
+
} else options.push("unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
|
|
2246
|
+
return `(${options.map((option) => stringLiteral(option)).join(", ")})`;
|
|
2198
2247
|
}
|
|
2199
2248
|
function buildCtsWordQueryExpression(params) {
|
|
2200
|
-
const { value, matchMode, isCaseSensitive } = params;
|
|
2249
|
+
const { value, matchMode, isCaseSensitive, queryFamily, language } = params;
|
|
2250
|
+
const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
|
|
2251
|
+
const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
|
|
2201
2252
|
return `cts:word-query(${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
|
|
2202
2253
|
matchMode,
|
|
2203
|
-
isCaseSensitive
|
|
2254
|
+
isCaseSensitive,
|
|
2255
|
+
queryFamily,
|
|
2256
|
+
language,
|
|
2257
|
+
isWildcarded,
|
|
2258
|
+
isStemmed
|
|
2204
2259
|
})})`;
|
|
2205
2260
|
}
|
|
2206
2261
|
function buildCtsElementWordQueryExpression(params) {
|
|
2207
|
-
const { elementName, value, matchMode, isCaseSensitive } = params;
|
|
2262
|
+
const { elementName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
|
|
2263
|
+
const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
|
|
2264
|
+
const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
|
|
2208
2265
|
return `cts:element-word-query(xs:QName("${elementName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
|
|
2209
2266
|
matchMode,
|
|
2210
|
-
isCaseSensitive
|
|
2267
|
+
isCaseSensitive,
|
|
2268
|
+
queryFamily,
|
|
2269
|
+
language,
|
|
2270
|
+
isWildcarded,
|
|
2271
|
+
isStemmed
|
|
2211
2272
|
})})`;
|
|
2212
2273
|
}
|
|
2213
2274
|
function buildCtsElementAttributeWordQueryExpression(params) {
|
|
2214
|
-
const { elementName, attributeName, value, matchMode, isCaseSensitive } = params;
|
|
2275
|
+
const { elementName, attributeName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
|
|
2276
|
+
const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
|
|
2277
|
+
const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
|
|
2215
2278
|
return `cts:element-attribute-word-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
|
|
2216
2279
|
matchMode,
|
|
2217
|
-
isCaseSensitive
|
|
2280
|
+
isCaseSensitive,
|
|
2281
|
+
queryFamily,
|
|
2282
|
+
language,
|
|
2283
|
+
isWildcarded,
|
|
2284
|
+
isStemmed
|
|
2218
2285
|
})})`;
|
|
2219
2286
|
}
|
|
2220
2287
|
function buildCtsElementValueQueryExpression(params) {
|
|
@@ -2236,21 +2303,36 @@ function buildPlainElementAttributeValueQueryExpression(params) {
|
|
|
2236
2303
|
return `cts:element-attribute-value-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)})`;
|
|
2237
2304
|
}
|
|
2238
2305
|
function buildSearchableContentTextQueryExpression(params) {
|
|
2239
|
-
const { value, matchMode, isCaseSensitive } = params;
|
|
2240
|
-
if (matchMode === "exact")
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2306
|
+
const { value, matchMode, isCaseSensitive, language } = params;
|
|
2307
|
+
if (matchMode === "exact") {
|
|
2308
|
+
const phraseTerms = tokenizeExactPhraseSearchValue({
|
|
2309
|
+
value,
|
|
2310
|
+
isCaseSensitive
|
|
2311
|
+
});
|
|
2312
|
+
if (phraseTerms.length > 1) return buildAndCtsQueryExpressionInternal(phraseTerms.map((term) => buildCtsWordQueryExpression({
|
|
2313
|
+
value: term,
|
|
2314
|
+
matchMode,
|
|
2315
|
+
isCaseSensitive
|
|
2316
|
+
})));
|
|
2317
|
+
return buildCtsWordQueryExpression({
|
|
2318
|
+
value,
|
|
2319
|
+
matchMode,
|
|
2320
|
+
isCaseSensitive
|
|
2321
|
+
});
|
|
2322
|
+
}
|
|
2245
2323
|
return buildOrCtsQueryExpressionInternal([buildCtsElementWordQueryExpression({
|
|
2246
2324
|
elementName: "string",
|
|
2247
2325
|
value,
|
|
2248
2326
|
matchMode,
|
|
2249
|
-
isCaseSensitive
|
|
2327
|
+
isCaseSensitive,
|
|
2328
|
+
queryFamily: "text",
|
|
2329
|
+
language
|
|
2250
2330
|
}), buildCtsWordQueryExpression({
|
|
2251
2331
|
value,
|
|
2252
2332
|
matchMode,
|
|
2253
|
-
isCaseSensitive
|
|
2333
|
+
isCaseSensitive,
|
|
2334
|
+
queryFamily: "text",
|
|
2335
|
+
language
|
|
2254
2336
|
})]);
|
|
2255
2337
|
}
|
|
2256
2338
|
function buildNestedElementQuery(elementNames, queryExpression) {
|
|
@@ -2308,7 +2390,8 @@ function buildValueContentInnerQuery(params) {
|
|
|
2308
2390
|
return buildNestedElementQuery(["content"], buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
|
|
2309
2391
|
value,
|
|
2310
2392
|
matchMode,
|
|
2311
|
-
isCaseSensitive
|
|
2393
|
+
isCaseSensitive,
|
|
2394
|
+
language
|
|
2312
2395
|
})]));
|
|
2313
2396
|
}
|
|
2314
2397
|
function buildValueDirectTextInnerQuery(params) {
|
|
@@ -2321,7 +2404,8 @@ function buildValueDirectTextInnerQuery(params) {
|
|
|
2321
2404
|
elementName: "value",
|
|
2322
2405
|
value,
|
|
2323
2406
|
matchMode,
|
|
2324
|
-
isCaseSensitive
|
|
2407
|
+
isCaseSensitive,
|
|
2408
|
+
queryFamily: "raw"
|
|
2325
2409
|
});
|
|
2326
2410
|
return buildAndCtsQueryExpressionInternal([buildNotCtsQueryExpression(buildNestedElementQuery(["content"], "cts:true-query()")), directTextQuery]);
|
|
2327
2411
|
}
|
|
@@ -2338,7 +2422,8 @@ function buildValueRawValueInnerQuery(params) {
|
|
|
2338
2422
|
attributeName: "rawValue",
|
|
2339
2423
|
value,
|
|
2340
2424
|
matchMode,
|
|
2341
|
-
isCaseSensitive
|
|
2425
|
+
isCaseSensitive,
|
|
2426
|
+
queryFamily: "raw"
|
|
2342
2427
|
});
|
|
2343
2428
|
}
|
|
2344
2429
|
function buildNotesQueryExpression(params) {
|
|
@@ -2357,20 +2442,24 @@ function buildNotesQueryExpression(params) {
|
|
|
2357
2442
|
attributeName: "title",
|
|
2358
2443
|
value,
|
|
2359
2444
|
matchMode,
|
|
2360
|
-
isCaseSensitive
|
|
2445
|
+
isCaseSensitive,
|
|
2446
|
+
queryFamily: "text",
|
|
2447
|
+
language
|
|
2361
2448
|
}), buildSearchableContentTextQueryExpression({
|
|
2362
2449
|
value,
|
|
2363
2450
|
matchMode,
|
|
2364
|
-
isCaseSensitive
|
|
2451
|
+
isCaseSensitive,
|
|
2452
|
+
language
|
|
2365
2453
|
})])]));
|
|
2366
2454
|
}
|
|
2367
2455
|
function buildContentTargetQueryExpression(params) {
|
|
2368
2456
|
const { target, value, matchMode, isCaseSensitive, language } = params;
|
|
2369
|
-
const
|
|
2370
|
-
return buildNestedElementQuery(
|
|
2457
|
+
const contentElementPath = CONTENT_TARGET_CONTENT_ELEMENT_PATHS[target];
|
|
2458
|
+
return buildNestedElementQuery(contentElementPath, buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
|
|
2371
2459
|
value,
|
|
2372
2460
|
matchMode,
|
|
2373
|
-
isCaseSensitive
|
|
2461
|
+
isCaseSensitive,
|
|
2462
|
+
language
|
|
2374
2463
|
})]));
|
|
2375
2464
|
}
|
|
2376
2465
|
function buildPropertyQueryExpression(params) {
|
|
@@ -2414,6 +2503,27 @@ function buildPropertyScalarQueryExpression(params) {
|
|
|
2414
2503
|
}
|
|
2415
2504
|
function buildPropertyAllQueryExpression(params) {
|
|
2416
2505
|
const { query, value, matchMode } = params;
|
|
2506
|
+
if (matchMode === "includes") return buildPropertyQueryExpression({
|
|
2507
|
+
propertyVariable: query.propertyVariable,
|
|
2508
|
+
queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildOrCtsQueryExpressionInternal([
|
|
2509
|
+
buildValueContentInnerQuery({
|
|
2510
|
+
language: query.language,
|
|
2511
|
+
value,
|
|
2512
|
+
matchMode,
|
|
2513
|
+
isCaseSensitive: query.isCaseSensitive
|
|
2514
|
+
}),
|
|
2515
|
+
buildValueRawValueInnerQuery({
|
|
2516
|
+
value,
|
|
2517
|
+
matchMode,
|
|
2518
|
+
isCaseSensitive: query.isCaseSensitive
|
|
2519
|
+
}),
|
|
2520
|
+
buildValueDirectTextInnerQuery({
|
|
2521
|
+
value,
|
|
2522
|
+
matchMode,
|
|
2523
|
+
isCaseSensitive: query.isCaseSensitive
|
|
2524
|
+
})
|
|
2525
|
+
])]))
|
|
2526
|
+
});
|
|
2417
2527
|
return buildPropertyQueryExpression({
|
|
2418
2528
|
propertyVariable: query.propertyVariable,
|
|
2419
2529
|
queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildCtsWordQueryExpression({
|
|
@@ -2541,6 +2651,11 @@ function buildLeafQueryExpression(query) {
|
|
|
2541
2651
|
value: searchValue,
|
|
2542
2652
|
isCaseSensitive: query.isCaseSensitive
|
|
2543
2653
|
});
|
|
2654
|
+
const fullValueQueryExpression = buildLeafValueQueryExpression({
|
|
2655
|
+
query,
|
|
2656
|
+
value: searchValue,
|
|
2657
|
+
matchMode: "exact"
|
|
2658
|
+
});
|
|
2544
2659
|
if (terms.length === 0) return "cts:false-query()";
|
|
2545
2660
|
const termQueryExpressions = [];
|
|
2546
2661
|
for (const term of terms) termQueryExpressions.push(buildLeafValueQueryExpression({
|
|
@@ -2548,7 +2663,9 @@ function buildLeafQueryExpression(query) {
|
|
|
2548
2663
|
value: term,
|
|
2549
2664
|
matchMode: "includes"
|
|
2550
2665
|
}));
|
|
2551
|
-
|
|
2666
|
+
const tokenizedQueryExpression = buildAndCtsQueryExpressionInternal(termQueryExpressions);
|
|
2667
|
+
if (terms.length === 1) return tokenizedQueryExpression;
|
|
2668
|
+
return buildOrCtsQueryExpressionInternal([fullValueQueryExpression, tokenizedQueryExpression]);
|
|
2552
2669
|
}
|
|
2553
2670
|
function getGroupableIncludesValue(query) {
|
|
2554
2671
|
if (query.matchMode !== "includes" || query.isNegated === true) return null;
|
|
@@ -2589,11 +2706,18 @@ function getCompatibleIncludesGroupLeaves(query) {
|
|
|
2589
2706
|
return leafQueries;
|
|
2590
2707
|
}
|
|
2591
2708
|
function buildIncludesGroupMember(query) {
|
|
2592
|
-
return {
|
|
2593
|
-
|
|
2594
|
-
|
|
2595
|
-
|
|
2596
|
-
|
|
2709
|
+
return {
|
|
2710
|
+
buildTermQuery: (term) => buildLeafValueQueryExpression({
|
|
2711
|
+
query,
|
|
2712
|
+
value: term,
|
|
2713
|
+
matchMode: "includes"
|
|
2714
|
+
}),
|
|
2715
|
+
buildFullValueQuery: (value) => buildLeafValueQueryExpression({
|
|
2716
|
+
query,
|
|
2717
|
+
value,
|
|
2718
|
+
matchMode: "exact"
|
|
2719
|
+
})
|
|
2720
|
+
};
|
|
2597
2721
|
}
|
|
2598
2722
|
function buildIncludesGroupQueryExpression(queries) {
|
|
2599
2723
|
const firstQuery = queries[0];
|
|
@@ -2607,12 +2731,16 @@ function buildIncludesGroupQueryExpression(queries) {
|
|
|
2607
2731
|
if (terms.length === 0) return "cts:false-query()";
|
|
2608
2732
|
const members = queries.map((query) => buildIncludesGroupMember(query));
|
|
2609
2733
|
const perTermQueryExpressions = [];
|
|
2734
|
+
const fullValueFieldQueryExpressions = [];
|
|
2735
|
+
for (const member of members) fullValueFieldQueryExpressions.push(member.buildFullValueQuery(groupValue));
|
|
2610
2736
|
for (const term of terms) {
|
|
2611
2737
|
const fieldQueryExpressions = [];
|
|
2612
2738
|
for (const member of members) fieldQueryExpressions.push(member.buildTermQuery(term));
|
|
2613
2739
|
perTermQueryExpressions.push(buildOrCtsQueryExpressionInternal(fieldQueryExpressions));
|
|
2614
2740
|
}
|
|
2615
|
-
|
|
2741
|
+
const tokenizedGroupQueryExpression = buildAndCtsQueryExpressionInternal(perTermQueryExpressions);
|
|
2742
|
+
if (terms.length === 1) return tokenizedGroupQueryExpression;
|
|
2743
|
+
return buildOrCtsQueryExpressionInternal([buildOrCtsQueryExpressionInternal(fullValueFieldQueryExpressions), tokenizedGroupQueryExpression]);
|
|
2616
2744
|
}
|
|
2617
2745
|
function buildQueryNode(query) {
|
|
2618
2746
|
if (isQueryLeaf(query)) {
|