ochre-sdk 0.22.4 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.mjs +172 -44
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2167,54 +2167,121 @@ const CTS_INCLUDES_STOP_WORDS = new Set([
2167
2167
  "the",
2168
2168
  "to"
2169
2169
  ]);
2170
+ const CTS_INCLUDES_TOKEN_WORD_REGEX = /^\p{L}+$/u;
2171
+ const CTS_INCLUDES_TOKEN_REGEX = /[\p{L}\p{N}*?]+/gu;
2170
2172
  const CTS_INCLUDES_TOKEN_SPLIT_REGEX = /\W+/u;
2171
- const CONTENT_TARGET_CONTAINER_ELEMENTS = {
2172
- title: "identification",
2173
- description: "description",
2174
- image: "image",
2175
- periods: "period",
2176
- bibliography: "bibliography"
2173
+ const CONTENT_TARGET_CONTENT_ELEMENT_PATHS = {
2174
+ title: [
2175
+ "identification",
2176
+ "label",
2177
+ "content"
2178
+ ],
2179
+ description: ["description", "content"],
2180
+ image: [
2181
+ "image",
2182
+ "identification",
2183
+ "label",
2184
+ "content"
2185
+ ],
2186
+ periods: [
2187
+ "periods",
2188
+ "period",
2189
+ "identification",
2190
+ "label",
2191
+ "content"
2192
+ ],
2193
+ bibliography: [
2194
+ "bibliographies",
2195
+ "bibliography",
2196
+ "identification",
2197
+ "label",
2198
+ "content"
2199
+ ]
2177
2200
  };
2178
2201
  function tokenizeIncludesSearchValue(params) {
2179
2202
  const { value, isCaseSensitive } = params;
2180
- const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).split(CTS_INCLUDES_TOKEN_SPLIT_REGEX);
2203
+ const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).match(CTS_INCLUDES_TOKEN_REGEX) ?? [];
2181
2204
  const terms = [];
2182
2205
  for (const term of rawTerms) {
2206
+ if (term.includes("*") || term.includes("?")) {
2207
+ if (term.replaceAll("*", "").replaceAll("?", "") !== "") terms.push(term);
2208
+ continue;
2209
+ }
2183
2210
  const normalizedTerm = term.toLowerCase();
2184
2211
  if (normalizedTerm !== "" && !CTS_INCLUDES_STOP_WORDS.has(normalizedTerm)) terms.push(term);
2185
2212
  }
2186
2213
  return terms;
2187
2214
  }
2215
+ function tokenizeExactPhraseSearchValue(params) {
2216
+ const { value, isCaseSensitive } = params;
2217
+ const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).split(CTS_INCLUDES_TOKEN_SPLIT_REGEX);
2218
+ const terms = [];
2219
+ for (const term of rawTerms) if (term !== "") terms.push(term);
2220
+ return terms;
2221
+ }
2222
+ function hasWildcardCharacters(value) {
2223
+ return value.includes("*") || value.includes("?");
2224
+ }
2225
+ function getWildcardStrippedValue(value) {
2226
+ return value.replaceAll("*", "").replaceAll("?", "");
2227
+ }
2228
+ function shouldUseStemmedTextSearch(value) {
2229
+ const wildcardStrippedValue = getWildcardStrippedValue(value);
2230
+ return wildcardStrippedValue.length >= 3 && CTS_INCLUDES_TOKEN_WORD_REGEX.test(wildcardStrippedValue);
2231
+ }
2188
2232
  function buildCtsMatchOptionsExpression(params) {
2189
- const { isCaseSensitive } = params;
2190
- return `(${[
2233
+ const { matchMode, isCaseSensitive, queryFamily, language, isWildcarded } = params;
2234
+ const { isStemmed } = params;
2235
+ const options = [
2191
2236
  isCaseSensitive ? "case-sensitive" : "case-insensitive",
2192
- "diacritic-insensitive",
2193
- "punctuation-insensitive",
2194
- "whitespace-insensitive",
2195
- "unstemmed",
2196
- "unwildcarded"
2197
- ].map((option) => stringLiteral(option)).join(", ")})`;
2237
+ matchMode === "exact" ? "diacritic-sensitive" : "diacritic-insensitive",
2238
+ matchMode === "exact" ? "punctuation-sensitive" : "punctuation-insensitive",
2239
+ matchMode === "exact" ? "whitespace-sensitive" : "whitespace-insensitive"
2240
+ ];
2241
+ if (matchMode === "exact") options.push("unstemmed", "unwildcarded");
2242
+ else if (queryFamily === "text") {
2243
+ options.push(isStemmed ? "stemmed" : "unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
2244
+ if (isStemmed && language != null && language !== "") options.push(`lang=${language}`);
2245
+ } else options.push("unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
2246
+ return `(${options.map((option) => stringLiteral(option)).join(", ")})`;
2198
2247
  }
2199
2248
  function buildCtsWordQueryExpression(params) {
2200
- const { value, matchMode, isCaseSensitive } = params;
2249
+ const { value, matchMode, isCaseSensitive, queryFamily, language } = params;
2250
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2251
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2201
2252
  return `cts:word-query(${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2202
2253
  matchMode,
2203
- isCaseSensitive
2254
+ isCaseSensitive,
2255
+ queryFamily,
2256
+ language,
2257
+ isWildcarded,
2258
+ isStemmed
2204
2259
  })})`;
2205
2260
  }
2206
2261
  function buildCtsElementWordQueryExpression(params) {
2207
- const { elementName, value, matchMode, isCaseSensitive } = params;
2262
+ const { elementName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
2263
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2264
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2208
2265
  return `cts:element-word-query(xs:QName("${elementName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2209
2266
  matchMode,
2210
- isCaseSensitive
2267
+ isCaseSensitive,
2268
+ queryFamily,
2269
+ language,
2270
+ isWildcarded,
2271
+ isStemmed
2211
2272
  })})`;
2212
2273
  }
2213
2274
  function buildCtsElementAttributeWordQueryExpression(params) {
2214
- const { elementName, attributeName, value, matchMode, isCaseSensitive } = params;
2275
+ const { elementName, attributeName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
2276
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2277
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2215
2278
  return `cts:element-attribute-word-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2216
2279
  matchMode,
2217
- isCaseSensitive
2280
+ isCaseSensitive,
2281
+ queryFamily,
2282
+ language,
2283
+ isWildcarded,
2284
+ isStemmed
2218
2285
  })})`;
2219
2286
  }
2220
2287
  function buildCtsElementValueQueryExpression(params) {
@@ -2236,21 +2303,36 @@ function buildPlainElementAttributeValueQueryExpression(params) {
2236
2303
  return `cts:element-attribute-value-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)})`;
2237
2304
  }
2238
2305
  function buildSearchableContentTextQueryExpression(params) {
2239
- const { value, matchMode, isCaseSensitive } = params;
2240
- if (matchMode === "exact") return buildCtsWordQueryExpression({
2241
- value,
2242
- matchMode,
2243
- isCaseSensitive
2244
- });
2306
+ const { value, matchMode, isCaseSensitive, language } = params;
2307
+ if (matchMode === "exact") {
2308
+ const phraseTerms = tokenizeExactPhraseSearchValue({
2309
+ value,
2310
+ isCaseSensitive
2311
+ });
2312
+ if (phraseTerms.length > 1) return buildAndCtsQueryExpressionInternal(phraseTerms.map((term) => buildCtsWordQueryExpression({
2313
+ value: term,
2314
+ matchMode,
2315
+ isCaseSensitive
2316
+ })));
2317
+ return buildCtsWordQueryExpression({
2318
+ value,
2319
+ matchMode,
2320
+ isCaseSensitive
2321
+ });
2322
+ }
2245
2323
  return buildOrCtsQueryExpressionInternal([buildCtsElementWordQueryExpression({
2246
2324
  elementName: "string",
2247
2325
  value,
2248
2326
  matchMode,
2249
- isCaseSensitive
2327
+ isCaseSensitive,
2328
+ queryFamily: "text",
2329
+ language
2250
2330
  }), buildCtsWordQueryExpression({
2251
2331
  value,
2252
2332
  matchMode,
2253
- isCaseSensitive
2333
+ isCaseSensitive,
2334
+ queryFamily: "text",
2335
+ language
2254
2336
  })]);
2255
2337
  }
2256
2338
  function buildNestedElementQuery(elementNames, queryExpression) {
@@ -2308,7 +2390,8 @@ function buildValueContentInnerQuery(params) {
2308
2390
  return buildNestedElementQuery(["content"], buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2309
2391
  value,
2310
2392
  matchMode,
2311
- isCaseSensitive
2393
+ isCaseSensitive,
2394
+ language
2312
2395
  })]));
2313
2396
  }
2314
2397
  function buildValueDirectTextInnerQuery(params) {
@@ -2321,7 +2404,8 @@ function buildValueDirectTextInnerQuery(params) {
2321
2404
  elementName: "value",
2322
2405
  value,
2323
2406
  matchMode,
2324
- isCaseSensitive
2407
+ isCaseSensitive,
2408
+ queryFamily: "raw"
2325
2409
  });
2326
2410
  return buildAndCtsQueryExpressionInternal([buildNotCtsQueryExpression(buildNestedElementQuery(["content"], "cts:true-query()")), directTextQuery]);
2327
2411
  }
@@ -2338,7 +2422,8 @@ function buildValueRawValueInnerQuery(params) {
2338
2422
  attributeName: "rawValue",
2339
2423
  value,
2340
2424
  matchMode,
2341
- isCaseSensitive
2425
+ isCaseSensitive,
2426
+ queryFamily: "raw"
2342
2427
  });
2343
2428
  }
2344
2429
  function buildNotesQueryExpression(params) {
@@ -2357,20 +2442,24 @@ function buildNotesQueryExpression(params) {
2357
2442
  attributeName: "title",
2358
2443
  value,
2359
2444
  matchMode,
2360
- isCaseSensitive
2445
+ isCaseSensitive,
2446
+ queryFamily: "text",
2447
+ language
2361
2448
  }), buildSearchableContentTextQueryExpression({
2362
2449
  value,
2363
2450
  matchMode,
2364
- isCaseSensitive
2451
+ isCaseSensitive,
2452
+ language
2365
2453
  })])]));
2366
2454
  }
2367
2455
  function buildContentTargetQueryExpression(params) {
2368
2456
  const { target, value, matchMode, isCaseSensitive, language } = params;
2369
- const containerElement = CONTENT_TARGET_CONTAINER_ELEMENTS[target];
2370
- return buildNestedElementQuery([containerElement], buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2457
+ const contentElementPath = CONTENT_TARGET_CONTENT_ELEMENT_PATHS[target];
2458
+ return buildNestedElementQuery(contentElementPath, buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2371
2459
  value,
2372
2460
  matchMode,
2373
- isCaseSensitive
2461
+ isCaseSensitive,
2462
+ language
2374
2463
  })]));
2375
2464
  }
2376
2465
  function buildPropertyQueryExpression(params) {
@@ -2414,6 +2503,27 @@ function buildPropertyScalarQueryExpression(params) {
2414
2503
  }
2415
2504
  function buildPropertyAllQueryExpression(params) {
2416
2505
  const { query, value, matchMode } = params;
2506
+ if (matchMode === "includes") return buildPropertyQueryExpression({
2507
+ propertyVariable: query.propertyVariable,
2508
+ queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildOrCtsQueryExpressionInternal([
2509
+ buildValueContentInnerQuery({
2510
+ language: query.language,
2511
+ value,
2512
+ matchMode,
2513
+ isCaseSensitive: query.isCaseSensitive
2514
+ }),
2515
+ buildValueRawValueInnerQuery({
2516
+ value,
2517
+ matchMode,
2518
+ isCaseSensitive: query.isCaseSensitive
2519
+ }),
2520
+ buildValueDirectTextInnerQuery({
2521
+ value,
2522
+ matchMode,
2523
+ isCaseSensitive: query.isCaseSensitive
2524
+ })
2525
+ ])]))
2526
+ });
2417
2527
  return buildPropertyQueryExpression({
2418
2528
  propertyVariable: query.propertyVariable,
2419
2529
  queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildCtsWordQueryExpression({
@@ -2541,6 +2651,11 @@ function buildLeafQueryExpression(query) {
2541
2651
  value: searchValue,
2542
2652
  isCaseSensitive: query.isCaseSensitive
2543
2653
  });
2654
+ const fullValueQueryExpression = buildLeafValueQueryExpression({
2655
+ query,
2656
+ value: searchValue,
2657
+ matchMode: "exact"
2658
+ });
2544
2659
  if (terms.length === 0) return "cts:false-query()";
2545
2660
  const termQueryExpressions = [];
2546
2661
  for (const term of terms) termQueryExpressions.push(buildLeafValueQueryExpression({
@@ -2548,7 +2663,9 @@ function buildLeafQueryExpression(query) {
2548
2663
  value: term,
2549
2664
  matchMode: "includes"
2550
2665
  }));
2551
- return buildAndCtsQueryExpressionInternal(termQueryExpressions);
2666
+ const tokenizedQueryExpression = buildAndCtsQueryExpressionInternal(termQueryExpressions);
2667
+ if (terms.length === 1) return tokenizedQueryExpression;
2668
+ return buildOrCtsQueryExpressionInternal([fullValueQueryExpression, tokenizedQueryExpression]);
2552
2669
  }
2553
2670
  function getGroupableIncludesValue(query) {
2554
2671
  if (query.matchMode !== "includes" || query.isNegated === true) return null;
@@ -2589,11 +2706,18 @@ function getCompatibleIncludesGroupLeaves(query) {
2589
2706
  return leafQueries;
2590
2707
  }
2591
2708
  function buildIncludesGroupMember(query) {
2592
- return { buildTermQuery: (term) => buildLeafValueQueryExpression({
2593
- query,
2594
- value: term,
2595
- matchMode: "includes"
2596
- }) };
2709
+ return {
2710
+ buildTermQuery: (term) => buildLeafValueQueryExpression({
2711
+ query,
2712
+ value: term,
2713
+ matchMode: "includes"
2714
+ }),
2715
+ buildFullValueQuery: (value) => buildLeafValueQueryExpression({
2716
+ query,
2717
+ value,
2718
+ matchMode: "exact"
2719
+ })
2720
+ };
2597
2721
  }
2598
2722
  function buildIncludesGroupQueryExpression(queries) {
2599
2723
  const firstQuery = queries[0];
@@ -2607,12 +2731,16 @@ function buildIncludesGroupQueryExpression(queries) {
2607
2731
  if (terms.length === 0) return "cts:false-query()";
2608
2732
  const members = queries.map((query) => buildIncludesGroupMember(query));
2609
2733
  const perTermQueryExpressions = [];
2734
+ const fullValueFieldQueryExpressions = [];
2735
+ for (const member of members) fullValueFieldQueryExpressions.push(member.buildFullValueQuery(groupValue));
2610
2736
  for (const term of terms) {
2611
2737
  const fieldQueryExpressions = [];
2612
2738
  for (const member of members) fieldQueryExpressions.push(member.buildTermQuery(term));
2613
2739
  perTermQueryExpressions.push(buildOrCtsQueryExpressionInternal(fieldQueryExpressions));
2614
2740
  }
2615
- return buildAndCtsQueryExpressionInternal(perTermQueryExpressions);
2741
+ const tokenizedGroupQueryExpression = buildAndCtsQueryExpressionInternal(perTermQueryExpressions);
2742
+ if (terms.length === 1) return tokenizedGroupQueryExpression;
2743
+ return buildOrCtsQueryExpressionInternal([buildOrCtsQueryExpressionInternal(fullValueFieldQueryExpressions), tokenizedGroupQueryExpression]);
2616
2744
  }
2617
2745
  function buildQueryNode(query) {
2618
2746
  if (isQueryLeaf(query)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ochre-sdk",
3
- "version": "0.22.4",
3
+ "version": "0.22.5",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Node.js library for working with OCHRE (Online Cultural and Historical Research Environment) data",