ochre-sdk 0.22.3 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.mjs +191 -60
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -2167,54 +2167,121 @@ const CTS_INCLUDES_STOP_WORDS = new Set([
2167
2167
  "the",
2168
2168
  "to"
2169
2169
  ]);
2170
+ const CTS_INCLUDES_TOKEN_WORD_REGEX = /^\p{L}+$/u;
2171
+ const CTS_INCLUDES_TOKEN_REGEX = /[\p{L}\p{N}*?]+/gu;
2170
2172
  const CTS_INCLUDES_TOKEN_SPLIT_REGEX = /\W+/u;
2171
- const CONTENT_TARGET_CONTAINER_ELEMENTS = {
2172
- title: "identification",
2173
- description: "description",
2174
- image: "image",
2175
- periods: "period",
2176
- bibliography: "bibliography"
2173
+ const CONTENT_TARGET_CONTENT_ELEMENT_PATHS = {
2174
+ title: [
2175
+ "identification",
2176
+ "label",
2177
+ "content"
2178
+ ],
2179
+ description: ["description", "content"],
2180
+ image: [
2181
+ "image",
2182
+ "identification",
2183
+ "label",
2184
+ "content"
2185
+ ],
2186
+ periods: [
2187
+ "periods",
2188
+ "period",
2189
+ "identification",
2190
+ "label",
2191
+ "content"
2192
+ ],
2193
+ bibliography: [
2194
+ "bibliographies",
2195
+ "bibliography",
2196
+ "identification",
2197
+ "label",
2198
+ "content"
2199
+ ]
2177
2200
  };
2178
2201
  function tokenizeIncludesSearchValue(params) {
2179
2202
  const { value, isCaseSensitive } = params;
2180
- const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).split(CTS_INCLUDES_TOKEN_SPLIT_REGEX);
2203
+ const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).match(CTS_INCLUDES_TOKEN_REGEX) ?? [];
2181
2204
  const terms = [];
2182
2205
  for (const term of rawTerms) {
2206
+ if (term.includes("*") || term.includes("?")) {
2207
+ if (term.replaceAll("*", "").replaceAll("?", "") !== "") terms.push(term);
2208
+ continue;
2209
+ }
2183
2210
  const normalizedTerm = term.toLowerCase();
2184
2211
  if (normalizedTerm !== "" && !CTS_INCLUDES_STOP_WORDS.has(normalizedTerm)) terms.push(term);
2185
2212
  }
2186
2213
  return terms;
2187
2214
  }
2215
+ function tokenizeExactPhraseSearchValue(params) {
2216
+ const { value, isCaseSensitive } = params;
2217
+ const rawTerms = (isCaseSensitive ? value : value.toLowerCase()).split(CTS_INCLUDES_TOKEN_SPLIT_REGEX);
2218
+ const terms = [];
2219
+ for (const term of rawTerms) if (term !== "") terms.push(term);
2220
+ return terms;
2221
+ }
2222
+ function hasWildcardCharacters(value) {
2223
+ return value.includes("*") || value.includes("?");
2224
+ }
2225
+ function getWildcardStrippedValue(value) {
2226
+ return value.replaceAll("*", "").replaceAll("?", "");
2227
+ }
2228
+ function shouldUseStemmedTextSearch(value) {
2229
+ const wildcardStrippedValue = getWildcardStrippedValue(value);
2230
+ return wildcardStrippedValue.length >= 3 && CTS_INCLUDES_TOKEN_WORD_REGEX.test(wildcardStrippedValue);
2231
+ }
2188
2232
  function buildCtsMatchOptionsExpression(params) {
2189
- const { isCaseSensitive } = params;
2190
- return `(${[
2233
+ const { matchMode, isCaseSensitive, queryFamily, language, isWildcarded } = params;
2234
+ const { isStemmed } = params;
2235
+ const options = [
2191
2236
  isCaseSensitive ? "case-sensitive" : "case-insensitive",
2192
- "diacritic-insensitive",
2193
- "punctuation-insensitive",
2194
- "whitespace-insensitive",
2195
- "unstemmed",
2196
- "unwildcarded"
2197
- ].map((option) => stringLiteral(option)).join(", ")})`;
2237
+ matchMode === "exact" ? "diacritic-sensitive" : "diacritic-insensitive",
2238
+ matchMode === "exact" ? "punctuation-sensitive" : "punctuation-insensitive",
2239
+ matchMode === "exact" ? "whitespace-sensitive" : "whitespace-insensitive"
2240
+ ];
2241
+ if (matchMode === "exact") options.push("unstemmed", "unwildcarded");
2242
+ else if (queryFamily === "text") {
2243
+ options.push(isStemmed ? "stemmed" : "unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
2244
+ if (isStemmed && language != null && language !== "") options.push(`lang=${language}`);
2245
+ } else options.push("unstemmed", isWildcarded ? "wildcarded" : "unwildcarded");
2246
+ return `(${options.map((option) => stringLiteral(option)).join(", ")})`;
2198
2247
  }
2199
2248
  function buildCtsWordQueryExpression(params) {
2200
- const { value, matchMode, isCaseSensitive } = params;
2249
+ const { value, matchMode, isCaseSensitive, queryFamily, language } = params;
2250
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2251
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2201
2252
  return `cts:word-query(${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2202
2253
  matchMode,
2203
- isCaseSensitive
2254
+ isCaseSensitive,
2255
+ queryFamily,
2256
+ language,
2257
+ isWildcarded,
2258
+ isStemmed
2204
2259
  })})`;
2205
2260
  }
2206
2261
  function buildCtsElementWordQueryExpression(params) {
2207
- const { elementName, value, matchMode, isCaseSensitive } = params;
2262
+ const { elementName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
2263
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2264
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2208
2265
  return `cts:element-word-query(xs:QName("${elementName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2209
2266
  matchMode,
2210
- isCaseSensitive
2267
+ isCaseSensitive,
2268
+ queryFamily,
2269
+ language,
2270
+ isWildcarded,
2271
+ isStemmed
2211
2272
  })})`;
2212
2273
  }
2213
2274
  function buildCtsElementAttributeWordQueryExpression(params) {
2214
- const { elementName, attributeName, value, matchMode, isCaseSensitive } = params;
2275
+ const { elementName, attributeName, value, matchMode, isCaseSensitive, queryFamily, language } = params;
2276
+ const isWildcarded = matchMode === "includes" && hasWildcardCharacters(value);
2277
+ const isStemmed = matchMode === "includes" && queryFamily === "text" && !isWildcarded && shouldUseStemmedTextSearch(value);
2215
2278
  return `cts:element-attribute-word-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)}, ${buildCtsMatchOptionsExpression({
2216
2279
  matchMode,
2217
- isCaseSensitive
2280
+ isCaseSensitive,
2281
+ queryFamily,
2282
+ language,
2283
+ isWildcarded,
2284
+ isStemmed
2218
2285
  })})`;
2219
2286
  }
2220
2287
  function buildCtsElementValueQueryExpression(params) {
@@ -2236,21 +2303,36 @@ function buildPlainElementAttributeValueQueryExpression(params) {
2236
2303
  return `cts:element-attribute-value-query(xs:QName("${elementName}"), xs:QName("${attributeName}"), ${stringLiteral(value)})`;
2237
2304
  }
2238
2305
  function buildSearchableContentTextQueryExpression(params) {
2239
- const { value, matchMode, isCaseSensitive } = params;
2240
- if (matchMode === "exact") return buildCtsWordQueryExpression({
2241
- value,
2242
- matchMode,
2243
- isCaseSensitive
2244
- });
2306
+ const { value, matchMode, isCaseSensitive, language } = params;
2307
+ if (matchMode === "exact") {
2308
+ const phraseTerms = tokenizeExactPhraseSearchValue({
2309
+ value,
2310
+ isCaseSensitive
2311
+ });
2312
+ if (phraseTerms.length > 1) return buildAndCtsQueryExpressionInternal(phraseTerms.map((term) => buildCtsWordQueryExpression({
2313
+ value: term,
2314
+ matchMode,
2315
+ isCaseSensitive
2316
+ })));
2317
+ return buildCtsWordQueryExpression({
2318
+ value,
2319
+ matchMode,
2320
+ isCaseSensitive
2321
+ });
2322
+ }
2245
2323
  return buildOrCtsQueryExpressionInternal([buildCtsElementWordQueryExpression({
2246
2324
  elementName: "string",
2247
2325
  value,
2248
2326
  matchMode,
2249
- isCaseSensitive
2327
+ isCaseSensitive,
2328
+ queryFamily: "text",
2329
+ language
2250
2330
  }), buildCtsWordQueryExpression({
2251
2331
  value,
2252
2332
  matchMode,
2253
- isCaseSensitive
2333
+ isCaseSensitive,
2334
+ queryFamily: "text",
2335
+ language
2254
2336
  })]);
2255
2337
  }
2256
2338
  function buildNestedElementQuery(elementNames, queryExpression) {
@@ -2308,7 +2390,8 @@ function buildValueContentInnerQuery(params) {
2308
2390
  return buildNestedElementQuery(["content"], buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2309
2391
  value,
2310
2392
  matchMode,
2311
- isCaseSensitive
2393
+ isCaseSensitive,
2394
+ language
2312
2395
  })]));
2313
2396
  }
2314
2397
  function buildValueDirectTextInnerQuery(params) {
@@ -2321,7 +2404,8 @@ function buildValueDirectTextInnerQuery(params) {
2321
2404
  elementName: "value",
2322
2405
  value,
2323
2406
  matchMode,
2324
- isCaseSensitive
2407
+ isCaseSensitive,
2408
+ queryFamily: "raw"
2325
2409
  });
2326
2410
  return buildAndCtsQueryExpressionInternal([buildNotCtsQueryExpression(buildNestedElementQuery(["content"], "cts:true-query()")), directTextQuery]);
2327
2411
  }
@@ -2338,7 +2422,8 @@ function buildValueRawValueInnerQuery(params) {
2338
2422
  attributeName: "rawValue",
2339
2423
  value,
2340
2424
  matchMode,
2341
- isCaseSensitive
2425
+ isCaseSensitive,
2426
+ queryFamily: "raw"
2342
2427
  });
2343
2428
  }
2344
2429
  function buildNotesQueryExpression(params) {
@@ -2357,20 +2442,24 @@ function buildNotesQueryExpression(params) {
2357
2442
  attributeName: "title",
2358
2443
  value,
2359
2444
  matchMode,
2360
- isCaseSensitive
2445
+ isCaseSensitive,
2446
+ queryFamily: "text",
2447
+ language
2361
2448
  }), buildSearchableContentTextQueryExpression({
2362
2449
  value,
2363
2450
  matchMode,
2364
- isCaseSensitive
2451
+ isCaseSensitive,
2452
+ language
2365
2453
  })])]));
2366
2454
  }
2367
2455
  function buildContentTargetQueryExpression(params) {
2368
2456
  const { target, value, matchMode, isCaseSensitive, language } = params;
2369
- const containerElement = CONTENT_TARGET_CONTAINER_ELEMENTS[target];
2370
- return buildNestedElementQuery([containerElement], buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2457
+ const contentElementPath = CONTENT_TARGET_CONTENT_ELEMENT_PATHS[target];
2458
+ return buildNestedElementQuery(contentElementPath, buildAndCtsQueryExpressionInternal([buildContentLanguageQuery(language), buildSearchableContentTextQueryExpression({
2371
2459
  value,
2372
2460
  matchMode,
2373
- isCaseSensitive
2461
+ isCaseSensitive,
2462
+ language
2374
2463
  })]));
2375
2464
  }
2376
2465
  function buildPropertyQueryExpression(params) {
@@ -2379,6 +2468,12 @@ function buildPropertyQueryExpression(params) {
2379
2468
  if (propertyVariable != null) propertyQueryExpressions.unshift(buildPropertyLabelQuery(propertyVariable));
2380
2469
  return buildNestedElementQuery(["properties", "property"], buildAndCtsQueryExpressionInternal(propertyQueryExpressions));
2381
2470
  }
2471
+ function buildPropertyPresenceQueryExpression(params) {
2472
+ return buildPropertyQueryExpression({
2473
+ propertyVariable: params.propertyVariable,
2474
+ queryExpression: "cts:true-query()"
2475
+ });
2476
+ }
2382
2477
  function buildPropertyStringQueryExpression(params) {
2383
2478
  const { propertyVariable, value, matchMode, isCaseSensitive, language } = params;
2384
2479
  return buildPropertyQueryExpression({
@@ -2408,35 +2503,35 @@ function buildPropertyScalarQueryExpression(params) {
2408
2503
  }
2409
2504
  function buildPropertyAllQueryExpression(params) {
2410
2505
  const { query, value, matchMode } = params;
2411
- if (matchMode === "exact") return buildPropertyQueryExpression({
2412
- propertyVariable: query.propertyVariable,
2413
- queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildCtsWordQueryExpression({
2414
- value,
2415
- matchMode,
2416
- isCaseSensitive: query.isCaseSensitive
2417
- })]))
2418
- });
2419
- return buildPropertyQueryExpression({
2506
+ if (matchMode === "includes") return buildPropertyQueryExpression({
2420
2507
  propertyVariable: query.propertyVariable,
2421
2508
  queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildOrCtsQueryExpressionInternal([
2422
- buildValueRawValueInnerQuery({
2509
+ buildValueContentInnerQuery({
2510
+ language: query.language,
2423
2511
  value,
2424
2512
  matchMode,
2425
2513
  isCaseSensitive: query.isCaseSensitive
2426
2514
  }),
2427
- buildValueDirectTextInnerQuery({
2515
+ buildValueRawValueInnerQuery({
2428
2516
  value,
2429
2517
  matchMode,
2430
2518
  isCaseSensitive: query.isCaseSensitive
2431
2519
  }),
2432
- buildValueContentInnerQuery({
2433
- language: query.language,
2520
+ buildValueDirectTextInnerQuery({
2434
2521
  value,
2435
2522
  matchMode,
2436
2523
  isCaseSensitive: query.isCaseSensitive
2437
2524
  })
2438
2525
  ])]))
2439
2526
  });
2527
+ return buildPropertyQueryExpression({
2528
+ propertyVariable: query.propertyVariable,
2529
+ queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal([buildValueNotIdRefQuery(), buildCtsWordQueryExpression({
2530
+ value,
2531
+ matchMode,
2532
+ isCaseSensitive: query.isCaseSensitive
2533
+ })]))
2534
+ });
2440
2535
  }
2441
2536
  function buildPropertyIdRefQueryExpression(params) {
2442
2537
  const { propertyVariable, value } = params;
@@ -2451,8 +2546,8 @@ function buildPropertyIdRefQueryExpression(params) {
2451
2546
  }
2452
2547
  function buildPropertyDateRangeQueryExpression(query) {
2453
2548
  const rangeQueryExpressions = [];
2454
- if (query.from != null) rangeQueryExpressions.push(`cts:element-attribute-range-query(xs:QName("value"), xs:QName("rawValue"), ">=", xs:${query.dataType}(${stringLiteral(query.from)}))`);
2455
- if (query.to != null) rangeQueryExpressions.push(`cts:element-attribute-range-query(xs:QName("value"), xs:QName("rawValue"), "<=", xs:${query.dataType}(${stringLiteral(query.to)}))`);
2549
+ if (query.from != null) rangeQueryExpressions.push(`cts:element-attribute-range-query(xs:QName("value"), xs:QName("rawValue"), ">=", ${stringLiteral(query.from)})`);
2550
+ if (query.to != null) rangeQueryExpressions.push(`cts:element-attribute-range-query(xs:QName("value"), xs:QName("rawValue"), "<=", ${stringLiteral(query.to)})`);
2456
2551
  return buildPropertyQueryExpression({
2457
2552
  propertyVariable: query.propertyVariable,
2458
2553
  queryExpression: buildNestedElementQuery(["value"], buildAndCtsQueryExpressionInternal(rangeQueryExpressions))
@@ -2543,6 +2638,7 @@ function buildLeafValueQueryExpression(params) {
2543
2638
  }
2544
2639
  }
2545
2640
  function buildLeafQueryExpression(query) {
2641
+ if (query.target === "property" && query.dataType !== "date" && query.dataType !== "dateTime" && !("value" in query) && query.propertyVariable != null) return buildPropertyPresenceQueryExpression({ propertyVariable: query.propertyVariable });
2546
2642
  if (query.target === "property" && (query.dataType === "date" || query.dataType === "dateTime") && query.value == null) return buildPropertyDateRangeQueryExpression(query);
2547
2643
  const searchValue = getLeafSearchValue(query);
2548
2644
  if (searchValue == null) throw new Error("Missing searchable value for query leaf");
@@ -2555,6 +2651,11 @@ function buildLeafQueryExpression(query) {
2555
2651
  value: searchValue,
2556
2652
  isCaseSensitive: query.isCaseSensitive
2557
2653
  });
2654
+ const fullValueQueryExpression = buildLeafValueQueryExpression({
2655
+ query,
2656
+ value: searchValue,
2657
+ matchMode: "exact"
2658
+ });
2558
2659
  if (terms.length === 0) return "cts:false-query()";
2559
2660
  const termQueryExpressions = [];
2560
2661
  for (const term of terms) termQueryExpressions.push(buildLeafValueQueryExpression({
@@ -2562,7 +2663,9 @@ function buildLeafQueryExpression(query) {
2562
2663
  value: term,
2563
2664
  matchMode: "includes"
2564
2665
  }));
2565
- return buildAndCtsQueryExpressionInternal(termQueryExpressions);
2666
+ const tokenizedQueryExpression = buildAndCtsQueryExpressionInternal(termQueryExpressions);
2667
+ if (terms.length === 1) return tokenizedQueryExpression;
2668
+ return buildOrCtsQueryExpressionInternal([fullValueQueryExpression, tokenizedQueryExpression]);
2566
2669
  }
2567
2670
  function getGroupableIncludesValue(query) {
2568
2671
  if (query.matchMode !== "includes" || query.isNegated === true) return null;
@@ -2603,11 +2706,18 @@ function getCompatibleIncludesGroupLeaves(query) {
2603
2706
  return leafQueries;
2604
2707
  }
2605
2708
  function buildIncludesGroupMember(query) {
2606
- return { buildTermQuery: (term) => buildLeafValueQueryExpression({
2607
- query,
2608
- value: term,
2609
- matchMode: "includes"
2610
- }) };
2709
+ return {
2710
+ buildTermQuery: (term) => buildLeafValueQueryExpression({
2711
+ query,
2712
+ value: term,
2713
+ matchMode: "includes"
2714
+ }),
2715
+ buildFullValueQuery: (value) => buildLeafValueQueryExpression({
2716
+ query,
2717
+ value,
2718
+ matchMode: "exact"
2719
+ })
2720
+ };
2611
2721
  }
2612
2722
  function buildIncludesGroupQueryExpression(queries) {
2613
2723
  const firstQuery = queries[0];
@@ -2621,12 +2731,16 @@ function buildIncludesGroupQueryExpression(queries) {
2621
2731
  if (terms.length === 0) return "cts:false-query()";
2622
2732
  const members = queries.map((query) => buildIncludesGroupMember(query));
2623
2733
  const perTermQueryExpressions = [];
2734
+ const fullValueFieldQueryExpressions = [];
2735
+ for (const member of members) fullValueFieldQueryExpressions.push(member.buildFullValueQuery(groupValue));
2624
2736
  for (const term of terms) {
2625
2737
  const fieldQueryExpressions = [];
2626
2738
  for (const member of members) fieldQueryExpressions.push(member.buildTermQuery(term));
2627
2739
  perTermQueryExpressions.push(buildOrCtsQueryExpressionInternal(fieldQueryExpressions));
2628
2740
  }
2629
- return buildAndCtsQueryExpressionInternal(perTermQueryExpressions);
2741
+ const tokenizedGroupQueryExpression = buildAndCtsQueryExpressionInternal(perTermQueryExpressions);
2742
+ if (terms.length === 1) return tokenizedGroupQueryExpression;
2743
+ return buildOrCtsQueryExpressionInternal([buildOrCtsQueryExpressionInternal(fullValueFieldQueryExpressions), tokenizedGroupQueryExpression]);
2630
2744
  }
2631
2745
  function buildQueryNode(query) {
2632
2746
  if (isQueryLeaf(query)) {
@@ -2959,6 +3073,23 @@ function getPropertyVariableUuidsFromQueries(queries) {
2959
3073
  }
2960
3074
  return [...propertyVariableUuids];
2961
3075
  }
3076
+ function getItemFilterQueriesFromPropertyValueQueries(queries) {
3077
+ if (queries == null) return null;
3078
+ if ("target" in queries) {
3079
+ if (queries.target !== "property") return queries;
3080
+ if (queries.dataType === "date" || queries.dataType === "dateTime") return queries;
3081
+ return "value" in queries && queries.value != null ? queries : null;
3082
+ }
3083
+ const filteredChildren = [];
3084
+ const childQueries = "and" in queries ? queries.and : queries.or;
3085
+ for (const childQuery of childQueries) {
3086
+ const filteredChildQuery = getItemFilterQueriesFromPropertyValueQueries(childQuery);
3087
+ if (filteredChildQuery != null) filteredChildren.push(filteredChildQuery);
3088
+ }
3089
+ if (filteredChildren.length === 0) return null;
3090
+ if (filteredChildren.length === 1) return filteredChildren[0] ?? null;
3091
+ return "and" in queries ? { and: filteredChildren } : { or: filteredChildren };
3092
+ }
2962
3093
  /**
2963
3094
  * Schema for a single property value query item in the OCHRE API response
2964
3095
  */
@@ -3040,7 +3171,7 @@ function buildXQuery(params) {
3040
3171
  if (setScopeUuids.length > 0) setScopeFilter = `/set[(${setScopeUuids.map((uuid) => `@uuid="${uuid}"`).join(" or ")})]/items/*`;
3041
3172
  const propertyVariableFilters = getPropertyVariableUuidsFromQueries(queries).map((uuid) => `@uuid="${uuid}"`).join(" or ");
3042
3173
  const baseItemsExpression = `doc()/ochre${setScopeFilter}`;
3043
- const compiledQueryPlan = buildQueryPlan({ queries });
3174
+ const compiledQueryPlan = buildQueryPlan({ queries: getItemFilterQueriesFromPropertyValueQueries(queries) });
3044
3175
  const itemsQueryExpressions = [];
3045
3176
  const belongsToCollectionQueryExpression = buildBelongsToCollectionQueryExpression(belongsToCollectionScopeUuids, BELONGS_TO_COLLECTION_UUID);
3046
3177
  if (compiledQueryPlan.queryExpression != null) itemsQueryExpressions.push(compiledQueryPlan.queryExpression);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ochre-sdk",
3
- "version": "0.22.3",
3
+ "version": "0.22.5",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Node.js library for working with OCHRE (Online Cultural and Historical Research Environment) data",