@resistdesign/voltra 3.0.0-alpha.14 → 3.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/api/index.js +84 -1
- package/package.json +1 -1
package/api/index.js
CHANGED
|
@@ -3372,6 +3372,64 @@ async function structuredHandler(event) {
|
|
|
3372
3372
|
}
|
|
3373
3373
|
}
|
|
3374
3374
|
|
|
3375
|
+
// src/api/Indexing/structured/StructuredStringLike.ts
|
|
3376
|
+
// Only this many leading UTF-16 code units of a string are fed to n-gram generation (see toNgrams).
var MAX_INDEXED_STRING_LENGTH = 128;
|
|
3377
|
+
// Hard cap on the number of tokens returned by toNgrams and buildStructuredLikePatternTokens.
// NOTE(review): a 128-unit string can produce up to 128 + 127 + 126 = 381 distinct n-grams, so this cap
// can drop trailing trigrams of an indexed value; a LIKE query that ANDs every n-gram of its pattern
// could then miss such a value — confirm this trade-off is intended.
var MAX_TOKENS_PER_VALUE = 256;
|
|
3378
|
+
// Largest n-gram length generated by toNgrams; sizes run from 1 up to and including this value.
var MAX_NGRAM_SIZE = 3;
|
|
3379
|
+
// Matches one LIKE wildcard character ("%" or "_"). Deliberately non-global, so .test() keeps no
// lastIndex state between calls; String.prototype.split still splits on every occurrence.
var LIKE_WILDCARD_REGEX = /[%_]/;
|
|
3380
|
+
// Matches every run of one or more whitespace characters; normalizeStructuredLikeString replaces each run with a single space.
var NORMALIZED_WHITESPACE_REGEX = /\s+/g;
|
|
3381
|
+
// Prefix that toContainsToken puts in front of every n-gram before it is used as a "contains" term.
var STRUCTURED_STRING_CONTAINS_TOKEN_PREFIX = "__str__:";
|
|
3382
|
+
/**
 * Canonicalise a string for n-gram indexing and LIKE matching.
 *
 * Steps, in order: lower-case, strip leading/trailing whitespace, then
 * collapse every internal whitespace run into a single space.
 *
 * @param {string} value - Raw string value or LIKE pattern.
 * @returns {string} The normalised string (may be empty).
 */
var normalizeStructuredLikeString = (value) => {
  const lowered = value.toLowerCase();
  const trimmed = lowered.trim();
  return trimmed.replace(NORMALIZED_WHITESPACE_REGEX, " ");
};
|
|
3383
|
+
/**
 * List the distinct character n-grams of an already-normalised string.
 *
 * Only the first MAX_INDEXED_STRING_LENGTH code units are considered.
 * N-grams are emitted size by size (all 1-grams first, then 2-grams, up to
 * MAX_NGRAM_SIZE), left to right within each size, keeping the first
 * occurrence of each. Generation stops as soon as MAX_TOKENS_PER_VALUE
 * distinct tokens have been collected.
 *
 * @param {string} normalized - Normalised input string.
 * @returns {string[]} Distinct n-grams in first-seen order.
 */
var toNgrams = (normalized) => {
  const source = normalized.slice(0, MAX_INDEXED_STRING_LENGTH);
  // A Set iterates in insertion order, so it serves as both the
  // de-duplication filter and the ordered result.
  const unique = new Set();
  let width = 1;
  while (width <= MAX_NGRAM_SIZE && width <= source.length) {
    const lastStart = source.length - width;
    for (let start = 0; start <= lastStart; start += 1) {
      unique.add(source.slice(start, start + width));
      if (unique.size >= MAX_TOKENS_PER_VALUE) {
        return Array.from(unique);
      }
    }
    width += 1;
  }
  return Array.from(unique);
};
|
|
3404
|
+
/**
 * Turn a raw n-gram into the token form used for "contains" terms by
 * prepending STRUCTURED_STRING_CONTAINS_TOKEN_PREFIX.
 *
 * @param {string} token - Raw n-gram.
 * @returns {string} The prefixed token.
 */
var toContainsToken = (token) => {
  const prefixed = `${STRUCTURED_STRING_CONTAINS_TOKEN_PREFIX}${token}`;
  return prefixed;
};
|
|
3405
|
+
/**
 * Build the prefixed n-gram tokens for a stored string value.
 *
 * The value is normalised with normalizeStructuredLikeString, split into
 * n-grams with toNgrams, and each n-gram is prefixed with toContainsToken.
 *
 * @param {string} value - Raw string field value.
 * @returns {string[]} Prefixed tokens; empty when the value normalises to "".
 */
var buildStructuredStringContainsTokens = (value) => {
  const text = normalizeStructuredLikeString(value);
  if (text.length === 0) {
    return [];
  }
  const prefixed = [];
  for (const gram of toNgrams(text)) {
    prefixed.push(toContainsToken(gram));
  }
  return prefixed;
};
|
|
3412
|
+
/**
 * Build the prefixed n-gram tokens for the literal parts of a LIKE pattern.
 *
 * The pattern is normalised, then split on the LIKE wildcards ("%" and "_").
 * Each non-empty, trimmed literal segment is expanded with toNgrams and
 * prefixed with toContainsToken. Tokens are de-duplicated across segments
 * and capped at MAX_TOKENS_PER_VALUE in total.
 *
 * @param {string} value - LIKE pattern, with or without wildcards.
 * @returns {string[]} Distinct prefixed tokens in first-seen order; empty
 *   when the pattern has no literal characters.
 */
var buildStructuredLikePatternTokens = (value) => {
  const normalized = normalizeStructuredLikeString(value);
  // A pattern without wildcards is treated as "%pattern%"; the split below
  // removes the added "%" again, so only the literal text yields tokens.
  let pattern = normalized;
  if (!LIKE_WILDCARD_REGEX.test(normalized)) {
    pattern = `%${normalized}%`;
  }
  // A Set iterates in insertion order, so it serves as both the
  // de-duplication filter and the ordered result.
  const unique = new Set();
  for (const rawSegment of pattern.split(LIKE_WILDCARD_REGEX)) {
    const segment = rawSegment.trim();
    if (segment.length === 0) {
      continue;
    }
    for (const gram of toNgrams(segment)) {
      unique.add(toContainsToken(gram));
      if (unique.size >= MAX_TOKENS_PER_VALUE) {
        return Array.from(unique);
      }
    }
  }
  return Array.from(unique);
};
|
|
3432
|
+
|
|
3375
3433
|
// src/api/Indexing/structured/StructuredInMemoryIndex.ts
|
|
3376
3434
|
function insertSortedUnique2(values, docId) {
|
|
3377
3435
|
let low = 0;
|
|
@@ -3515,6 +3573,11 @@ var StructuredInMemoryIndex = class {
|
|
|
3515
3573
|
continue;
|
|
3516
3574
|
}
|
|
3517
3575
|
addPosting(this.eqIndex, field, value, docId);
|
|
3576
|
+
if (typeof value === "string") {
|
|
3577
|
+
for (const token of buildStructuredStringContainsTokens(value)) {
|
|
3578
|
+
addPosting(this.containsIndex, field, token, docId);
|
|
3579
|
+
}
|
|
3580
|
+
}
|
|
3518
3581
|
const entries = this.rangeIndex.get(field) ?? [];
|
|
3519
3582
|
insertRangeEntry(entries, value, docId);
|
|
3520
3583
|
this.rangeIndex.set(field, entries);
|
|
@@ -3758,6 +3821,11 @@ function buildTermEntries(docId, fields) {
|
|
|
3758
3821
|
}
|
|
3759
3822
|
} else {
|
|
3760
3823
|
entries.push(buildStructuredTermItem(field, value, "eq", docId));
|
|
3824
|
+
if (typeof value === "string") {
|
|
3825
|
+
for (const token of buildStructuredStringContainsTokens(value)) {
|
|
3826
|
+
entries.push(buildStructuredTermItem(field, token, "contains", docId));
|
|
3827
|
+
}
|
|
3828
|
+
}
|
|
3761
3829
|
}
|
|
3762
3830
|
}
|
|
3763
3831
|
return entries;
|
|
@@ -7454,8 +7522,23 @@ var buildWhereForCriterion = (criterion) => {
|
|
|
7454
7522
|
case "EQUALS" /* EQUALS */:
|
|
7455
7523
|
return buildTerm(fieldName, "eq", value);
|
|
7456
7524
|
case "CONTAINS" /* CONTAINS */:
|
|
7457
|
-
case "LIKE" /* LIKE */:
|
|
7458
7525
|
return buildTerm(fieldName, "contains", value);
|
|
7526
|
+
case "LIKE" /* LIKE */: {
|
|
7527
|
+
if (typeof value !== "string") {
|
|
7528
|
+
return buildTerm(fieldName, "contains", value);
|
|
7529
|
+
}
|
|
7530
|
+
const tokens = buildStructuredLikePatternTokens(value);
|
|
7531
|
+
const tokenClauses = tokens.map(
|
|
7532
|
+
(token) => buildTerm(fieldName, "contains", token)
|
|
7533
|
+
);
|
|
7534
|
+
if (tokenClauses.length === 0) {
|
|
7535
|
+
return buildTerm(fieldName, "contains", value);
|
|
7536
|
+
}
|
|
7537
|
+
if (tokenClauses.length === 1) {
|
|
7538
|
+
return tokenClauses[0];
|
|
7539
|
+
}
|
|
7540
|
+
return { and: tokenClauses };
|
|
7541
|
+
}
|
|
7459
7542
|
case "GREATER_THAN_OR_EQUAL" /* GREATER_THAN_OR_EQUAL */:
|
|
7460
7543
|
return { type: "gte", field: fieldName, value };
|
|
7461
7544
|
case "LESS_THAN_OR_EQUAL" /* LESS_THAN_OR_EQUAL */:
|