@topgunbuild/core 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +878 -1
- package/dist/index.d.ts +878 -1
- package/dist/index.js +1283 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1265 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +11 -11
- package/LICENSE +0 -97
package/dist/index.mjs
CHANGED
|
@@ -2416,6 +2416,66 @@ var JournalReadResponseSchema = z3.object({
|
|
|
2416
2416
|
events: z3.array(JournalEventDataSchema),
|
|
2417
2417
|
hasMore: z3.boolean()
|
|
2418
2418
|
});
|
|
2419
|
+
/**
 * Optional tuning parameters shared by one-shot searches and search
 * subscriptions: a result limit, a minimum score, and a per-key numeric boost map.
 */
var SearchOptionsSchema = z3.object({
  limit: z3.number().optional(),
  minScore: z3.number().optional(),
  boost: z3.record(z3.string(), z3.number()).optional()
});

/** Payload of a "SEARCH" message: which map to query and the query text. */
var SearchPayloadSchema = z3.object({
  requestId: z3.string(),
  mapName: z3.string(),
  query: z3.string(),
  options: SearchOptionsSchema.optional()
});

/** Envelope for a one-shot full-text search request. */
var SearchMessageSchema = z3.object({
  type: z3.literal("SEARCH"),
  payload: SearchPayloadSchema
});

/**
 * Payload of a "SEARCH_RESP" message: the scored hits for `requestId`,
 * a total count, and an optional error string.
 */
var SearchRespPayloadSchema = z3.object({
  requestId: z3.string(),
  results: z3.array(
    z3.object({
      key: z3.string(),
      value: z3.unknown(),
      score: z3.number(),
      matchedTerms: z3.array(z3.string())
    })
  ),
  totalCount: z3.number(),
  error: z3.string().optional()
});

/** Envelope for the response to a "SEARCH" request. */
var SearchRespMessageSchema = z3.object({
  type: z3.literal("SEARCH_RESP"),
  payload: SearchRespPayloadSchema
});

/** Kind of change reported by a "SEARCH_UPDATE" message. */
var SearchUpdateTypeSchema = z3.enum(["ENTER", "UPDATE", "LEAVE"]);

/** Payload of a "SEARCH_SUB" message: same query fields as a search, keyed by subscription. */
var SearchSubPayloadSchema = z3.object({
  subscriptionId: z3.string(),
  mapName: z3.string(),
  query: z3.string(),
  options: SearchOptionsSchema.optional()
});

/** Envelope for opening a live search subscription. */
var SearchSubMessageSchema = z3.object({
  type: z3.literal("SEARCH_SUB"),
  payload: SearchSubPayloadSchema
});

/** Payload of a "SEARCH_UPDATE" message: one scored entry plus the kind of change. */
var SearchUpdatePayloadSchema = z3.object({
  subscriptionId: z3.string(),
  key: z3.string(),
  value: z3.unknown(),
  score: z3.number(),
  matchedTerms: z3.array(z3.string()),
  type: SearchUpdateTypeSchema
});

/** Envelope for a live search subscription update. */
var SearchUpdateMessageSchema = z3.object({
  type: z3.literal("SEARCH_UPDATE"),
  payload: SearchUpdatePayloadSchema
});

/** Payload of a "SEARCH_UNSUB" message: the subscription to cancel. */
var SearchUnsubPayloadSchema = z3.object({
  subscriptionId: z3.string()
});

/** Envelope for cancelling a live search subscription. */
var SearchUnsubMessageSchema = z3.object({
  type: z3.literal("SEARCH_UNSUB"),
  payload: SearchUnsubPayloadSchema
});
|
|
2419
2479
|
var ConflictResolverSchema = z3.object({
|
|
2420
2480
|
name: z3.string().min(1).max(100),
|
|
2421
2481
|
code: z3.string().max(5e4),
|
|
@@ -2547,7 +2607,14 @@ var MessageSchema = z3.discriminatedUnion("type", [
|
|
|
2547
2607
|
UnregisterResolverResponseSchema,
|
|
2548
2608
|
MergeRejectedMessageSchema,
|
|
2549
2609
|
ListResolversRequestSchema,
|
|
2550
|
-
ListResolversResponseSchema
|
|
2610
|
+
ListResolversResponseSchema,
|
|
2611
|
+
// Phase 11.1: Full-Text Search
|
|
2612
|
+
SearchMessageSchema,
|
|
2613
|
+
SearchRespMessageSchema,
|
|
2614
|
+
// Phase 11.1b: Live Search Subscriptions
|
|
2615
|
+
SearchSubMessageSchema,
|
|
2616
|
+
SearchUpdateMessageSchema,
|
|
2617
|
+
SearchUnsubMessageSchema
|
|
2551
2618
|
]);
|
|
2552
2619
|
|
|
2553
2620
|
// src/types/WriteConcern.ts
|
|
@@ -8449,10 +8516,1076 @@ var IndexedLWWMap = class extends LWWMap {
|
|
|
8449
8516
|
}
|
|
8450
8517
|
};
|
|
8451
8518
|
|
|
8519
|
+
// src/query/tokenization/stopwords.ts
|
|
8520
|
+
/**
 * Lower-case English stopwords skipped by the default tokenizer.
 *
 * Each word is listed exactly once ("when" used to appear under both
 * conjunctions and adverbs); the Set contents are unchanged.
 */
var ENGLISH_STOPWORDS = /* @__PURE__ */ new Set([
  // Articles
  "a", "an", "the",
  // Pronouns
  "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
  "you", "your", "yours", "yourself", "yourselves",
  "he", "him", "his", "himself", "she", "her", "hers", "herself",
  "it", "its", "itself", "they", "them", "their", "theirs", "themselves",
  "what", "which", "who", "whom", "this", "that", "these", "those",
  // Auxiliary verbs
  "am", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "having", "do", "does", "did", "doing",
  "will", "would", "shall", "should", "can", "could", "may", "might", "must", "ought",
  // Prepositions
  "about", "above", "across", "after", "against", "along", "among", "around",
  "at", "before", "behind", "below", "beneath", "beside", "between", "beyond",
  "by", "down", "during", "except", "for", "from", "in", "inside", "into",
  "near", "of", "off", "on", "onto", "out", "outside", "over", "past", "since",
  "through", "throughout", "to", "toward", "towards", "under", "underneath",
  "until", "up", "upon", "with", "within", "without",
  // Conjunctions
  "and", "but", "or", "nor", "so", "yet", "both", "either", "neither", "not",
  "only", "as", "if", "than", "when", "while", "although", "because", "unless", "whether",
  // Adverbs
  "here", "there", "where", "how", "why", "all", "each", "every", "any", "some",
  "no", "none", "more", "most", "other", "such", "own", "same", "too", "very",
  "just", "also", "now", "then", "again", "ever", "once",
  // Misc (quantifiers and contraction fragments left after splitting on apostrophes)
  "few", "many", "much", "several", "s", "t", "d", "ll", "m", "ve", "re"
]);
|
|
8702
|
+
|
|
8703
|
+
// src/query/tokenization/porter-stemmer.ts
|
|
8704
|
+
// Step 2 rules as [suffix, replacement], applied when measure(base) > 0.
// Longer suffixes come before any shorter suffix they end with, so the first
// match in list order is also the longest match.
const PORTER_STEP2_RULES = [
  ["ational", "ate"],
  ["tional", "tion"],
  ["enci", "ence"],
  ["anci", "ance"],
  ["izer", "ize"],
  ["abli", "able"],
  ["alli", "al"],
  ["entli", "ent"],
  ["eli", "e"],
  ["ousli", "ous"],
  ["ization", "ize"],
  ["ation", "ate"],
  ["ator", "ate"],
  ["alism", "al"],
  ["iveness", "ive"],
  ["fulness", "ful"],
  ["ousness", "ous"],
  ["aliti", "al"],
  ["iviti", "ive"],
  ["biliti", "ble"]
];

// Step 3 rules as [suffix, replacement], applied when measure(base) > 0.
const PORTER_STEP3_RULES = [
  ["icate", "ic"],
  ["ative", ""],
  ["alize", "al"],
  ["iciti", "ic"],
  ["ical", "ic"],
  ["ful", ""],
  ["ness", ""]
];

// Step 4 suffixes, removed when measure(base) > 1 ("ion" additionally needs
// the base to end in "s" or "t"). "ement" > "ment" > "ent" ordering matters.
const PORTER_STEP4_SUFFIXES = [
  "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment",
  "ent", "ion", "ou", "ism", "ate", "iti", "ous", "ive", "ize"
];

/**
 * Reduce an English word to its stem using the Porter stemming steps.
 *
 * Within steps 2, 3 and 4 only the longest matching suffix is considered:
 * if that suffix's measure condition fails, the word is left alone for that
 * step instead of falling through to a shorter suffix (previously
 * "agreement" became "agreem" and "statement" became "statem").
 *
 * @param word - Word to stem (the default tokenizer passes lower-cased input)
 * @returns The stem; empty values and words shorter than 3 characters are returned unchanged
 */
function porterStem(word) {
  if (!word || word.length < 3) {
    return word;
  }
  let stem = word;

  // Step 1a: plurals.
  if (stem.endsWith("sses") || stem.endsWith("ies")) {
    stem = stem.slice(0, -2);
  } else if (stem.endsWith("s") && !stem.endsWith("ss")) {
    stem = stem.slice(0, -1);
  }

  // Step 1b: -eed / -ed / -ing. The lazy group keeps the base as short as
  // possible, so "eed" wins over "ed" whenever both could match.
  const step1bMatch = /^(.+?)(eed|ed|ing)$/.exec(stem);
  if (step1bMatch) {
    const [, base, suffix] = step1bMatch;
    if (suffix === "eed") {
      if (getMeasure(base) > 0) {
        stem = `${base}ee`;
      }
    } else if (hasVowel(base)) {
      stem = base;
      if (stem.endsWith("at") || stem.endsWith("bl") || stem.endsWith("iz")) {
        stem = `${stem}e`;
      } else if (endsWithDoubleConsonant(stem) && !/[lsz]$/.test(stem)) {
        stem = stem.slice(0, -1);
      } else if (getMeasure(stem) === 1 && endsWithCVC(stem)) {
        stem = `${stem}e`;
      }
    }
  }

  // Step 1c: terminal y -> i when the rest of the stem contains a vowel.
  if (stem.endsWith("y") && hasVowel(stem.slice(0, -1))) {
    stem = `${stem.slice(0, -1)}i`;
  }

  // Steps 2 and 3: rewrite the longest matching suffix when measure(base) > 0.
  for (const rules of [PORTER_STEP2_RULES, PORTER_STEP3_RULES]) {
    for (const [suffix, replacement] of rules) {
      if (!stem.endsWith(suffix)) {
        continue;
      }
      const base = stem.slice(0, -suffix.length);
      if (getMeasure(base) > 0) {
        stem = base + replacement;
      }
      // Longest match found: stop whether or not the rule applied.
      break;
    }
  }

  // Step 4: drop the longest matching suffix when measure(base) > 1.
  for (const suffix of PORTER_STEP4_SUFFIXES) {
    if (!stem.endsWith(suffix)) {
      continue;
    }
    const base = stem.slice(0, -suffix.length);
    const ionAllowed = suffix !== "ion" || /[st]$/.test(base);
    if (getMeasure(base) > 1 && ionAllowed) {
      stem = base;
    }
    break;
  }

  // Step 5a: drop a trailing "e".
  if (stem.endsWith("e")) {
    const base = stem.slice(0, -1);
    const measure = getMeasure(base);
    if (measure > 1 || (measure === 1 && !endsWithCVC(base))) {
      stem = base;
    }
  }

  // Step 5b: "ll" -> "l" for longer stems.
  if (getMeasure(stem) > 1 && endsWithDoubleConsonant(stem) && stem.endsWith("l")) {
    stem = stem.slice(0, -1);
  }
  return stem;
}
|
|
8835
|
+
/**
 * Tell whether `char` counts as a vowel for stemming purposes.
 *
 * a, e, i, o, u always do; "y" does only when it follows a character that
 * is not one of those five letters.
 *
 * @param char - Character to classify
 * @param prevChar - Character immediately before it, if any
 * @returns True when the character is treated as a vowel
 */
function isVowel(char, prevChar) {
  const plainVowels = "aeiou";
  if (plainVowels.includes(char)) {
    return true;
  }
  return char === "y" && Boolean(prevChar) && !plainVowels.includes(prevChar);
}
|
|
8844
|
+
/**
 * Tell whether any character of `str` is a vowel according to `isVowel`
 * (each character is judged together with the one before it).
 *
 * @param str - String to inspect
 * @returns True when at least one vowel is present
 */
function hasVowel(str) {
  return str
    .split("")
    .some((ch, pos, chars) => isVowel(ch, pos > 0 ? chars[pos - 1] : undefined));
}
|
|
8852
|
+
/**
 * Compute the "measure" of a stem: the number of places where a vowel is
 * immediately followed by a consonant, classifying characters with `isVowel`.
 *
 * @param str - Stem to measure
 * @returns Count of vowel-to-consonant transitions (0 for an empty string)
 */
function getMeasure(str) {
  let transitions = 0;
  let previousWasVowel = false;
  for (let pos = 0; pos < str.length; pos += 1) {
    const currentIsVowel = isVowel(str[pos], pos > 0 ? str[pos - 1] : undefined);
    if (previousWasVowel && !currentIsVowel) {
      transitions += 1;
    }
    previousWasVowel = currentIsVowel;
  }
  return transitions;
}
|
|
8860
|
+
/**
 * Tell whether `str` ends in the same letter twice and that letter is not
 * one of a, e, i, o, u.
 *
 * @param str - String to inspect
 * @returns True for endings such as "pp" or "zz"; false for strings shorter than 2 characters
 */
function endsWithDoubleConsonant(str) {
  const tail = str.slice(-2);
  return tail.length === 2 && tail[0] === tail[1] && !"aeiou".includes(tail[1]);
}
|
|
8866
|
+
/**
 * Tell whether the last three characters of `str` form a
 * consonant-vowel-consonant pattern whose final consonant is not w, x or y.
 *
 * The outer two letters are checked against a, e, i, o, u only; the middle
 * letter is classified with `isVowel`.
 *
 * @param str - String to inspect
 * @returns True when the ending matches; false for strings shorter than 3 characters
 */
function endsWithCVC(str) {
  const size = str.length;
  if (size < 3) {
    return false;
  }
  const first = str[size - 3];
  const middle = str[size - 2];
  const last = str[size - 1];
  const isPlainVowel = (ch) => "aeiou".includes(ch);
  if (isPlainVowel(first) || isPlainVowel(last) || "wxy".includes(last)) {
    return false;
  }
  return isVowel(middle, first);
}
|
|
8874
|
+
|
|
8875
|
+
// src/fts/Tokenizer.ts
|
|
8876
|
+
var BM25Tokenizer = class {
  /**
   * Create a new BM25Tokenizer.
   *
   * Defaults: lower-casing on, English stopwords, Porter stemming, and token
   * lengths limited to 2..40 characters. An option that is missing or
   * explicitly `undefined` falls back to its default, so a partially filled
   * options object cannot knock out the stemmer or stopword set.
   *
   * @param options - Configuration options
   */
  constructor(options) {
    const defaults = {
      lowercase: true,
      stopwords: ENGLISH_STOPWORDS,
      stemmer: porterStem,
      minLength: 2,
      maxLength: 40
    };
    const merged = { ...defaults, ...options };
    for (const key of Object.keys(defaults)) {
      if (merged[key] === undefined) {
        merged[key] = defaults[key];
      }
    }
    this.options = merged;
  }
  /**
   * Tokenize text into an array of normalized tokens.
   *
   * The text is optionally lower-cased and split on runs of characters that
   * are neither Unicode letters nor Unicode digits. Words shorter than
   * `minLength` and stopwords are dropped, the rest are stemmed, and stems
   * outside `minLength`..`maxLength` are dropped as well.
   *
   * @param text - Text to tokenize
   * @returns Array of tokens (empty for empty or non-string input)
   */
  tokenize(text) {
    if (!text || typeof text !== "string") {
      return [];
    }
    const { lowercase, stopwords, stemmer, minLength, maxLength } = this.options;
    const processed = lowercase ? text.toLowerCase() : text;
    const tokens = [];
    for (const word of processed.split(/[^\p{L}\p{N}]+/u)) {
      // Splitting can yield empty strings at the edges; skip them and short words.
      if (word.length === 0 || word.length < minLength) {
        continue;
      }
      if (stopwords.has(word)) {
        continue;
      }
      const stemmed = stemmer(word);
      if (stemmed.length < minLength || stemmed.length > maxLength) {
        continue;
      }
      tokens.push(stemmed);
    }
    return tokens;
  }
};
|
|
8924
|
+
|
|
8925
|
+
// src/fts/BM25InvertedIndex.ts
|
|
8926
|
+
var BM25InvertedIndex = class {
  /**
   * Create an empty index.
   *
   * State: `index` (term -> postings), `docLengths` (docId -> token count),
   * `docTerms` (docId -> unique terms, used for removal), `idfCache`
   * (term -> IDF, cleared on every add/remove), `totalDocs`, `avgDocLength`.
   */
  constructor() {
    this.index = /* @__PURE__ */ new Map();
    this.docLengths = /* @__PURE__ */ new Map();
    this.docTerms = /* @__PURE__ */ new Map();
    this.idfCache = /* @__PURE__ */ new Map();
    this.totalDocs = 0;
    this.avgDocLength = 0;
  }
  /**
   * Add a document to the index.
   *
   * If `docId` is already indexed the old entry is removed first, so a
   * repeated add replaces the document instead of duplicating its postings
   * and inflating the document count.
   *
   * @param docId - Unique document identifier
   * @param tokens - Array of tokens (already tokenized/stemmed)
   */
  addDocument(docId, tokens) {
    if (this.docTerms.has(docId)) {
      this.removeDocument(docId);
    }
    const termFreqs = new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    for (const [term, freq] of termFreqs) {
      let postings = this.index.get(term);
      if (!postings) {
        postings = [];
        this.index.set(term, postings);
      }
      postings.push({ docId, termFrequency: freq });
    }
    this.docLengths.set(docId, tokens.length);
    this.docTerms.set(docId, new Set(termFreqs.keys()));
    this.totalDocs++;
    this.updateAvgDocLength();
    this.idfCache.clear();
  }
  /**
   * Remove a document from the index. Unknown ids are ignored.
   *
   * @param docId - Document identifier to remove
   */
  removeDocument(docId) {
    const terms = this.docTerms.get(docId);
    if (!terms) {
      return;
    }
    for (const term of terms) {
      const postings = this.index.get(term);
      if (!postings) {
        continue;
      }
      const remaining = postings.filter((info) => info.docId !== docId);
      if (remaining.length === 0) {
        // Drop terms that no longer occur anywhere.
        this.index.delete(term);
      } else {
        this.index.set(term, remaining);
      }
    }
    this.docLengths.delete(docId);
    this.docTerms.delete(docId);
    this.totalDocs--;
    this.updateAvgDocLength();
    this.idfCache.clear();
  }
  /**
   * Get all documents containing a term.
   *
   * @param term - Term to look up
   * @returns Array of TermInfo objects (empty if the term is unknown)
   */
  getDocumentsForTerm(term) {
    return this.index.get(term) || [];
  }
  /**
   * Calculate IDF (Inverse Document Frequency) for a term.
   *
   * Uses BM25 IDF formula:
   * IDF = log((N - df + 0.5) / (df + 0.5) + 1)
   *
   * Where:
   * - N = total documents
   * - df = document frequency (docs containing term)
   *
   * Results are cached until the next add/remove.
   *
   * @param term - Term to calculate IDF for
   * @returns IDF value (0 if term doesn't exist)
   */
  getIDF(term) {
    const cached = this.idfCache.get(term);
    if (cached !== undefined) {
      return cached;
    }
    const postings = this.index.get(term);
    if (!postings || postings.length === 0) {
      return 0;
    }
    const docFreq = postings.length;
    const idf = Math.log((this.totalDocs - docFreq + 0.5) / (docFreq + 0.5) + 1);
    this.idfCache.set(term, idf);
    return idf;
  }
  /**
   * Get the length of a document (number of tokens).
   *
   * @param docId - Document identifier
   * @returns Document length (0 if not found)
   */
  getDocLength(docId) {
    return this.docLengths.get(docId) || 0;
  }
  /**
   * Get the average document length.
   *
   * @returns Average length across all documents
   */
  getAvgDocLength() {
    return this.avgDocLength;
  }
  /**
   * Get the total number of documents in the index.
   *
   * @returns Total document count
   */
  getTotalDocs() {
    return this.totalDocs;
  }
  /**
   * Get iterator for document lengths (useful for serialization).
   *
   * @returns Iterator of [docId, length] pairs
   */
  getDocLengths() {
    return this.docLengths.entries();
  }
  /**
   * Get the number of documents in the index (alias for getTotalDocs).
   *
   * @returns Number of indexed documents
   */
  getSize() {
    return this.totalDocs;
  }
  /**
   * Clear all data from the index.
   */
  clear() {
    this.index.clear();
    this.docLengths.clear();
    this.docTerms.clear();
    this.idfCache.clear();
    this.totalDocs = 0;
    this.avgDocLength = 0;
  }
  /**
   * Check if a document exists in the index.
   *
   * @param docId - Document identifier
   * @returns True if document exists
   */
  hasDocument(docId) {
    return this.docTerms.has(docId);
  }
  /**
   * Get all unique terms in the index.
   *
   * @returns Iterator of all terms
   */
  getTerms() {
    return this.index.keys();
  }
  /**
   * Get the number of unique terms in the index.
   *
   * @returns Number of unique terms
   */
  getTermCount() {
    return this.index.size;
  }
  /**
   * Update the average document length after add/remove.
   * Recomputed from `docLengths` so it stays correct even when that map is
   * replaced from outside the class.
   */
  updateAvgDocLength() {
    if (this.totalDocs === 0) {
      this.avgDocLength = 0;
      return;
    }
    let sum = 0;
    for (const length of this.docLengths.values()) {
      sum += length;
    }
    this.avgDocLength = sum / this.totalDocs;
  }
};
|
|
9117
|
+
|
|
9118
|
+
// src/fts/BM25Scorer.ts
|
|
9119
|
+
var BM25Scorer = class {
  /**
   * Create a new BM25 scorer.
   *
   * @param options - BM25 configuration options; `k1` defaults to 1.2 and `b` to 0.75
   */
  constructor(options) {
    this.k1 = options?.k1 ?? 1.2;
    this.b = options?.b ?? 0.75;
  }
  /**
   * BM25 contribution of one term to one document:
   * idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * docLength / avgDocLength)).
   *
   * @param tf - Term frequency in the document
   * @param idf - Inverse document frequency of the term
   * @param docLength - Document length in tokens
   * @param avgDocLength - Average document length (must be > 0)
   * @returns The term's score contribution
   */
  computeTermScore(tf, idf, docLength, avgDocLength) {
    const numerator = tf * (this.k1 + 1);
    const denominator = tf + this.k1 * (1 - this.b + this.b * (docLength / avgDocLength));
    return idf * (numerator / denominator);
  }
  /**
   * Score documents against a query.
   *
   * Returns an empty array when there is nothing to score, including when
   * the index reports a non-positive average document length (which would
   * otherwise turn the length normalisation into NaN).
   *
   * @param queryTerms - Array of query terms (already tokenized/stemmed)
   * @param index - The inverted index to search
   * @returns Array of scored documents, sorted by relevance (descending)
   */
  score(queryTerms, index) {
    if (queryTerms.length === 0 || index.getTotalDocs() === 0) {
      return [];
    }
    const avgDocLength = index.getAvgDocLength();
    if (!(avgDocLength > 0)) {
      return [];
    }
    const docScores = new Map();
    for (const term of queryTerms) {
      const idf = index.getIDF(term);
      if (idf <= 0) {
        continue;
      }
      for (const { docId, termFrequency } of index.getDocumentsForTerm(term)) {
        let entry = docScores.get(docId);
        if (!entry) {
          entry = { score: 0, terms: new Set() };
          docScores.set(docId, entry);
        }
        entry.score += this.computeTermScore(
          termFrequency,
          idf,
          index.getDocLength(docId),
          avgDocLength
        );
        entry.terms.add(term);
      }
    }
    const results = Array.from(docScores, ([docId, { score, terms }]) => ({
      docId,
      score,
      matchedTerms: Array.from(terms)
    }));
    results.sort((a, b) => b.score - a.score);
    return results;
  }
  /**
   * Score a single document against query terms.
   * Uses pre-computed IDF from index but calculates TF locally.
   *
   * Complexity: O(Q + D) where Q = query terms, D = document tokens
   *
   * @param queryTerms - Tokenized query terms
   * @param docTokens - Tokenized document terms
   * @param index - Inverted index for IDF and avgDocLength
   * @returns BM25 score (0 if no matching terms)
   */
  scoreSingleDocument(queryTerms, docTokens, index) {
    if (queryTerms.length === 0 || docTokens.length === 0) {
      return 0;
    }
    const avgDocLength = index.getAvgDocLength();
    if (!(avgDocLength > 0)) {
      return 0;
    }
    const termFreqs = new Map();
    for (const token of docTokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    let score = 0;
    for (const term of queryTerms) {
      const tf = termFreqs.get(term) ?? 0;
      if (tf === 0) {
        continue;
      }
      const idf = index.getIDF(term);
      if (idf <= 0) {
        continue;
      }
      score += this.computeTermScore(tf, idf, docTokens.length, avgDocLength);
    }
    return score;
  }
  /**
   * Get the k1 parameter value.
   */
  getK1() {
    return this.k1;
  }
  /**
   * Get the b parameter value.
   */
  getB() {
    return this.b;
  }
};
|
|
9224
|
+
|
|
9225
|
+
// src/fts/IndexSerializer.ts
|
|
9226
|
+
var IndexSerializer = class {
  /**
   * Serialize inverted index to a JSON-serializable object.
   * Note: In a real app, you might want to encode this to binary (msgpack) later.
   *
   * @param index - Index to read from (uses its public getters only)
   * @returns Plain object with `version`, `metadata`, `terms` and `docLengths`
   */
  serialize(index) {
    // One timestamp so createdAt and lastModified cannot disagree.
    const now = Date.now();
    return {
      version: 1,
      metadata: {
        totalDocs: index.getTotalDocs(),
        avgDocLength: index.getAvgDocLength(),
        createdAt: now,
        lastModified: now
      },
      terms: this.serializeTerms(index),
      docLengths: this.serializeDocLengths(index)
    };
  }
  /**
   * Deserialize from object into a new BM25InvertedIndex.
   *
   * @param data - Object previously produced by `serialize`
   * @returns A freshly populated index
   * @throws Error when `data.version` is not 1
   */
  deserialize(data) {
    if (data.version !== 1) {
      throw new Error(`Unsupported index version: ${data.version}`);
    }
    const index = new BM25InvertedIndex();
    this.loadIntoIndex(index, data);
    return index;
  }
  /**
   * Build the serialized term list: one entry per term with its IDF and postings.
   */
  serializeTerms(index) {
    return Array.from(index.getTerms(), (term) => ({
      term,
      idf: index.getIDF(term),
      postings: index.getDocumentsForTerm(term).map((info) => ({
        docId: info.docId,
        termFrequency: info.termFrequency,
        positions: info.fieldPositions
      }))
    }));
  }
  /**
   * Build the docId -> length record.
   *
   * Object.fromEntries defines every key as an own property, so a docId such
   * as "__proto__" is kept instead of being swallowed by the prototype setter.
   */
  serializeDocLengths(index) {
    return Object.fromEntries(index.getDocLengths());
  }
  /**
   * Populate `index` directly from serialized data (writes its internal fields).
   *
   * Every document listed in `docLengths` gets a `docTerms` entry, including
   * documents without postings, so that `hasDocument`/`removeDocument` agree
   * with the restored document count.
   */
  loadIntoIndex(index, data) {
    const idx = index;
    idx.totalDocs = data.metadata.totalDocs;
    idx.avgDocLength = data.metadata.avgDocLength;
    idx.docLengths = new Map(Object.entries(data.docLengths));
    for (const docId of idx.docLengths.keys()) {
      if (!idx.docTerms.has(docId)) {
        idx.docTerms.set(docId, new Set());
      }
    }
    for (const { term, idf, postings } of data.terms) {
      const termInfos = postings.map((p) => ({
        docId: p.docId,
        termFrequency: p.termFrequency,
        fieldPositions: p.positions
      }));
      idx.index.set(term, termInfos);
      idx.idfCache.set(term, idf);
      for (const info of termInfos) {
        if (!idx.docTerms.has(info.docId)) {
          idx.docTerms.set(info.docId, new Set());
        }
        idx.docTerms.get(info.docId).add(term);
      }
    }
  }
};
|
|
9302
|
+
|
|
9303
|
+
// src/fts/FullTextIndex.ts
|
|
9304
|
+
var FullTextIndex = class {
|
|
9305
|
+
/**
|
|
9306
|
+
* Create a new FullTextIndex.
|
|
9307
|
+
*
|
|
9308
|
+
* @param config - Index configuration
|
|
9309
|
+
*/
|
|
9310
|
+
constructor(config) {
|
|
9311
|
+
this.fields = config.fields;
|
|
9312
|
+
this.tokenizer = new BM25Tokenizer(config.tokenizer);
|
|
9313
|
+
this.scorer = new BM25Scorer(config.bm25);
|
|
9314
|
+
this.fieldIndexes = /* @__PURE__ */ new Map();
|
|
9315
|
+
this.combinedIndex = new BM25InvertedIndex();
|
|
9316
|
+
this.indexedDocs = /* @__PURE__ */ new Set();
|
|
9317
|
+
this.serializer = new IndexSerializer();
|
|
9318
|
+
this.documentTokensCache = /* @__PURE__ */ new Map();
|
|
9319
|
+
for (const field of this.fields) {
|
|
9320
|
+
this.fieldIndexes.set(field, new BM25InvertedIndex());
|
|
9321
|
+
}
|
|
9322
|
+
}
|
|
9323
|
+
/**
|
|
9324
|
+
* Index a document (add or update).
|
|
9325
|
+
* Called when a document is set in the CRDT map.
|
|
9326
|
+
*
|
|
9327
|
+
* @param docId - Document identifier
|
|
9328
|
+
* @param document - Document data containing fields to index
|
|
9329
|
+
*/
|
|
9330
|
+
onSet(docId, document) {
|
|
9331
|
+
if (!document || typeof document !== "object") {
|
|
9332
|
+
this.documentTokensCache.delete(docId);
|
|
9333
|
+
return;
|
|
9334
|
+
}
|
|
9335
|
+
if (this.indexedDocs.has(docId)) {
|
|
9336
|
+
this.removeFromIndexes(docId);
|
|
9337
|
+
}
|
|
9338
|
+
const allTokens = [];
|
|
9339
|
+
for (const field of this.fields) {
|
|
9340
|
+
const value = document[field];
|
|
9341
|
+
if (typeof value !== "string") {
|
|
9342
|
+
continue;
|
|
9343
|
+
}
|
|
9344
|
+
const tokens = this.tokenizer.tokenize(value);
|
|
9345
|
+
if (tokens.length > 0) {
|
|
9346
|
+
const fieldIndex = this.fieldIndexes.get(field);
|
|
9347
|
+
fieldIndex.addDocument(docId, tokens);
|
|
9348
|
+
allTokens.push(...tokens);
|
|
9349
|
+
}
|
|
9350
|
+
}
|
|
9351
|
+
if (allTokens.length > 0) {
|
|
9352
|
+
this.combinedIndex.addDocument(docId, allTokens);
|
|
9353
|
+
this.indexedDocs.add(docId);
|
|
9354
|
+
this.documentTokensCache.set(docId, allTokens);
|
|
9355
|
+
} else {
|
|
9356
|
+
this.documentTokensCache.delete(docId);
|
|
9357
|
+
}
|
|
9358
|
+
}
|
|
9359
|
+
/**
|
|
9360
|
+
* Remove a document from the index.
|
|
9361
|
+
* Called when a document is deleted from the CRDT map.
|
|
9362
|
+
*
|
|
9363
|
+
* @param docId - Document identifier to remove
|
|
9364
|
+
*/
|
|
9365
|
+
onRemove(docId) {
|
|
9366
|
+
if (!this.indexedDocs.has(docId)) {
|
|
9367
|
+
return;
|
|
9368
|
+
}
|
|
9369
|
+
this.removeFromIndexes(docId);
|
|
9370
|
+
this.indexedDocs.delete(docId);
|
|
9371
|
+
this.documentTokensCache.delete(docId);
|
|
9372
|
+
}
|
|
9373
|
+
/**
|
|
9374
|
+
* Search the index with a query.
|
|
9375
|
+
*
|
|
9376
|
+
* @param query - Search query text
|
|
9377
|
+
* @param options - Search options (limit, minScore, boost)
|
|
9378
|
+
* @returns Array of search results, sorted by relevance
|
|
9379
|
+
*/
|
|
9380
|
+
search(query, options) {
|
|
9381
|
+
const queryTerms = this.tokenizer.tokenize(query);
|
|
9382
|
+
if (queryTerms.length === 0) {
|
|
9383
|
+
return [];
|
|
9384
|
+
}
|
|
9385
|
+
const boost = options?.boost;
|
|
9386
|
+
let results;
|
|
9387
|
+
if (boost && Object.keys(boost).length > 0) {
|
|
9388
|
+
results = this.searchWithBoost(queryTerms, boost);
|
|
9389
|
+
} else {
|
|
9390
|
+
results = this.scorer.score(queryTerms, this.combinedIndex);
|
|
9391
|
+
}
|
|
9392
|
+
if (options?.minScore !== void 0) {
|
|
9393
|
+
results = results.filter((r) => r.score >= options.minScore);
|
|
9394
|
+
}
|
|
9395
|
+
if (options?.limit !== void 0 && options.limit > 0) {
|
|
9396
|
+
results = results.slice(0, options.limit);
|
|
9397
|
+
}
|
|
9398
|
+
return results.map((r) => ({
|
|
9399
|
+
docId: r.docId,
|
|
9400
|
+
score: r.score,
|
|
9401
|
+
matchedTerms: r.matchedTerms,
|
|
9402
|
+
source: "fulltext"
|
|
9403
|
+
}));
|
|
9404
|
+
}
|
|
9405
|
+
/**
|
|
9406
|
+
* Serialize the index state.
|
|
9407
|
+
*
|
|
9408
|
+
* @returns Serialized index data
|
|
9409
|
+
*/
|
|
9410
|
+
serialize() {
|
|
9411
|
+
return this.serializer.serialize(this.combinedIndex);
|
|
9412
|
+
}
|
|
9413
|
+
/**
|
|
9414
|
+
* Load index from serialized state.
|
|
9415
|
+
*
|
|
9416
|
+
* @param data - Serialized index data
|
|
9417
|
+
*/
|
|
9418
|
+
load(data) {
|
|
9419
|
+
this.combinedIndex = this.serializer.deserialize(data);
|
|
9420
|
+
this.indexedDocs.clear();
|
|
9421
|
+
for (const [docId] of this.combinedIndex.getDocLengths()) {
|
|
9422
|
+
this.indexedDocs.add(docId);
|
|
9423
|
+
}
|
|
9424
|
+
this.fieldIndexes.clear();
|
|
9425
|
+
for (const field of this.fields) {
|
|
9426
|
+
this.fieldIndexes.set(field, new BM25InvertedIndex());
|
|
9427
|
+
}
|
|
9428
|
+
this.documentTokensCache.clear();
|
|
9429
|
+
}
|
|
9430
|
+
/**
|
|
9431
|
+
* Build the index from an array of entries.
|
|
9432
|
+
* Useful for initial bulk loading.
|
|
9433
|
+
*
|
|
9434
|
+
* @param entries - Array of [docId, document] tuples
|
|
9435
|
+
*/
|
|
9436
|
+
buildFromEntries(entries) {
|
|
9437
|
+
for (const [docId, document] of entries) {
|
|
9438
|
+
this.onSet(docId, document);
|
|
9439
|
+
}
|
|
9440
|
+
}
|
|
9441
|
+
/**
|
|
9442
|
+
* Clear all data from the index.
|
|
9443
|
+
*/
|
|
9444
|
+
clear() {
|
|
9445
|
+
this.combinedIndex.clear();
|
|
9446
|
+
for (const fieldIndex of this.fieldIndexes.values()) {
|
|
9447
|
+
fieldIndex.clear();
|
|
9448
|
+
}
|
|
9449
|
+
this.indexedDocs.clear();
|
|
9450
|
+
this.documentTokensCache.clear();
|
|
9451
|
+
}
|
|
9452
|
+
/**
|
|
9453
|
+
* Get the number of indexed documents.
|
|
9454
|
+
*
|
|
9455
|
+
* @returns Number of documents in the index
|
|
9456
|
+
*/
|
|
9457
|
+
getSize() {
|
|
9458
|
+
return this.indexedDocs.size;
|
|
9459
|
+
}
|
|
9460
|
+
/**
|
|
9461
|
+
* Tokenize a query string using the index's tokenizer.
|
|
9462
|
+
* Public method for external use (e.g., SearchCoordinator).
|
|
9463
|
+
*
|
|
9464
|
+
* @param query - Query text to tokenize
|
|
9465
|
+
* @returns Array of tokenized terms
|
|
9466
|
+
*/
|
|
9467
|
+
tokenizeQuery(query) {
|
|
9468
|
+
return this.tokenizer.tokenize(query);
|
|
9469
|
+
}
|
|
9470
|
+
/**
|
|
9471
|
+
* Score a single document against query terms.
|
|
9472
|
+
* O(Q × D) complexity where Q = query terms, D = document tokens.
|
|
9473
|
+
*
|
|
9474
|
+
* This method is optimized for checking if a single document
|
|
9475
|
+
* matches a query, avoiding full index scan.
|
|
9476
|
+
*
|
|
9477
|
+
* @param docId - Document ID to score
|
|
9478
|
+
* @param queryTerms - Pre-tokenized query terms
|
|
9479
|
+
* @param document - Optional document data (used if not in cache)
|
|
9480
|
+
* @returns SearchResult with score and matched terms, or null if no match
|
|
9481
|
+
*/
|
|
9482
|
+
scoreSingleDocument(docId, queryTerms, document) {
|
|
9483
|
+
if (queryTerms.length === 0) {
|
|
9484
|
+
return null;
|
|
9485
|
+
}
|
|
9486
|
+
let docTokens = this.documentTokensCache.get(docId);
|
|
9487
|
+
if (!docTokens && document) {
|
|
9488
|
+
docTokens = this.tokenizeDocument(document);
|
|
9489
|
+
}
|
|
9490
|
+
if (!docTokens || docTokens.length === 0) {
|
|
9491
|
+
return null;
|
|
9492
|
+
}
|
|
9493
|
+
const docTokenSet = new Set(docTokens);
|
|
9494
|
+
const matchedTerms = queryTerms.filter((term) => docTokenSet.has(term));
|
|
9495
|
+
if (matchedTerms.length === 0) {
|
|
9496
|
+
return null;
|
|
9497
|
+
}
|
|
9498
|
+
const score = this.scorer.scoreSingleDocument(
|
|
9499
|
+
queryTerms,
|
|
9500
|
+
docTokens,
|
|
9501
|
+
this.combinedIndex
|
|
9502
|
+
);
|
|
9503
|
+
if (score <= 0) {
|
|
9504
|
+
return null;
|
|
9505
|
+
}
|
|
9506
|
+
return {
|
|
9507
|
+
docId,
|
|
9508
|
+
score,
|
|
9509
|
+
matchedTerms,
|
|
9510
|
+
source: "fulltext"
|
|
9511
|
+
};
|
|
9512
|
+
}
|
|
9513
|
+
/**
|
|
9514
|
+
* Tokenize all indexed fields of a document.
|
|
9515
|
+
* Internal helper for scoreSingleDocument when document not in cache.
|
|
9516
|
+
*
|
|
9517
|
+
* @param document - Document data
|
|
9518
|
+
* @returns Array of all tokens from indexed fields
|
|
9519
|
+
*/
|
|
9520
|
+
tokenizeDocument(document) {
|
|
9521
|
+
const allTokens = [];
|
|
9522
|
+
for (const field of this.fields) {
|
|
9523
|
+
const value = document[field];
|
|
9524
|
+
if (typeof value === "string") {
|
|
9525
|
+
const tokens = this.tokenizer.tokenize(value);
|
|
9526
|
+
allTokens.push(...tokens);
|
|
9527
|
+
}
|
|
9528
|
+
}
|
|
9529
|
+
return allTokens;
|
|
9530
|
+
}
|
|
9531
|
+
/**
|
|
9532
|
+
* Get the index name (for debugging/display).
|
|
9533
|
+
*
|
|
9534
|
+
* @returns Descriptive name including indexed fields
|
|
9535
|
+
*/
|
|
9536
|
+
get name() {
|
|
9537
|
+
return `FullTextIndex(${this.fields.join(", ")})`;
|
|
9538
|
+
}
|
|
9539
|
+
/**
|
|
9540
|
+
* Remove document from all indexes (internal).
|
|
9541
|
+
*/
|
|
9542
|
+
removeFromIndexes(docId) {
|
|
9543
|
+
this.combinedIndex.removeDocument(docId);
|
|
9544
|
+
for (const fieldIndex of this.fieldIndexes.values()) {
|
|
9545
|
+
fieldIndex.removeDocument(docId);
|
|
9546
|
+
}
|
|
9547
|
+
}
|
|
9548
|
+
/**
|
|
9549
|
+
* Search with field boosting.
|
|
9550
|
+
* Scores are computed per-field and combined with boost weights.
|
|
9551
|
+
*/
|
|
9552
|
+
searchWithBoost(queryTerms, boost) {
|
|
9553
|
+
const docScores = /* @__PURE__ */ new Map();
|
|
9554
|
+
for (const field of this.fields) {
|
|
9555
|
+
const fieldIndex = this.fieldIndexes.get(field);
|
|
9556
|
+
const boostWeight = boost[field] ?? 1;
|
|
9557
|
+
const fieldResults = this.scorer.score(queryTerms, fieldIndex);
|
|
9558
|
+
for (const result of fieldResults) {
|
|
9559
|
+
const current = docScores.get(result.docId) || {
|
|
9560
|
+
score: 0,
|
|
9561
|
+
terms: /* @__PURE__ */ new Set()
|
|
9562
|
+
};
|
|
9563
|
+
current.score += result.score * boostWeight;
|
|
9564
|
+
for (const term of result.matchedTerms) {
|
|
9565
|
+
current.terms.add(term);
|
|
9566
|
+
}
|
|
9567
|
+
docScores.set(result.docId, current);
|
|
9568
|
+
}
|
|
9569
|
+
}
|
|
9570
|
+
const results = [];
|
|
9571
|
+
for (const [docId, { score, terms }] of docScores) {
|
|
9572
|
+
results.push({
|
|
9573
|
+
docId,
|
|
9574
|
+
score,
|
|
9575
|
+
matchedTerms: Array.from(terms)
|
|
9576
|
+
});
|
|
9577
|
+
}
|
|
9578
|
+
results.sort((a, b) => b.score - a.score);
|
|
9579
|
+
return results;
|
|
9580
|
+
}
|
|
9581
|
+
};
|
|
9582
|
+
|
|
8452
9583
|
// src/IndexedORMap.ts
|
|
8453
9584
|
var IndexedORMap = class extends ORMap {
|
|
8454
9585
|
constructor(hlc, options = {}) {
|
|
8455
9586
|
super(hlc);
|
|
9587
|
+
// Full-Text Search (Phase 11)
|
|
9588
|
+
this.fullTextIndex = null;
|
|
8456
9589
|
this.options = options;
|
|
8457
9590
|
this.indexRegistry = new IndexRegistry();
|
|
8458
9591
|
this.queryOptimizer = new QueryOptimizer({
|
|
@@ -8533,6 +9666,104 @@ var IndexedORMap = class extends ORMap {
|
|
|
8533
9666
|
this.indexRegistry.addIndex(index);
|
|
8534
9667
|
this.buildIndexFromExisting(index);
|
|
8535
9668
|
}
|
|
9669
|
+
// ==================== Full-Text Search (Phase 11) ====================
|
|
9670
|
+
/**
|
|
9671
|
+
* Enable BM25-based full-text search on specified fields.
|
|
9672
|
+
* This creates a FullTextIndex for relevance-ranked search.
|
|
9673
|
+
*
|
|
9674
|
+
* Note: This is different from addInvertedIndex which provides
|
|
9675
|
+
* boolean matching (contains/containsAll/containsAny). This method
|
|
9676
|
+
* provides BM25 relevance scoring for true full-text search.
|
|
9677
|
+
*
|
|
9678
|
+
* @param config - Full-text index configuration
|
|
9679
|
+
* @returns The created FullTextIndex
|
|
9680
|
+
*
|
|
9681
|
+
* @example
|
|
9682
|
+
* ```typescript
|
|
9683
|
+
* const map = new IndexedORMap(hlc);
|
|
9684
|
+
* map.enableFullTextSearch({
|
|
9685
|
+
* fields: ['title', 'body'],
|
|
9686
|
+
* tokenizer: { minLength: 2 },
|
|
9687
|
+
* bm25: { k1: 1.2, b: 0.75 }
|
|
9688
|
+
* });
|
|
9689
|
+
*
|
|
9690
|
+
* map.add('doc1', { title: 'Hello World', body: 'Test content' });
|
|
9691
|
+
* const results = map.search('hello');
|
|
9692
|
+
* // [{ key: 'doc1', tag: '...', value: {...}, score: 0.5, matchedTerms: ['hello'] }]
|
|
9693
|
+
* ```
|
|
9694
|
+
*/
|
|
9695
|
+
enableFullTextSearch(config) {
|
|
9696
|
+
this.fullTextIndex = new FullTextIndex(config);
|
|
9697
|
+
const snapshot = this.getSnapshot();
|
|
9698
|
+
const entries = [];
|
|
9699
|
+
for (const [key, tagMap] of snapshot.items) {
|
|
9700
|
+
for (const [tag, record] of tagMap) {
|
|
9701
|
+
if (!snapshot.tombstones.has(tag)) {
|
|
9702
|
+
const compositeKey = this.createCompositeKey(key, tag);
|
|
9703
|
+
entries.push([compositeKey, record.value]);
|
|
9704
|
+
}
|
|
9705
|
+
}
|
|
9706
|
+
}
|
|
9707
|
+
this.fullTextIndex.buildFromEntries(entries);
|
|
9708
|
+
return this.fullTextIndex;
|
|
9709
|
+
}
|
|
9710
|
+
/**
|
|
9711
|
+
* Check if full-text search is enabled.
|
|
9712
|
+
*
|
|
9713
|
+
* @returns true if full-text search is enabled
|
|
9714
|
+
*/
|
|
9715
|
+
isFullTextSearchEnabled() {
|
|
9716
|
+
return this.fullTextIndex !== null;
|
|
9717
|
+
}
|
|
9718
|
+
/**
|
|
9719
|
+
* Get the full-text index (if enabled).
|
|
9720
|
+
*
|
|
9721
|
+
* @returns The FullTextIndex or null
|
|
9722
|
+
*/
|
|
9723
|
+
getFullTextIndex() {
|
|
9724
|
+
return this.fullTextIndex;
|
|
9725
|
+
}
|
|
9726
|
+
/**
|
|
9727
|
+
* Perform a BM25-ranked full-text search.
|
|
9728
|
+
* Results are sorted by relevance score (highest first).
|
|
9729
|
+
*
|
|
9730
|
+
* @param query - Search query text
|
|
9731
|
+
* @param options - Search options (limit, minScore, boost)
|
|
9732
|
+
* @returns Array of search results with scores, sorted by relevance
|
|
9733
|
+
*
|
|
9734
|
+
* @throws Error if full-text search is not enabled
|
|
9735
|
+
*/
|
|
9736
|
+
search(query, options) {
|
|
9737
|
+
if (!this.fullTextIndex) {
|
|
9738
|
+
throw new Error("Full-text search is not enabled. Call enableFullTextSearch() first.");
|
|
9739
|
+
}
|
|
9740
|
+
const scoredDocs = this.fullTextIndex.search(query, options);
|
|
9741
|
+
const results = [];
|
|
9742
|
+
for (const { docId: compositeKey, score, matchedTerms } of scoredDocs) {
|
|
9743
|
+
const [key, tag] = this.parseCompositeKey(compositeKey);
|
|
9744
|
+
const records = this.getRecords(key);
|
|
9745
|
+
const record = records.find((r) => r.tag === tag);
|
|
9746
|
+
if (record) {
|
|
9747
|
+
results.push({
|
|
9748
|
+
key,
|
|
9749
|
+
tag,
|
|
9750
|
+
value: record.value,
|
|
9751
|
+
score,
|
|
9752
|
+
matchedTerms: matchedTerms ?? []
|
|
9753
|
+
});
|
|
9754
|
+
}
|
|
9755
|
+
}
|
|
9756
|
+
return results;
|
|
9757
|
+
}
|
|
9758
|
+
/**
|
|
9759
|
+
* Disable full-text search and release the index.
|
|
9760
|
+
*/
|
|
9761
|
+
disableFullTextSearch() {
|
|
9762
|
+
if (this.fullTextIndex) {
|
|
9763
|
+
this.fullTextIndex.clear();
|
|
9764
|
+
this.fullTextIndex = null;
|
|
9765
|
+
}
|
|
9766
|
+
}
|
|
8536
9767
|
/**
|
|
8537
9768
|
* Remove an index.
|
|
8538
9769
|
*
|
|
@@ -8686,6 +9917,9 @@ var IndexedORMap = class extends ORMap {
|
|
|
8686
9917
|
const record = super.add(key, value, ttlMs);
|
|
8687
9918
|
const compositeKey = this.createCompositeKey(key, record.tag);
|
|
8688
9919
|
this.indexRegistry.onRecordAdded(compositeKey, value);
|
|
9920
|
+
if (this.fullTextIndex) {
|
|
9921
|
+
this.fullTextIndex.onSet(compositeKey, value);
|
|
9922
|
+
}
|
|
8689
9923
|
return record;
|
|
8690
9924
|
}
|
|
8691
9925
|
/**
|
|
@@ -8698,6 +9932,9 @@ var IndexedORMap = class extends ORMap {
|
|
|
8698
9932
|
for (const record of matchingRecords) {
|
|
8699
9933
|
const compositeKey = this.createCompositeKey(key, record.tag);
|
|
8700
9934
|
this.indexRegistry.onRecordRemoved(compositeKey, record.value);
|
|
9935
|
+
if (this.fullTextIndex) {
|
|
9936
|
+
this.fullTextIndex.onRemove(compositeKey);
|
|
9937
|
+
}
|
|
8701
9938
|
}
|
|
8702
9939
|
return result;
|
|
8703
9940
|
}
|
|
@@ -8709,6 +9946,9 @@ var IndexedORMap = class extends ORMap {
|
|
|
8709
9946
|
if (applied) {
|
|
8710
9947
|
const compositeKey = this.createCompositeKey(key, record.tag);
|
|
8711
9948
|
this.indexRegistry.onRecordAdded(compositeKey, record.value);
|
|
9949
|
+
if (this.fullTextIndex) {
|
|
9950
|
+
this.fullTextIndex.onSet(compositeKey, record.value);
|
|
9951
|
+
}
|
|
8712
9952
|
}
|
|
8713
9953
|
return applied;
|
|
8714
9954
|
}
|
|
@@ -8731,6 +9971,9 @@ var IndexedORMap = class extends ORMap {
|
|
|
8731
9971
|
if (removedValue !== void 0 && removedKey !== void 0) {
|
|
8732
9972
|
const compositeKey = this.createCompositeKey(removedKey, tag);
|
|
8733
9973
|
this.indexRegistry.onRecordRemoved(compositeKey, removedValue);
|
|
9974
|
+
if (this.fullTextIndex) {
|
|
9975
|
+
this.fullTextIndex.onRemove(compositeKey);
|
|
9976
|
+
}
|
|
8734
9977
|
}
|
|
8735
9978
|
}
|
|
8736
9979
|
/**
|
|
@@ -8739,6 +9982,9 @@ var IndexedORMap = class extends ORMap {
|
|
|
8739
9982
|
clear() {
|
|
8740
9983
|
super.clear();
|
|
8741
9984
|
this.indexRegistry.clear();
|
|
9985
|
+
if (this.fullTextIndex) {
|
|
9986
|
+
this.fullTextIndex.clear();
|
|
9987
|
+
}
|
|
8742
9988
|
}
|
|
8743
9989
|
// ==================== Helper Methods ====================
|
|
8744
9990
|
/**
|
|
@@ -9091,6 +10337,7 @@ var IndexedORMap = class extends ORMap {
|
|
|
9091
10337
|
};
|
|
9092
10338
|
export {
|
|
9093
10339
|
AuthMessageSchema,
|
|
10340
|
+
BM25Scorer,
|
|
9094
10341
|
BatchMessageSchema,
|
|
9095
10342
|
BuiltInProcessors,
|
|
9096
10343
|
BuiltInResolvers,
|
|
@@ -9114,6 +10361,7 @@ export {
|
|
|
9114
10361
|
DEFAULT_RESOLVER_RATE_LIMITS,
|
|
9115
10362
|
DEFAULT_STOP_WORDS,
|
|
9116
10363
|
DEFAULT_WRITE_CONCERN_TIMEOUT,
|
|
10364
|
+
ENGLISH_STOPWORDS,
|
|
9117
10365
|
EntryProcessBatchRequestSchema,
|
|
9118
10366
|
EntryProcessBatchResponseSchema,
|
|
9119
10367
|
EntryProcessKeyResultSchema,
|
|
@@ -9123,8 +10371,11 @@ export {
|
|
|
9123
10371
|
EntryProcessorSchema,
|
|
9124
10372
|
EventJournalImpl,
|
|
9125
10373
|
FORBIDDEN_PATTERNS,
|
|
10374
|
+
BM25InvertedIndex as FTSInvertedIndex,
|
|
10375
|
+
BM25Tokenizer as FTSTokenizer,
|
|
9126
10376
|
FallbackIndex,
|
|
9127
10377
|
FilteringResultSet,
|
|
10378
|
+
FullTextIndex,
|
|
9128
10379
|
HLC,
|
|
9129
10380
|
HashIndex,
|
|
9130
10381
|
IndexRegistry,
|
|
@@ -9191,6 +10442,18 @@ export {
|
|
|
9191
10442
|
RegisterResolverRequestSchema,
|
|
9192
10443
|
RegisterResolverResponseSchema,
|
|
9193
10444
|
Ringbuffer,
|
|
10445
|
+
SearchMessageSchema,
|
|
10446
|
+
SearchOptionsSchema,
|
|
10447
|
+
SearchPayloadSchema,
|
|
10448
|
+
SearchRespMessageSchema,
|
|
10449
|
+
SearchRespPayloadSchema,
|
|
10450
|
+
SearchSubMessageSchema,
|
|
10451
|
+
SearchSubPayloadSchema,
|
|
10452
|
+
SearchUnsubMessageSchema,
|
|
10453
|
+
SearchUnsubPayloadSchema,
|
|
10454
|
+
SearchUpdateMessageSchema,
|
|
10455
|
+
SearchUpdatePayloadSchema,
|
|
10456
|
+
SearchUpdateTypeSchema,
|
|
9194
10457
|
SetResultSet,
|
|
9195
10458
|
SimpleAttribute,
|
|
9196
10459
|
SortedMap,
|
|
@@ -9236,6 +10499,7 @@ export {
|
|
|
9236
10499
|
isUsingNativeHash,
|
|
9237
10500
|
isWriteConcernAchieved,
|
|
9238
10501
|
multiAttribute,
|
|
10502
|
+
porterStem,
|
|
9239
10503
|
resetNativeHash,
|
|
9240
10504
|
serialize,
|
|
9241
10505
|
simpleAttribute,
|