@topgunbuild/core 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1973,6 +1973,95 @@ var Predicates = class {
1973
1973
  static containsAny(attribute, values) {
1974
1974
  return { op: "containsAny", attribute, value: values };
1975
1975
  }
1976
+ // ============== Full-Text Search Predicates (Phase 12) ==============
1977
+ /**
1978
+ * Create a 'match' predicate for full-text search.
1979
+ * Uses BM25 scoring to find relevant documents.
1980
+ *
1981
+ * @param attribute - Field to search in
1982
+ * @param query - Search query string
1983
+ * @param options - Match options (minScore, boost, operator, fuzziness)
1984
+ *
1985
+ * @example
1986
+ * ```typescript
1987
+ * // Simple match
1988
+ * Predicates.match('title', 'machine learning')
1989
+ *
1990
+ * // With options
1991
+ * Predicates.match('body', 'neural networks', { minScore: 1.0, boost: 2.0 })
1992
+ * ```
1993
+ */
1994
+ static match(attribute, query, options) {
1995
+ return { op: "match", attribute, query, matchOptions: options };
1996
+ }
1997
+ /**
1998
+ * Create a 'matchPhrase' predicate for exact phrase matching.
1999
+ * Matches documents containing the exact phrase (words in order).
2000
+ *
2001
+ * @param attribute - Field to search in
2002
+ * @param query - Phrase to match
2003
+ * @param slop - Word distance tolerance (0 = exact, 1 = allow 1 word between)
2004
+ *
2005
+ * @example
2006
+ * ```typescript
2007
+ * // Exact phrase
2008
+ * Predicates.matchPhrase('body', 'machine learning')
2009
+ *
2010
+ * // With slop (allows "machine deep learning")
2011
+ * Predicates.matchPhrase('body', 'machine learning', 1)
2012
+ * ```
2013
+ */
2014
+ static matchPhrase(attribute, query, slop) {
2015
+ return { op: "matchPhrase", attribute, query, slop };
2016
+ }
2017
+ /**
2018
+ * Create a 'matchPrefix' predicate for prefix matching.
2019
+ * Matches documents where field starts with the given prefix.
2020
+ *
2021
+ * @param attribute - Field to search in
2022
+ * @param prefix - Prefix to match
2023
+ * @param maxExpansions - Maximum number of term expansions
2024
+ *
2025
+ * @example
2026
+ * ```typescript
2027
+ * // Match titles starting with "mach"
2028
+ * Predicates.matchPrefix('title', 'mach')
2029
+ *
2030
+ * // Limit expansions for performance
2031
+ * Predicates.matchPrefix('title', 'mach', 50)
2032
+ * ```
2033
+ */
2034
+ static matchPrefix(attribute, prefix, maxExpansions) {
2035
+ return { op: "matchPrefix", attribute, prefix, maxExpansions };
2036
+ }
2037
+ /**
2038
+ * Create a multi-field match predicate.
2039
+ * Searches across multiple fields with optional per-field boosting.
2040
+ *
2041
+ * @param attributes - Fields to search in
2042
+ * @param query - Search query string
2043
+ * @param options - Options including per-field boost factors
2044
+ *
2045
+ * @example
2046
+ * ```typescript
2047
+ * // Search title and body
2048
+ * Predicates.multiMatch(['title', 'body'], 'machine learning')
2049
+ *
2050
+ * // With boosting (title 2x more important)
2051
+ * Predicates.multiMatch(['title', 'body'], 'machine learning', {
2052
+ * boost: { title: 2.0, body: 1.0 }
2053
+ * })
2054
+ * ```
2055
+ */
2056
+ static multiMatch(attributes, query, options) {
2057
+ const children = attributes.map((attr) => ({
2058
+ op: "match",
2059
+ attribute: attr,
2060
+ query,
2061
+ matchOptions: options?.boost?.[attr] ? { boost: options.boost[attr] } : void 0
2062
+ }));
2063
+ return { op: "or", children };
2064
+ }
1976
2065
  };
1977
2066
  function evaluatePredicate(predicate, data) {
1978
2067
  if (!data) return false;
@@ -2416,6 +2505,66 @@ var JournalReadResponseSchema = z3.object({
2416
2505
  events: z3.array(JournalEventDataSchema),
2417
2506
  hasMore: z3.boolean()
2418
2507
  });
2508
// ---- Full-text search wire schemas ----
// Optional tuning knobs shared by one-shot searches and search subscriptions.
var SearchOptionsSchema = z3.object({
  limit: z3.number().optional(),
  minScore: z3.number().optional(),
  // Map of string key -> numeric boost.
  // NOTE(review): keys are presumably field names — confirm against the server.
  boost: z3.record(z3.string(), z3.number()).optional()
});
// Payload of a SEARCH message.
var SearchPayloadSchema = z3.object({
  requestId: z3.string(),
  mapName: z3.string(),
  query: z3.string(),
  options: SearchOptionsSchema.optional()
});
var SearchMessageSchema = z3.object({
  type: z3.literal("SEARCH"),
  payload: SearchPayloadSchema
});
// Payload of a SEARCH_RESP message; `error` is optional.
var SearchRespPayloadSchema = z3.object({
  requestId: z3.string(),
  results: z3.array(z3.object({
    key: z3.string(),
    value: z3.unknown(),
    score: z3.number(),
    matchedTerms: z3.array(z3.string())
  })),
  totalCount: z3.number(),
  error: z3.string().optional()
});
var SearchRespMessageSchema = z3.object({
  type: z3.literal("SEARCH_RESP"),
  payload: SearchRespPayloadSchema
});
// Kind of change carried by a SEARCH_UPDATE message.
// NOTE(review): presumably a key entering / changing within / leaving the
// result set — confirm against the emitter.
var SearchUpdateTypeSchema = z3.enum(["ENTER", "UPDATE", "LEAVE"]);
// Payload of a SEARCH_SUB message; same fields as SearchPayloadSchema but
// identified by `subscriptionId` instead of `requestId`.
var SearchSubPayloadSchema = z3.object({
  subscriptionId: z3.string(),
  mapName: z3.string(),
  query: z3.string(),
  options: SearchOptionsSchema.optional()
});
var SearchSubMessageSchema = z3.object({
  type: z3.literal("SEARCH_SUB"),
  payload: SearchSubPayloadSchema
});
// Payload of a SEARCH_UPDATE message: one result entry plus its change type.
var SearchUpdatePayloadSchema = z3.object({
  subscriptionId: z3.string(),
  key: z3.string(),
  value: z3.unknown(),
  score: z3.number(),
  matchedTerms: z3.array(z3.string()),
  type: SearchUpdateTypeSchema
});
var SearchUpdateMessageSchema = z3.object({
  type: z3.literal("SEARCH_UPDATE"),
  payload: SearchUpdatePayloadSchema
});
// Payload of a SEARCH_UNSUB message.
var SearchUnsubPayloadSchema = z3.object({
  subscriptionId: z3.string()
});
var SearchUnsubMessageSchema = z3.object({
  type: z3.literal("SEARCH_UNSUB"),
  payload: SearchUnsubPayloadSchema
});
2419
2568
  var ConflictResolverSchema = z3.object({
2420
2569
  name: z3.string().min(1).max(100),
2421
2570
  code: z3.string().max(5e4),
@@ -2547,7 +2696,14 @@ var MessageSchema = z3.discriminatedUnion("type", [
2547
2696
  UnregisterResolverResponseSchema,
2548
2697
  MergeRejectedMessageSchema,
2549
2698
  ListResolversRequestSchema,
2550
- ListResolversResponseSchema
2699
+ ListResolversResponseSchema,
2700
+ // Phase 11.1: Full-Text Search
2701
+ SearchMessageSchema,
2702
+ SearchRespMessageSchema,
2703
+ // Phase 11.1b: Live Search Subscriptions
2704
+ SearchSubMessageSchema,
2705
+ SearchUpdateMessageSchema,
2706
+ SearchUnsubMessageSchema
2551
2707
  ]);
2552
2708
 
2553
2709
  // src/types/WriteConcern.ts
@@ -3528,6 +3684,9 @@ function isSimpleQuery(query) {
3528
3684
  function isLogicalQuery(query) {
3529
3685
  return query.type === "and" || query.type === "or" || query.type === "not";
3530
3686
  }
3687
/**
 * Type guard for full-text search query nodes.
 *
 * @param query - Query node; only its `type` property is read
 * @returns True when `query.type` is "match", "matchPhrase" or "matchPrefix"
 */
function isFTSQuery(query) {
  switch (query.type) {
    case "match":
    case "matchPhrase":
    case "matchPrefix":
      return true;
    default:
      return false;
  }
}
3531
3690
 
3532
3691
  // src/query/indexes/StandingQueryIndex.ts
3533
3692
  var _StandingQueryIndex = class _StandingQueryIndex {
@@ -5786,11 +5945,48 @@ var QueryOptimizer = class {
5786
5945
  if ("indexRegistry" in indexRegistryOrOptions) {
5787
5946
  this.indexRegistry = indexRegistryOrOptions.indexRegistry;
5788
5947
  this.standingQueryRegistry = indexRegistryOrOptions.standingQueryRegistry;
5948
+ this.fullTextIndexes = indexRegistryOrOptions.fullTextIndexes ?? /* @__PURE__ */ new Map();
5789
5949
  } else {
5790
5950
  this.indexRegistry = indexRegistryOrOptions;
5791
5951
  this.standingQueryRegistry = standingQueryRegistry;
5952
+ this.fullTextIndexes = /* @__PURE__ */ new Map();
5792
5953
  }
5793
5954
  }
5955
/**
 * Register a full-text index for a field (Phase 12).
 *
 * Stores `index` under `field` in `this.fullTextIndexes`; with Map
 * semantics an entry already stored for that field is replaced.
 *
 * @param field - Field name
 * @param index - FullTextIndex instance
 */
registerFullTextIndex(field, index) {
  this.fullTextIndexes.set(field, index);
}
5964
/**
 * Unregister a full-text index (Phase 12).
 *
 * Deletes the entry for `field` from `this.fullTextIndexes`; the result of
 * the delete is not returned to the caller.
 *
 * @param field - Field name
 */
unregisterFullTextIndex(field) {
  this.fullTextIndexes.delete(field);
}
5972
/**
 * Get registered full-text index for a field (Phase 12).
 *
 * @param field - Field name
 * @returns The value stored for `field` in `this.fullTextIndexes`
 *   (a FullTextIndex), or undefined when nothing is registered
 */
getFullTextIndex(field) {
  return this.fullTextIndexes.get(field);
}
5981
/**
 * Check if a full-text index exists for a field (Phase 12).
 *
 * @param field - Field name
 * @returns True if `this.fullTextIndexes` has an entry for `field`
 */
hasFullTextIndex(field) {
  return this.fullTextIndexes.has(field);
}
5794
5990
  /**
5795
5991
  * Optimize a query and return an execution plan.
5796
5992
  *
@@ -5864,12 +6060,151 @@ var QueryOptimizer = class {
5864
6060
  optimizeNode(query) {
5865
6061
  if (isLogicalQuery(query)) {
5866
6062
  return this.optimizeLogical(query);
6063
+ } else if (isFTSQuery(query)) {
6064
+ return this.optimizeFTS(query);
5867
6065
  } else if (isSimpleQuery(query)) {
5868
6066
  return this.optimizeSimple(query);
5869
6067
  } else {
5870
6068
  return { type: "full-scan", predicate: query };
5871
6069
  }
5872
6070
  }
6071
+ /**
6072
+ * Optimize a full-text search query (Phase 12).
6073
+ */
6074
+ optimizeFTS(query) {
6075
+ const field = query.attribute;
6076
+ if (!this.hasFullTextIndex(field)) {
6077
+ return { type: "full-scan", predicate: query };
6078
+ }
6079
+ return this.buildFTSScanStep(query);
6080
+ }
6081
+ /**
6082
+ * Build an FTS scan step from a query node (Phase 12).
6083
+ */
6084
+ buildFTSScanStep(query) {
6085
+ const field = query.attribute;
6086
+ switch (query.type) {
6087
+ case "match":
6088
+ return {
6089
+ type: "fts-scan",
6090
+ field,
6091
+ query: query.query,
6092
+ ftsType: "match",
6093
+ options: query.options,
6094
+ returnsScored: true,
6095
+ estimatedCost: this.estimateFTSCost(field)
6096
+ };
6097
+ case "matchPhrase":
6098
+ return {
6099
+ type: "fts-scan",
6100
+ field,
6101
+ query: query.query,
6102
+ ftsType: "matchPhrase",
6103
+ options: query.slop !== void 0 ? { fuzziness: query.slop } : void 0,
6104
+ returnsScored: true,
6105
+ estimatedCost: this.estimateFTSCost(field)
6106
+ };
6107
+ case "matchPrefix":
6108
+ return {
6109
+ type: "fts-scan",
6110
+ field,
6111
+ query: query.prefix,
6112
+ ftsType: "matchPrefix",
6113
+ options: query.maxExpansions !== void 0 ? { fuzziness: query.maxExpansions } : void 0,
6114
+ returnsScored: true,
6115
+ estimatedCost: this.estimateFTSCost(field)
6116
+ };
6117
+ default:
6118
+ throw new Error(`Unknown FTS query type: ${query.type}`);
6119
+ }
6120
+ }
6121
+ /**
6122
+ * Estimate cost of FTS query based on index size (Phase 12).
6123
+ */
6124
+ estimateFTSCost(field) {
6125
+ const index = this.fullTextIndexes.get(field);
6126
+ if (!index) {
6127
+ return Number.MAX_SAFE_INTEGER;
6128
+ }
6129
+ const docCount = index.getSize();
6130
+ return 50 + Math.log2(docCount + 1) * 10;
6131
+ }
6132
+ /**
6133
+ * Classify predicates by type for hybrid query planning (Phase 12).
6134
+ *
6135
+ * @param predicates - Array of predicates to classify
6136
+ * @returns Classified predicates
6137
+ */
6138
+ classifyPredicates(predicates) {
6139
+ const result = {
6140
+ exactPredicates: [],
6141
+ rangePredicates: [],
6142
+ ftsPredicates: [],
6143
+ otherPredicates: []
6144
+ };
6145
+ for (const pred of predicates) {
6146
+ if (isFTSQuery(pred)) {
6147
+ result.ftsPredicates.push(pred);
6148
+ } else if (isSimpleQuery(pred)) {
6149
+ switch (pred.type) {
6150
+ case "eq":
6151
+ case "neq":
6152
+ case "in":
6153
+ result.exactPredicates.push(pred);
6154
+ break;
6155
+ case "gt":
6156
+ case "gte":
6157
+ case "lt":
6158
+ case "lte":
6159
+ case "between":
6160
+ result.rangePredicates.push(pred);
6161
+ break;
6162
+ default:
6163
+ result.otherPredicates.push(pred);
6164
+ }
6165
+ } else if (isLogicalQuery(pred)) {
6166
+ result.otherPredicates.push(pred);
6167
+ } else {
6168
+ result.otherPredicates.push(pred);
6169
+ }
6170
+ }
6171
+ return result;
6172
+ }
6173
+ /**
6174
+ * Determine fusion strategy based on step types (Phase 12).
6175
+ *
6176
+ * Strategy selection:
6177
+ * - All binary (exact/range with no scores) → 'intersection'
6178
+ * - All scored (FTS) → 'score-filter' (filter by score, sort by score)
6179
+ * - Mixed (binary + scored) → 'rrf' (Reciprocal Rank Fusion)
6180
+ *
6181
+ * @param steps - Plan steps to fuse
6182
+ * @returns Fusion strategy
6183
+ */
6184
+ determineFusionStrategy(steps) {
6185
+ const hasScored = steps.some((s) => this.stepReturnsScored(s));
6186
+ const hasBinary = steps.some((s) => !this.stepReturnsScored(s));
6187
+ if (hasScored && hasBinary) {
6188
+ return "rrf";
6189
+ } else if (hasScored) {
6190
+ return "score-filter";
6191
+ } else {
6192
+ return "intersection";
6193
+ }
6194
+ }
6195
+ /**
6196
+ * Check if a plan step returns scored results (Phase 12).
6197
+ */
6198
+ stepReturnsScored(step) {
6199
+ switch (step.type) {
6200
+ case "fts-scan":
6201
+ return true;
6202
+ case "fusion":
6203
+ return step.returnsScored;
6204
+ default:
6205
+ return false;
6206
+ }
6207
+ }
5873
6208
  /**
5874
6209
  * Optimize a simple (attribute-based) query.
5875
6210
  */
@@ -6125,6 +6460,18 @@ var QueryOptimizer = class {
6125
6460
  return this.estimateCost(step.source) + 10;
6126
6461
  case "not":
6127
6462
  return this.estimateCost(step.source) + 100;
6463
+ // Phase 12: FTS step types
6464
+ case "fts-scan":
6465
+ return step.estimatedCost;
6466
+ case "fusion":
6467
+ return step.steps.reduce((sum, s) => {
6468
+ const cost = this.estimateCost(s);
6469
+ if (cost === Number.MAX_SAFE_INTEGER) {
6470
+ return Number.MAX_SAFE_INTEGER;
6471
+ }
6472
+ return Math.min(sum + cost, Number.MAX_SAFE_INTEGER);
6473
+ }, 0) + 20;
6474
+ // Fusion overhead
6128
6475
  default:
6129
6476
  return Number.MAX_SAFE_INTEGER;
6130
6477
  }
@@ -6145,6 +6492,12 @@ var QueryOptimizer = class {
6145
6492
  return this.usesIndexes(step.source);
6146
6493
  case "not":
6147
6494
  return this.usesIndexes(step.source);
6495
+ // Phase 12: FTS step types
6496
+ case "fts-scan":
6497
+ return true;
6498
+ // FTS uses FullTextIndex
6499
+ case "fusion":
6500
+ return step.steps.some((s) => this.usesIndexes(s));
6148
6501
  default:
6149
6502
  return false;
6150
6503
  }
@@ -7693,6 +8046,131 @@ var DefaultIndexingStrategy = class {
7693
8046
  }
7694
8047
  };
7695
8048
 
8049
+ // src/search/ReciprocalRankFusion.ts
8050
/**
 * Reciprocal Rank Fusion: merges several ranked result lists into one list
 * ordered by the sum of reciprocal ranks.
 *
 * Each input result is expected to expose `docId`, `score` and `source`.
 */
var ReciprocalRankFusion = class {
  /**
   * @param config - Optional settings; `config.k` is the rank-smoothing
   *   constant and defaults to 60 when null or undefined
   */
  constructor(config) {
    this.k = config?.k ?? 60;
  }
  /**
   * Merge multiple ranked result lists using RRF.
   *
   * Formula: RRF_score(d) = Σ 1 / (k + rank_i(d)), with 1-based ranks.
   *
   * @param resultSets - Array of ranked result lists from different search methods
   * @returns Merged results sorted by RRF score (descending)
   */
  merge(resultSets) {
    // Unweighted RRF is weighted RRF with every weight equal to 1
    // (multiplying by 1 is exact, so scores are unchanged).
    return this.fuseRankedLists(
      resultSets.map((resultSet) => ({ resultSet, weight: 1 }))
    );
  }
  /**
   * Merge with weighted RRF for different method priorities.
   *
   * Weighted formula: RRF_score(d) = Σ weight_i * (1 / (k + rank_i(d)))
   *
   * @param resultSets - Array of ranked result lists
   * @param weights - Weights for each result set (same order as resultSets)
   * @returns Merged results sorted by weighted RRF score (descending)
   * @throws Error when `weights.length` differs from `resultSets.length`
   *
   * @example
   * ```typescript
   * const rrf = new ReciprocalRankFusion();
   *
   * // Prioritize exact matches (weight 2.0) over FTS (weight 1.0)
   * const merged = rrf.mergeWeighted(
   *   [exactResults, ftsResults],
   *   [2.0, 1.0]
   * );
   * ```
   */
  mergeWeighted(resultSets, weights) {
    if (weights.length !== resultSets.length) {
      throw new Error(
        `Weights array length (${weights.length}) must match resultSets length (${resultSets.length})`
      );
    }
    return this.fuseRankedLists(
      resultSets.map((resultSet, i) => ({ resultSet, weight: weights[i] }))
    );
  }
  /**
   * Shared accumulation used by merge() and mergeWeighted().
   * Internal helper; not intended to be called from outside the class.
   *
   * @param weightedSets - Array of `{ resultSet, weight }` pairs
   * @returns Merged entries `{ docId, score, source, originalScores }` sorted
   *   by score descending; `source` joins the sorted contributing sources
   *   with "+". Empty result sets contribute nothing.
   */
  fuseRankedLists(weightedSets) {
    const scoreMap = /* @__PURE__ */ new Map();
    for (const { resultSet, weight } of weightedSets) {
      for (let rank = 0; rank < resultSet.length; rank++) {
        const { docId, score, source } = resultSet[rank];
        // rank is 0-based here, hence the +1 to obtain the 1-based rank.
        const rrfContribution = weight * (1 / (this.k + rank + 1));
        const existing = scoreMap.get(docId);
        if (existing) {
          existing.rrfScore += rrfContribution;
          existing.sources.add(source);
          existing.originalScores[source] = score;
        } else {
          scoreMap.set(docId, {
            rrfScore: rrfContribution,
            sources: /* @__PURE__ */ new Set([source]),
            originalScores: { [source]: score }
          });
        }
      }
    }
    const merged = [];
    for (const [docId, data] of scoreMap) {
      merged.push({
        docId,
        score: data.rrfScore,
        source: Array.from(data.sources).sort().join("+"),
        originalScores: data.originalScores
      });
    }
    merged.sort((a, b) => b.score - a.score);
    return merged;
  }
  /**
   * Get the k constant used for RRF calculation.
   */
  getK() {
    return this.k;
  }
};
8173
+
7696
8174
  // src/IndexedLWWMap.ts
7697
8175
  var IndexedLWWMap = class extends LWWMap {
7698
8176
  constructor(hlc, options = {}) {
@@ -8449,49 +8927,1115 @@ var IndexedLWWMap = class extends LWWMap {
8449
8927
  }
8450
8928
  };
8451
8929
 
8452
- // src/IndexedORMap.ts
8453
- var IndexedORMap = class extends ORMap {
8454
- constructor(hlc, options = {}) {
8455
- super(hlc);
8456
- this.options = options;
8457
- this.indexRegistry = new IndexRegistry();
8458
- this.queryOptimizer = new QueryOptimizer({
8459
- indexRegistry: this.indexRegistry
8460
- });
8461
- this.indexRegistry.setFallbackIndex(
8462
- new FallbackIndex(
8463
- () => this.getAllCompositeKeys(),
8464
- (compositeKey) => this.getRecordByCompositeKey(compositeKey),
8465
- (record, query) => this.matchesIndexQuery(record, query)
8466
- )
8467
- );
8468
- this.queryTracker = new QueryPatternTracker();
8469
- this.indexAdvisor = new IndexAdvisor(this.queryTracker);
8470
- if (options.adaptiveIndexing?.autoIndex?.enabled) {
8471
- this.autoIndexManager = new AutoIndexManager(
8472
- this.queryTracker,
8473
- this.indexAdvisor,
8474
- options.adaptiveIndexing.autoIndex
8475
- );
8476
- this.autoIndexManager.setMap(this);
8477
- } else {
8478
- this.autoIndexManager = null;
8930
+ // src/query/tokenization/stopwords.ts
8931
// Lowercase English stopwords, grouped by part of speech.
// Each word appears once: "when" is listed under Conjunctions only (it was
// previously repeated under Adverbs, which had no effect on the Set).
var ENGLISH_STOPWORDS = /* @__PURE__ */ new Set([
  // Articles
  "a", "an", "the",
  // Pronouns
  "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
  "you", "your", "yours", "yourself", "yourselves",
  "he", "him", "his", "himself",
  "she", "her", "hers", "herself",
  "it", "its", "itself",
  "they", "them", "their", "theirs", "themselves",
  "what", "which", "who", "whom",
  "this", "that", "these", "those",
  // Auxiliary verbs
  "am", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "having",
  "do", "does", "did", "doing",
  "will", "would", "shall", "should",
  "can", "could", "may", "might", "must", "ought",
  // Prepositions
  "about", "above", "across", "after", "against", "along", "among", "around",
  "at", "before", "behind", "below", "beneath", "beside", "between", "beyond",
  "by", "down", "during", "except", "for", "from",
  "in", "inside", "into", "near",
  "of", "off", "on", "onto", "out", "outside", "over",
  "past", "since", "through", "throughout",
  "to", "toward", "towards",
  "under", "underneath", "until", "up", "upon",
  "with", "within", "without",
  // Conjunctions
  "and", "but", "or", "nor", "so", "yet",
  "both", "either", "neither", "not", "only",
  "as", "if", "than", "when", "while",
  "although", "because", "unless", "whether",
  // Adverbs
  "here", "there", "where", "how", "why",
  "all", "each", "every", "any", "some", "no", "none",
  "more", "most", "other", "such", "own", "same",
  "too", "very", "just", "also", "now", "then", "again", "ever", "once",
  // Misc (quantifiers and contraction fragments such as 's, 't, 'll)
  "few", "many", "much", "several",
  "s", "t", "d", "ll", "m", "ve", "re"
]);
9113
+
9114
+ // src/query/tokenization/porter-stemmer.ts
9115
/**
 * Reduce an English word to its stem with a Porter-style suffix-stripping
 * pass.
 *
 * Vowel detection in the helpers only recognises lowercase letters, so the
 * input is expected to be lowercased already.
 *
 * @param word - Word to stem
 * @returns The stem; falsy input and words shorter than 3 characters are
 *   returned unchanged
 */
function porterStem(word) {
  if (!word || word.length < 3) {
    return word;
  }
  let stem = word;
  // Plural endings: "sses" -> "ss", "ies" -> "i", trailing "s" dropped
  // unless the word ends in "ss".
  if (stem.endsWith("sses")) {
    stem = stem.slice(0, -2);
  } else if (stem.endsWith("ies")) {
    stem = stem.slice(0, -2);
  } else if (!stem.endsWith("ss") && stem.endsWith("s")) {
    stem = stem.slice(0, -1);
  }
  // Step 1b: "eed" / "ed" / "ing". The lazy group keeps the base as short as
  // possible, so "eed" is preferred over "ed" when both could match.
  const step1bRegex = /^(.+?)(eed|ed|ing)$/;
  const step1bMatch = stem.match(step1bRegex);
  if (step1bMatch) {
    const [, base, suffix] = step1bMatch;
    if (suffix === "eed") {
      if (getMeasure(base) > 0) {
        stem = base + "ee";
      }
    } else if (hasVowel(base)) {
      stem = base;
      // Tidy the base after stripping "ed"/"ing": restore a final "e", or
      // undouble a final consonant (except l, s, z).
      if (stem.endsWith("at") || stem.endsWith("bl") || stem.endsWith("iz")) {
        stem = stem + "e";
      } else if (endsWithDoubleConsonant(stem) && !stem.match(/[lsz]$/)) {
        stem = stem.slice(0, -1);
      } else if (getMeasure(stem) === 1 && endsWithCVC(stem)) {
        stem = stem + "e";
      }
    }
  }
  // Final "y" becomes "i" when the rest of the stem contains a vowel.
  if (stem.endsWith("y") && hasVowel(stem.slice(0, -1))) {
    stem = stem.slice(0, -1) + "i";
  }
  // Step 2: [suffix pattern, replacement, minimum measure]. Entries are tried
  // in order; the first whose base has measure > minimum is applied and ends
  // the scan.
  const step2Suffixes = [
    [/ational$/, "ate", 0],
    [/tional$/, "tion", 0],
    [/enci$/, "ence", 0],
    [/anci$/, "ance", 0],
    [/izer$/, "ize", 0],
    [/abli$/, "able", 0],
    [/alli$/, "al", 0],
    [/entli$/, "ent", 0],
    [/eli$/, "e", 0],
    [/ousli$/, "ous", 0],
    [/ization$/, "ize", 0],
    [/ation$/, "ate", 0],
    [/ator$/, "ate", 0],
    [/alism$/, "al", 0],
    [/iveness$/, "ive", 0],
    [/fulness$/, "ful", 0],
    [/ousness$/, "ous", 0],
    [/aliti$/, "al", 0],
    [/iviti$/, "ive", 0],
    [/biliti$/, "ble", 0]
  ];
  for (const [regex, replacement, minMeasure] of step2Suffixes) {
    if (regex.test(stem)) {
      const base = stem.replace(regex, "");
      if (getMeasure(base) > minMeasure) {
        stem = base + replacement;
        break;
      }
    }
  }
  // Step 3: same table layout and scan rule as step 2.
  const step3Suffixes = [
    [/icate$/, "ic", 0],
    [/ative$/, "", 0],
    [/alize$/, "al", 0],
    [/iciti$/, "ic", 0],
    [/ical$/, "ic", 0],
    [/ful$/, "", 0],
    [/ness$/, "", 0]
  ];
  for (const [regex, replacement, minMeasure] of step3Suffixes) {
    if (regex.test(stem)) {
      const base = stem.replace(regex, "");
      if (getMeasure(base) > minMeasure) {
        stem = base + replacement;
        break;
      }
    }
  }
  // Step 4: [suffix pattern, minimum measure]; the suffix is removed outright.
  // A suffix that matches but fails the measure test does not end the scan,
  // so later entries are still tried.
  const step4Suffixes = [
    [/al$/, 1],
    [/ance$/, 1],
    [/ence$/, 1],
    [/er$/, 1],
    [/ic$/, 1],
    [/able$/, 1],
    [/ible$/, 1],
    [/ant$/, 1],
    [/ement$/, 1],
    [/ment$/, 1],
    [/ent$/, 1],
    [/ion$/, 1],
    [/ou$/, 1],
    [/ism$/, 1],
    [/ate$/, 1],
    [/iti$/, 1],
    [/ous$/, 1],
    [/ive$/, 1],
    [/ize$/, 1]
  ];
  for (const [regex, minMeasure] of step4Suffixes) {
    if (regex.test(stem)) {
      const base = stem.replace(regex, "");
      if (getMeasure(base) > minMeasure) {
        // "ion" is only removed after "s" or "t"; otherwise the stem is left
        // alone, but the scan still ends here.
        if (regex.source === "ion$") {
          if (base.match(/[st]$/)) {
            stem = base;
          }
        } else {
          stem = base;
        }
        break;
      }
    }
  }
  // Drop a final "e" when the remainder has measure > 1, or measure 1 and
  // does not end consonant-vowel-consonant.
  if (stem.endsWith("e")) {
    const base = stem.slice(0, -1);
    const measure = getMeasure(base);
    if (measure > 1 || measure === 1 && !endsWithCVC(base)) {
      stem = base;
    }
  }
  // Undouble a final "ll" when the measure is greater than 1.
  if (getMeasure(stem) > 1 && endsWithDoubleConsonant(stem) && stem.endsWith("l")) {
    stem = stem.slice(0, -1);
  }
  return stem;
}
9246
/**
 * Decide whether a character counts as a vowel for stemming.
 *
 * @param char - Character to classify (lowercase expected)
 * @param prevChar - Character immediately before `char`, if any
 * @returns True for a, e, i, o, u, and for "y" when `prevChar` is present
 *   and is not one of a, e, i, o, u; false otherwise
 */
function isVowel(char, prevChar) {
  const plainVowels = "aeiou";
  if (plainVowels.includes(char)) {
    return true;
  }
  return char === "y" && Boolean(prevChar) && !plainVowels.includes(prevChar);
}
9255
/**
 * Report whether a string contains at least one vowel, as defined by
 * isVowel (each character is classified together with its predecessor).
 *
 * @param str - String to scan
 * @returns True as soon as one vowel is found, false otherwise (including
 *   for the empty string)
 */
function hasVowel(str) {
  let pos = 0;
  while (pos < str.length) {
    // str[-1] is undefined, so the first character has no predecessor.
    if (isVowel(str[pos], str[pos - 1])) {
      return true;
    }
    pos += 1;
  }
  return false;
}
9263
/**
 * Compute the stemming "measure" of a string: the number of places where a
 * vowel is immediately followed by a consonant (vowels per isVowel).
 *
 * @param str - String to measure
 * @returns Count of vowel-to-consonant transitions (0 for the empty string)
 */
function getMeasure(str) {
  let measure = 0;
  let previousWasVowel = false;
  for (let pos = 0; pos < str.length; pos++) {
    // str[-1] is undefined, so the first character has no predecessor.
    const currentIsVowel = isVowel(str[pos], str[pos - 1]);
    if (previousWasVowel && !currentIsVowel) {
      measure += 1;
    }
    previousWasVowel = currentIsVowel;
  }
  return measure;
}
9271
/**
 * Check whether a string ends with the same non-vowel character twice.
 *
 * @param str - String to inspect
 * @returns True when the last two characters are identical and not one of
 *   a, e, i, o, u; false for strings shorter than 2 characters
 */
function endsWithDoubleConsonant(str) {
  const length = str.length;
  if (length < 2) {
    return false;
  }
  const finalChar = str[length - 1];
  if (finalChar !== str[length - 2]) {
    return false;
  }
  return !"aeiou".includes(finalChar);
}
9277
/**
 * Check whether a string ends consonant-vowel-consonant, where the final
 * consonant is not w, x or y.
 *
 * The outer two characters are tested against the plain vowels a, e, i, o, u;
 * the middle character is tested with isVowel using the first as predecessor.
 *
 * @param str - String to inspect
 * @returns True when the last three characters match the pattern; false for
 *   strings shorter than 3 characters
 */
function endsWithCVC(str) {
  const length = str.length;
  if (length < 3) {
    return false;
  }
  const first = str[length - 3];
  const middle = str[length - 2];
  const last = str[length - 1];
  if ("aeiou".includes(first)) {
    return false;
  }
  if (!isVowel(middle, first)) {
    return false;
  }
  return !"aeiou".includes(last) && !"wxy".includes(last);
}
9285
+
9286
+ // src/fts/Tokenizer.ts
9287
/**
 * Tokenizer for the BM25 full-text index: lowercases, splits on
 * non-letter/non-digit characters, removes stopwords, stems, and filters
 * tokens by length.
 */
var BM25Tokenizer = class {
  /**
   * Create a new BM25Tokenizer.
   *
   * Defaults: lowercase true, English stopwords, Porter stemmer,
   * minLength 2, maxLength 40. Caller-supplied options override them.
   *
   * @param options - Configuration options
   */
  constructor(options) {
    const merged = {
      lowercase: true,
      stopwords: ENGLISH_STOPWORDS,
      stemmer: porterStem,
      minLength: 2,
      maxLength: 40,
      ...options
    };
    // An explicit `stopwords: undefined` / `stemmer: undefined` (or null) in
    // `options` overwrites the default via the spread and would make
    // tokenize() throw on first use; restore the defaults in that case.
    merged.stopwords = merged.stopwords ?? ENGLISH_STOPWORDS;
    merged.stemmer = merged.stemmer ?? porterStem;
    this.options = merged;
  }
  /**
   * Tokenize text into an array of normalized tokens.
   *
   * A word is dropped when it is shorter than `minLength`, is a stopword, or
   * when its stem is shorter than `minLength` or longer than `maxLength`.
   *
   * @param text - Text to tokenize
   * @returns Array of stemmed tokens in input order (duplicates kept); empty
   *   for non-string or empty input
   */
  tokenize(text) {
    if (!text || typeof text !== "string") {
      return [];
    }
    const opts = this.options;
    const processed = opts.lowercase ? text.toLowerCase() : text;
    // Split on runs of anything that is not a Unicode letter or number.
    const words = processed.split(/[^\p{L}\p{N}]+/u).filter((w) => w.length > 0);
    const tokens = [];
    for (const word of words) {
      if (word.length < opts.minLength) {
        continue;
      }
      if (opts.stopwords.has(word)) {
        continue;
      }
      const stemmed = opts.stemmer(word);
      if (stemmed.length < opts.minLength) {
        continue;
      }
      if (stemmed.length > opts.maxLength) {
        continue;
      }
      tokens.push(stemmed);
    }
    return tokens;
  }
};
9335
+
9336
+ // src/fts/BM25InvertedIndex.ts
9337
/**
 * In-memory inverted index holding the statistics BM25 scoring needs:
 * per-term posting lists, per-document token counts, the number of
 * documents, and the average document length.
 */
var BM25InvertedIndex = class {
  constructor() {
    // term -> array of { docId, termFrequency } postings
    this.index = /* @__PURE__ */ new Map();
    // docId -> number of tokens in the document
    this.docLengths = /* @__PURE__ */ new Map();
    // docId -> Set of distinct terms, so removal only touches relevant postings
    this.docTerms = /* @__PURE__ */ new Map();
    // term -> memoised IDF; cleared whenever the corpus changes
    this.idfCache = /* @__PURE__ */ new Map();
    this.totalDocs = 0;
    this.avgDocLength = 0;
  }
  /**
   * Add a document to the index.
   *
   * If `docId` is already indexed, the previous version is removed first so
   * that re-adding a document replaces it instead of duplicating postings
   * and inflating the document count.
   *
   * @param docId - Unique document identifier
   * @param tokens - Array of tokens (already tokenized/stemmed)
   */
  addDocument(docId, tokens) {
    if (this.docTerms.has(docId)) {
      this.removeDocument(docId);
    }
    const termFreqs = /* @__PURE__ */ new Map();
    for (const token of tokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    for (const [term, freq] of termFreqs) {
      let postings = this.index.get(term);
      if (!postings) {
        postings = [];
        this.index.set(term, postings);
      }
      postings.push({ docId, termFrequency: freq });
    }
    this.docLengths.set(docId, tokens.length);
    this.docTerms.set(docId, new Set(termFreqs.keys()));
    this.totalDocs++;
    this.updateAvgDocLength();
    // Every IDF depends on totalDocs, so all cached values are stale now.
    this.idfCache.clear();
  }
  /**
   * Remove a document from the index. Unknown ids are ignored.
   *
   * @param docId - Document identifier to remove
   */
  removeDocument(docId) {
    const terms = this.docTerms.get(docId);
    if (!terms) {
      return;
    }
    for (const term of terms) {
      const postings = this.index.get(term);
      if (!postings) {
        continue;
      }
      const remaining = postings.filter((info) => info.docId !== docId);
      if (remaining.length === 0) {
        // Drop empty posting lists so getTerms()/getTermCount() stay accurate.
        this.index.delete(term);
      } else {
        this.index.set(term, remaining);
      }
    }
    this.docLengths.delete(docId);
    this.docTerms.delete(docId);
    this.totalDocs--;
    this.updateAvgDocLength();
    this.idfCache.clear();
  }
  /**
   * Get all documents containing a term.
   *
   * @param term - Term to look up
   * @returns Array of TermInfo objects (empty if the term is unknown)
   */
  getDocumentsForTerm(term) {
    return this.index.get(term) || [];
  }
  /**
   * Calculate IDF (Inverse Document Frequency) for a term.
   *
   * Uses BM25 IDF formula:
   * IDF = log((N - df + 0.5) / (df + 0.5) + 1)
   *
   * Where:
   * - N = total documents
   * - df = document frequency (docs containing term)
   *
   * @param term - Term to calculate IDF for
   * @returns IDF value (0 if term doesn't exist)
   */
  getIDF(term) {
    const cached = this.idfCache.get(term);
    if (cached !== undefined) {
      return cached;
    }
    const postings = this.index.get(term);
    if (!postings || postings.length === 0) {
      return 0;
    }
    const docFreq = postings.length;
    const idf = Math.log((this.totalDocs - docFreq + 0.5) / (docFreq + 0.5) + 1);
    this.idfCache.set(term, idf);
    return idf;
  }
  /**
   * Get the length of a document (number of tokens).
   *
   * @param docId - Document identifier
   * @returns Document length (0 if not found)
   */
  getDocLength(docId) {
    return this.docLengths.get(docId) ?? 0;
  }
  /**
   * Get the average document length.
   *
   * @returns Average length across all documents
   */
  getAvgDocLength() {
    return this.avgDocLength;
  }
  /**
   * Get the total number of documents in the index.
   *
   * @returns Total document count
   */
  getTotalDocs() {
    return this.totalDocs;
  }
  /**
   * Get iterator for document lengths (useful for serialization).
   *
   * @returns Iterator of [docId, length] pairs
   */
  getDocLengths() {
    return this.docLengths.entries();
  }
  /**
   * Get the number of documents in the index (alias for getTotalDocs).
   *
   * @returns Number of indexed documents
   */
  getSize() {
    return this.totalDocs;
  }
  /**
   * Clear all data from the index.
   */
  clear() {
    this.index.clear();
    this.docLengths.clear();
    this.docTerms.clear();
    this.idfCache.clear();
    this.totalDocs = 0;
    this.avgDocLength = 0;
  }
  /**
   * Check if a document exists in the index.
   *
   * @param docId - Document identifier
   * @returns True if document exists
   */
  hasDocument(docId) {
    return this.docTerms.has(docId);
  }
  /**
   * Get all unique terms in the index.
   *
   * @returns Iterator of all terms
   */
  getTerms() {
    return this.index.keys();
  }
  /**
   * Get the number of unique terms in the index.
   *
   * @returns Number of unique terms
   */
  getTermCount() {
    return this.index.size;
  }
  /**
   * Recompute the average document length after add/remove by summing
   * every stored document length.
   */
  updateAvgDocLength() {
    if (this.totalDocs === 0) {
      this.avgDocLength = 0;
      return;
    }
    let sum = 0;
    for (const length of this.docLengths.values()) {
      sum += length;
    }
    this.avgDocLength = sum / this.totalDocs;
  }
};
9528
+
9529
+ // src/fts/BM25Scorer.ts
9530
/**
 * BM25 relevance scorer. Works against any index object exposing
 * getTotalDocs(), getAvgDocLength(), getIDF(term),
 * getDocumentsForTerm(term) and getDocLength(docId).
 */
var BM25Scorer = class {
  /**
   * Create a new BM25 scorer.
   *
   * @param options - BM25 configuration options (`k1` defaults to 1.2,
   *   `b` defaults to 0.75)
   */
  constructor(options) {
    this.k1 = options?.k1 ?? 1.2;
    this.b = options?.b ?? 0.75;
  }
  /**
   * BM25 contribution of one term to one document:
   * idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * docLength / avgDocLength)).
   *
   * Shared by score() and scoreSingleDocument() so both paths use the same
   * formula. Callers must ensure `avgDocLength` is non-zero.
   *
   * @param idf - Inverse document frequency of the term
   * @param termFrequency - Occurrences of the term in the document
   * @param docLength - Token count of the document
   * @param avgDocLength - Average token count across the index (non-zero)
   * @returns Score contribution of the term
   */
  computeTermScore(idf, termFrequency, docLength, avgDocLength) {
    const numerator = termFrequency * (this.k1 + 1);
    const denominator = termFrequency + this.k1 * (1 - this.b + this.b * (docLength / avgDocLength));
    return idf * (numerator / denominator);
  }
  /**
   * Score documents against a query.
   *
   * @param queryTerms - Array of query terms (already tokenized/stemmed)
   * @param index - The inverted index to search
   * @returns Array of scored documents, sorted by relevance (descending)
   */
  score(queryTerms, index) {
    if (queryTerms.length === 0 || index.getTotalDocs() === 0) {
      return [];
    }
    const avgDocLength = index.getAvgDocLength();
    if (avgDocLength === 0) {
      // Length normalisation would divide by zero and produce NaN scores;
      // mirror scoreSingleDocument() and report no matches instead.
      return [];
    }
    const docScores = /* @__PURE__ */ new Map();
    for (const term of queryTerms) {
      const idf = index.getIDF(term);
      if (idf <= 0) {
        // Unknown term (or a non-positive IDF): contributes nothing.
        continue;
      }
      for (const { docId, termFrequency } of index.getDocumentsForTerm(term)) {
        let entry = docScores.get(docId);
        if (!entry) {
          entry = { score: 0, terms: /* @__PURE__ */ new Set() };
          docScores.set(docId, entry);
        }
        entry.score += this.computeTermScore(
          idf,
          termFrequency,
          index.getDocLength(docId),
          avgDocLength
        );
        entry.terms.add(term);
      }
    }
    const results = Array.from(docScores, ([docId, { score, terms }]) => ({
      docId,
      score,
      matchedTerms: Array.from(terms)
    }));
    results.sort((a, b) => b.score - a.score);
    return results;
  }
  /**
   * Score a single document against query terms.
   * Uses IDF and average document length from the index but calculates
   * term frequencies from `docTokens`.
   *
   * @param queryTerms - Tokenized query terms
   * @param docTokens - Tokenized document terms
   * @param index - Inverted index for IDF and avgDocLength
   * @returns BM25 score (0 if no matching terms)
   */
  scoreSingleDocument(queryTerms, docTokens, index) {
    if (queryTerms.length === 0 || docTokens.length === 0) {
      return 0;
    }
    const avgDocLength = index.getAvgDocLength();
    if (avgDocLength === 0) {
      return 0;
    }
    const docLength = docTokens.length;
    const termFreqs = /* @__PURE__ */ new Map();
    for (const token of docTokens) {
      termFreqs.set(token, (termFreqs.get(token) ?? 0) + 1);
    }
    let score = 0;
    for (const term of queryTerms) {
      const tf = termFreqs.get(term) ?? 0;
      if (tf === 0) {
        continue;
      }
      const idf = index.getIDF(term);
      if (idf <= 0) {
        continue;
      }
      score += this.computeTermScore(idf, tf, docLength, avgDocLength);
    }
    return score;
  }
  /**
   * Get the k1 parameter value.
   */
  getK1() {
    return this.k1;
  }
  /**
   * Get the b parameter value.
   */
  getB() {
    return this.b;
  }
};
9635
+
9636
+ // src/fts/IndexSerializer.ts
9637
/**
 * Converts a BM25 inverted index to and from a plain, JSON-serializable
 * object (format version 1).
 */
var IndexSerializer = class {
  /**
   * Serialize inverted index to a JSON-serializable object.
   * Note: In a real app, you might want to encode this to binary (msgpack) later.
   *
   * @param index - Index to serialize
   * @returns Plain object with `version`, `metadata`, `terms` and `docLengths`
   */
  serialize(index) {
    const totalDocs = index.getTotalDocs();
    const avgDocLength = index.getAvgDocLength();
    // One timestamp for both fields so a fresh snapshot never shows
    // lastModified differing from createdAt.
    const now = Date.now();
    return {
      version: 1,
      metadata: {
        totalDocs,
        avgDocLength,
        createdAt: now,
        lastModified: now
      },
      terms: this.serializeTerms(index),
      docLengths: this.serializeDocLengths(index)
    };
  }
  /**
   * Deserialize from object into a new BM25InvertedIndex.
   *
   * @param data - Object previously produced by serialize()
   * @returns A newly constructed index populated from `data`
   * @throws Error if `data.version` is not 1
   */
  deserialize(data) {
    if (data.version !== 1) {
      throw new Error(`Unsupported index version: ${data.version}`);
    }
    const index = new BM25InvertedIndex();
    this.loadIntoIndex(index, data);
    return index;
  }
  /**
   * Build the serialized term list: one entry per term with its IDF and
   * posting list.
   */
  serializeTerms(index) {
    return Array.from(index.getTerms(), (term) => ({
      term,
      idf: index.getIDF(term),
      postings: index.getDocumentsForTerm(term).map((info) => ({
        docId: info.docId,
        termFrequency: info.termFrequency,
        positions: info.fieldPositions
      }))
    }));
  }
  /**
   * Build the docId -> length record.
   *
   * Object.fromEntries defines own properties, so a document id such as
   * "__proto__" is stored like any other key instead of being swallowed by
   * the prototype setter, which is what plain `obj[id] = n` would do.
   */
  serializeDocLengths(index) {
    return Object.fromEntries(index.getDocLengths());
  }
  /**
   * Populate `index` in place from serialized `data`, writing directly to
   * its internal fields (totalDocs, avgDocLength, docLengths, index,
   * idfCache, docTerms).
   */
  loadIntoIndex(index, data) {
    const idx = index;
    idx.totalDocs = data.metadata.totalDocs;
    idx.avgDocLength = data.metadata.avgDocLength;
    idx.docLengths = new Map(Object.entries(data.docLengths));
    // Register every known document up front so that documents without any
    // postings are still visible to hasDocument()/removeDocument().
    for (const docId of idx.docLengths.keys()) {
      if (!idx.docTerms.has(docId)) {
        idx.docTerms.set(docId, /* @__PURE__ */ new Set());
      }
    }
    for (const { term, idf, postings } of data.terms) {
      const termInfos = postings.map((p) => ({
        docId: p.docId,
        termFrequency: p.termFrequency,
        fieldPositions: p.positions
      }));
      idx.index.set(term, termInfos);
      // Seed the cache with the stored IDF so it need not be recomputed.
      idx.idfCache.set(term, idf);
      for (const info of termInfos) {
        let docTermSet = idx.docTerms.get(info.docId);
        if (!docTermSet) {
          docTermSet = /* @__PURE__ */ new Set();
          idx.docTerms.set(info.docId, docTermSet);
        }
        docTermSet.add(term);
      }
    }
  }
};
9713
+
9714
+ // src/fts/FullTextIndex.ts
9715
/**
 * BM25 full-text index over a fixed set of string fields.
 *
 * Keeps one inverted index per field (used for boosted searches) plus a
 * combined index over all fields (used for plain searches and for
 * serialization), and caches each document's combined token list.
 */
var FullTextIndex = class {
  /**
   * Create a new FullTextIndex.
   *
   * @param config - Index configuration (`fields`, optional `tokenizer`
   *   and `bm25` options)
   */
  constructor(config) {
    this.fields = config.fields;
    this.tokenizer = new BM25Tokenizer(config.tokenizer);
    this.scorer = new BM25Scorer(config.bm25);
    this.fieldIndexes = /* @__PURE__ */ new Map();
    this.combinedIndex = new BM25InvertedIndex();
    this.indexedDocs = /* @__PURE__ */ new Set();
    this.serializer = new IndexSerializer();
    this.documentTokensCache = /* @__PURE__ */ new Map();
    for (const field of this.fields) {
      this.fieldIndexes.set(field, new BM25InvertedIndex());
    }
  }
  /**
   * Index a document (add or update).
   * Called when a document is set in the CRDT map.
   *
   * Any previously indexed version of `docId` is fully removed first. If the
   * new value is not an object, or yields no tokens in any indexed field,
   * the document ends up absent from the index, so getSize() and search()
   * never report stale content.
   *
   * @param docId - Document identifier
   * @param document - Document data containing fields to index
   */
  onSet(docId, document) {
    if (this.indexedDocs.has(docId)) {
      this.removeFromIndexes(docId);
      this.indexedDocs.delete(docId);
    }
    this.documentTokensCache.delete(docId);
    if (!document || typeof document !== "object") {
      return;
    }
    const allTokens = [];
    for (const field of this.fields) {
      const value = document[field];
      if (typeof value !== "string") {
        continue;
      }
      const tokens = this.tokenizer.tokenize(value);
      if (tokens.length === 0) {
        continue;
      }
      this.fieldIndexes.get(field).addDocument(docId, tokens);
      // Plain loop rather than push(...tokens): spreading a very large
      // array can exceed the engine's argument-count limit.
      for (const token of tokens) {
        allTokens.push(token);
      }
    }
    if (allTokens.length === 0) {
      return;
    }
    this.combinedIndex.addDocument(docId, allTokens);
    this.indexedDocs.add(docId);
    this.documentTokensCache.set(docId, allTokens);
  }
  /**
   * Remove a document from the index.
   * Called when a document is deleted from the CRDT map.
   *
   * @param docId - Document identifier to remove
   */
  onRemove(docId) {
    if (!this.indexedDocs.has(docId)) {
      return;
    }
    this.removeFromIndexes(docId);
    this.indexedDocs.delete(docId);
    this.documentTokensCache.delete(docId);
  }
  /**
   * Search the index with a query.
   *
   * @param query - Search query text
   * @param options - Search options (limit, minScore, boost)
   * @returns Array of search results, sorted by relevance
   */
  search(query, options) {
    const queryTerms = this.tokenizer.tokenize(query);
    if (queryTerms.length === 0) {
      return [];
    }
    const boost = options?.boost;
    const useBoost = Boolean(boost) && Object.keys(boost).length > 0;
    let results = useBoost
      ? this.searchWithBoost(queryTerms, boost)
      : this.scorer.score(queryTerms, this.combinedIndex);
    if (options?.minScore !== undefined) {
      results = results.filter((r) => r.score >= options.minScore);
    }
    // A limit of 0 or a negative limit is treated as "no limit".
    if (options?.limit !== undefined && options.limit > 0) {
      results = results.slice(0, options.limit);
    }
    return results.map((r) => ({
      docId: r.docId,
      score: r.score,
      matchedTerms: r.matchedTerms,
      source: "fulltext"
    }));
  }
  /**
   * Serialize the index state (the combined index only).
   *
   * @returns Serialized index data
   */
  serialize() {
    return this.serializer.serialize(this.combinedIndex);
  }
  /**
   * Load index from serialized state.
   *
   * Only the combined index is restored. The per-field indexes and the
   * token cache are not part of the serialized data, so they are reset to
   * empty here.
   *
   * @param data - Serialized index data
   */
  load(data) {
    this.combinedIndex = this.serializer.deserialize(data);
    this.indexedDocs.clear();
    for (const [docId] of this.combinedIndex.getDocLengths()) {
      this.indexedDocs.add(docId);
    }
    this.fieldIndexes.clear();
    for (const field of this.fields) {
      this.fieldIndexes.set(field, new BM25InvertedIndex());
    }
    this.documentTokensCache.clear();
  }
  /**
   * Build the index from an array of entries.
   * Useful for initial bulk loading.
   *
   * @param entries - Array of [docId, document] tuples
   */
  buildFromEntries(entries) {
    for (const [docId, document] of entries) {
      this.onSet(docId, document);
    }
  }
  /**
   * Clear all data from the index.
   */
  clear() {
    this.combinedIndex.clear();
    for (const fieldIndex of this.fieldIndexes.values()) {
      fieldIndex.clear();
    }
    this.indexedDocs.clear();
    this.documentTokensCache.clear();
  }
  /**
   * Get the number of indexed documents.
   *
   * @returns Number of documents in the index
   */
  getSize() {
    return this.indexedDocs.size;
  }
  /**
   * Tokenize a query string using the index's tokenizer.
   * Public method for external use (e.g., SearchCoordinator).
   *
   * @param query - Query text to tokenize
   * @returns Array of tokenized terms
   */
  tokenizeQuery(query) {
    return this.tokenizer.tokenize(query);
  }
  /**
   * Score a single document against query terms without scanning the
   * whole index.
   *
   * Tokens come from the cache when the document is indexed; otherwise
   * `document` (if given) is tokenized on the fly.
   *
   * @param docId - Document ID to score
   * @param queryTerms - Pre-tokenized query terms
   * @param document - Optional document data (used if not in cache)
   * @returns SearchResult with score and matched terms, or null if no match
   */
  scoreSingleDocument(docId, queryTerms, document) {
    if (queryTerms.length === 0) {
      return null;
    }
    let docTokens = this.documentTokensCache.get(docId);
    if (!docTokens && document) {
      docTokens = this.tokenizeDocument(document);
    }
    if (!docTokens || docTokens.length === 0) {
      return null;
    }
    const docTokenSet = new Set(docTokens);
    const matchedTerms = queryTerms.filter((term) => docTokenSet.has(term));
    if (matchedTerms.length === 0) {
      return null;
    }
    const score = this.scorer.scoreSingleDocument(queryTerms, docTokens, this.combinedIndex);
    if (score <= 0) {
      return null;
    }
    return {
      docId,
      score,
      matchedTerms,
      source: "fulltext"
    };
  }
  /**
   * Tokenize all indexed fields of a document.
   * Internal helper for scoreSingleDocument when document not in cache.
   *
   * @param document - Document data
   * @returns Array of all tokens from indexed fields
   */
  tokenizeDocument(document) {
    const allTokens = [];
    for (const field of this.fields) {
      const value = document[field];
      if (typeof value !== "string") {
        continue;
      }
      for (const token of this.tokenizer.tokenize(value)) {
        allTokens.push(token);
      }
    }
    return allTokens;
  }
  /**
   * Get the index name (for debugging/display).
   *
   * @returns Descriptive name including indexed fields
   */
  get name() {
    return `FullTextIndex(${this.fields.join(", ")})`;
  }
  /**
   * Remove document from all indexes (internal).
   */
  removeFromIndexes(docId) {
    this.combinedIndex.removeDocument(docId);
    for (const fieldIndex of this.fieldIndexes.values()) {
      fieldIndex.removeDocument(docId);
    }
  }
  /**
   * Search with field boosting.
   * Scores are computed per-field and combined with boost weights; fields
   * without an explicit weight use 1.
   */
  searchWithBoost(queryTerms, boost) {
    const docScores = /* @__PURE__ */ new Map();
    for (const field of this.fields) {
      const fieldIndex = this.fieldIndexes.get(field);
      const boostWeight = boost[field] ?? 1;
      for (const result of this.scorer.score(queryTerms, fieldIndex)) {
        let entry = docScores.get(result.docId);
        if (!entry) {
          entry = { score: 0, terms: /* @__PURE__ */ new Set() };
          docScores.set(result.docId, entry);
        }
        entry.score += result.score * boostWeight;
        for (const term of result.matchedTerms) {
          entry.terms.add(term);
        }
      }
    }
    const results = Array.from(docScores, ([docId, { score, terms }]) => ({
      docId,
      score,
      matchedTerms: Array.from(terms)
    }));
    results.sort((a, b) => b.score - a.score);
    return results;
  }
};
9993
+
9994
+ // src/IndexedORMap.ts
9995
+ var IndexedORMap = class extends ORMap {
9996
+ constructor(hlc, options = {}) {
9997
+ super(hlc);
9998
+ // Full-Text Search (Phase 11)
9999
+ this.fullTextIndex = null;
10000
+ this.options = options;
10001
+ this.indexRegistry = new IndexRegistry();
10002
+ this.queryOptimizer = new QueryOptimizer({
10003
+ indexRegistry: this.indexRegistry
10004
+ });
10005
+ this.indexRegistry.setFallbackIndex(
10006
+ new FallbackIndex(
10007
+ () => this.getAllCompositeKeys(),
10008
+ (compositeKey) => this.getRecordByCompositeKey(compositeKey),
10009
+ (record, query) => this.matchesIndexQuery(record, query)
10010
+ )
10011
+ );
10012
+ this.queryTracker = new QueryPatternTracker();
10013
+ this.indexAdvisor = new IndexAdvisor(this.queryTracker);
10014
+ if (options.adaptiveIndexing?.autoIndex?.enabled) {
10015
+ this.autoIndexManager = new AutoIndexManager(
10016
+ this.queryTracker,
10017
+ this.indexAdvisor,
10018
+ options.adaptiveIndexing.autoIndex
10019
+ );
10020
+ this.autoIndexManager.setMap(this);
10021
+ } else {
10022
+ this.autoIndexManager = null;
10023
+ }
10024
+ if (options.defaultIndexing && options.defaultIndexing !== "none") {
10025
+ this.defaultIndexingStrategy = new DefaultIndexingStrategy(options.defaultIndexing);
10026
+ } else {
10027
+ this.defaultIndexingStrategy = null;
10028
+ }
10029
+ }
10030
+ // ==================== Index Management ====================
10031
+ /**
10032
+ * Add a hash index on an attribute.
10033
+ *
10034
+ * @param attribute - Attribute to index
10035
+ * @returns Created HashIndex
10036
+ */
10037
+ addHashIndex(attribute) {
10038
+ const index = new HashIndex(attribute);
8495
10039
  this.indexRegistry.addIndex(index);
8496
10040
  this.buildIndexFromExisting(index);
8497
10041
  return index;
@@ -8533,6 +10077,104 @@ var IndexedORMap = class extends ORMap {
8533
10077
  this.indexRegistry.addIndex(index);
8534
10078
  this.buildIndexFromExisting(index);
8535
10079
  }
10080
+ // ==================== Full-Text Search (Phase 11) ====================
10081
+ /**
10082
+ * Enable BM25-based full-text search on specified fields.
10083
+ * This creates a FullTextIndex for relevance-ranked search.
10084
+ *
10085
+ * Note: This is different from addInvertedIndex which provides
10086
+ * boolean matching (contains/containsAll/containsAny). This method
10087
+ * provides BM25 relevance scoring for true full-text search.
10088
+ *
10089
+ * @param config - Full-text index configuration
10090
+ * @returns The created FullTextIndex
10091
+ *
10092
+ * @example
10093
+ * ```typescript
10094
+ * const map = new IndexedORMap(hlc);
10095
+ * map.enableFullTextSearch({
10096
+ * fields: ['title', 'body'],
10097
+ * tokenizer: { minLength: 2 },
10098
+ * bm25: { k1: 1.2, b: 0.75 }
10099
+ * });
10100
+ *
10101
+ * map.add('doc1', { title: 'Hello World', body: 'Test content' });
10102
+ * const results = map.search('hello');
10103
+ * // [{ key: 'doc1', tag: '...', value: {...}, score: 0.5, matchedTerms: ['hello'] }]
10104
+ * ```
10105
+ */
10106
+ enableFullTextSearch(config) {
10107
+ this.fullTextIndex = new FullTextIndex(config);
10108
+ const snapshot = this.getSnapshot();
10109
+ const entries = [];
10110
+ for (const [key, tagMap] of snapshot.items) {
10111
+ for (const [tag, record] of tagMap) {
10112
+ if (!snapshot.tombstones.has(tag)) {
10113
+ const compositeKey = this.createCompositeKey(key, tag);
10114
+ entries.push([compositeKey, record.value]);
10115
+ }
10116
+ }
10117
+ }
10118
+ this.fullTextIndex.buildFromEntries(entries);
10119
+ return this.fullTextIndex;
10120
+ }
10121
+ /**
10122
+ * Check if full-text search is enabled.
10123
+ *
10124
+ * @returns true if full-text search is enabled
10125
+ */
10126
+ isFullTextSearchEnabled() {
10127
+ return this.fullTextIndex !== null;
10128
+ }
10129
+ /**
10130
+ * Get the full-text index (if enabled).
10131
+ *
10132
+ * @returns The FullTextIndex or null
10133
+ */
10134
+ getFullTextIndex() {
10135
+ return this.fullTextIndex;
10136
+ }
10137
+ /**
10138
+ * Perform a BM25-ranked full-text search.
10139
+ * Results are sorted by relevance score (highest first).
10140
+ *
10141
+ * @param query - Search query text
10142
+ * @param options - Search options (limit, minScore, boost)
10143
+ * @returns Array of search results with scores, sorted by relevance
10144
+ *
10145
+ * @throws Error if full-text search is not enabled
10146
+ */
10147
+ search(query, options) {
10148
+ if (!this.fullTextIndex) {
10149
+ throw new Error("Full-text search is not enabled. Call enableFullTextSearch() first.");
10150
+ }
10151
+ const scoredDocs = this.fullTextIndex.search(query, options);
10152
+ const results = [];
10153
+ for (const { docId: compositeKey, score, matchedTerms } of scoredDocs) {
10154
+ const [key, tag] = this.parseCompositeKey(compositeKey);
10155
+ const records = this.getRecords(key);
10156
+ const record = records.find((r) => r.tag === tag);
10157
+ if (record) {
10158
+ results.push({
10159
+ key,
10160
+ tag,
10161
+ value: record.value,
10162
+ score,
10163
+ matchedTerms: matchedTerms ?? []
10164
+ });
10165
+ }
10166
+ }
10167
+ return results;
10168
+ }
10169
+ /**
10170
+ * Disable full-text search and release the index.
10171
+ */
10172
+ disableFullTextSearch() {
10173
+ if (this.fullTextIndex) {
10174
+ this.fullTextIndex.clear();
10175
+ this.fullTextIndex = null;
10176
+ }
10177
+ }
8536
10178
  /**
8537
10179
  * Remove an index.
8538
10180
  *
@@ -8686,6 +10328,9 @@ var IndexedORMap = class extends ORMap {
8686
10328
  const record = super.add(key, value, ttlMs);
8687
10329
  const compositeKey = this.createCompositeKey(key, record.tag);
8688
10330
  this.indexRegistry.onRecordAdded(compositeKey, value);
10331
+ if (this.fullTextIndex) {
10332
+ this.fullTextIndex.onSet(compositeKey, value);
10333
+ }
8689
10334
  return record;
8690
10335
  }
8691
10336
  /**
@@ -8698,6 +10343,9 @@ var IndexedORMap = class extends ORMap {
8698
10343
  for (const record of matchingRecords) {
8699
10344
  const compositeKey = this.createCompositeKey(key, record.tag);
8700
10345
  this.indexRegistry.onRecordRemoved(compositeKey, record.value);
10346
+ if (this.fullTextIndex) {
10347
+ this.fullTextIndex.onRemove(compositeKey);
10348
+ }
8701
10349
  }
8702
10350
  return result;
8703
10351
  }
@@ -8709,6 +10357,9 @@ var IndexedORMap = class extends ORMap {
8709
10357
  if (applied) {
8710
10358
  const compositeKey = this.createCompositeKey(key, record.tag);
8711
10359
  this.indexRegistry.onRecordAdded(compositeKey, record.value);
10360
+ if (this.fullTextIndex) {
10361
+ this.fullTextIndex.onSet(compositeKey, record.value);
10362
+ }
8712
10363
  }
8713
10364
  return applied;
8714
10365
  }
@@ -8731,6 +10382,9 @@ var IndexedORMap = class extends ORMap {
8731
10382
  if (removedValue !== void 0 && removedKey !== void 0) {
8732
10383
  const compositeKey = this.createCompositeKey(removedKey, tag);
8733
10384
  this.indexRegistry.onRecordRemoved(compositeKey, removedValue);
10385
+ if (this.fullTextIndex) {
10386
+ this.fullTextIndex.onRemove(compositeKey);
10387
+ }
8734
10388
  }
8735
10389
  }
8736
10390
  /**
@@ -8739,6 +10393,9 @@ var IndexedORMap = class extends ORMap {
8739
10393
  clear() {
8740
10394
  super.clear();
8741
10395
  this.indexRegistry.clear();
10396
+ if (this.fullTextIndex) {
10397
+ this.fullTextIndex.clear();
10398
+ }
8742
10399
  }
8743
10400
  // ==================== Helper Methods ====================
8744
10401
  /**
@@ -9091,6 +10748,7 @@ var IndexedORMap = class extends ORMap {
9091
10748
  };
9092
10749
  export {
9093
10750
  AuthMessageSchema,
10751
+ BM25Scorer,
9094
10752
  BatchMessageSchema,
9095
10753
  BuiltInProcessors,
9096
10754
  BuiltInResolvers,
@@ -9114,6 +10772,7 @@ export {
9114
10772
  DEFAULT_RESOLVER_RATE_LIMITS,
9115
10773
  DEFAULT_STOP_WORDS,
9116
10774
  DEFAULT_WRITE_CONCERN_TIMEOUT,
10775
+ ENGLISH_STOPWORDS,
9117
10776
  EntryProcessBatchRequestSchema,
9118
10777
  EntryProcessBatchResponseSchema,
9119
10778
  EntryProcessKeyResultSchema,
@@ -9123,8 +10782,11 @@ export {
9123
10782
  EntryProcessorSchema,
9124
10783
  EventJournalImpl,
9125
10784
  FORBIDDEN_PATTERNS,
10785
+ BM25InvertedIndex as FTSInvertedIndex,
10786
+ BM25Tokenizer as FTSTokenizer,
9126
10787
  FallbackIndex,
9127
10788
  FilteringResultSet,
10789
+ FullTextIndex,
9128
10790
  HLC,
9129
10791
  HashIndex,
9130
10792
  IndexRegistry,
@@ -9188,9 +10850,22 @@ export {
9188
10850
  QuerySubMessageSchema,
9189
10851
  QueryUnsubMessageSchema,
9190
10852
  RESOLVER_FORBIDDEN_PATTERNS,
10853
+ ReciprocalRankFusion,
9191
10854
  RegisterResolverRequestSchema,
9192
10855
  RegisterResolverResponseSchema,
9193
10856
  Ringbuffer,
10857
+ SearchMessageSchema,
10858
+ SearchOptionsSchema,
10859
+ SearchPayloadSchema,
10860
+ SearchRespMessageSchema,
10861
+ SearchRespPayloadSchema,
10862
+ SearchSubMessageSchema,
10863
+ SearchSubPayloadSchema,
10864
+ SearchUnsubMessageSchema,
10865
+ SearchUnsubPayloadSchema,
10866
+ SearchUpdateMessageSchema,
10867
+ SearchUpdatePayloadSchema,
10868
+ SearchUpdateTypeSchema,
9194
10869
  SetResultSet,
9195
10870
  SimpleAttribute,
9196
10871
  SortedMap,
@@ -9236,6 +10911,7 @@ export {
9236
10911
  isUsingNativeHash,
9237
10912
  isWriteConcernAchieved,
9238
10913
  multiAttribute,
10914
+ porterStem,
9239
10915
  resetNativeHash,
9240
10916
  serialize,
9241
10917
  simpleAttribute,