npm - @topgunbuild/core - Versions diffs - 0.8.0 → 0.9.0 - Mend

@topgunbuild/core 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.mjs CHANGED Viewed

@@ -1973,6 +1973,95 @@ var Predicates = class {
   static containsAny(attribute, values) {
     return { op: "containsAny", attribute, value: values };
   }
+  // ============== Full-Text Search Predicates (Phase 12) ==============
+  /**
+   * Create a 'match' predicate for full-text search.
+   * Uses BM25 scoring to find relevant documents.
+   *
+   * @param attribute - Field to search in
+   * @param query - Search query string
+   * @param options - Match options (minScore, boost, operator, fuzziness)
+   *
+   * @example
+   * ```typescript
+   * // Simple match
+   * Predicates.match('title', 'machine learning')
+   *
+   * // With options
+   * Predicates.match('body', 'neural networks', { minScore: 1.0, boost: 2.0 })
+   * ```
+   */
+  static match(attribute, query, options) {
+    // `options` is forwarded untouched under the `matchOptions` key; nothing is validated here.
+    const predicate = { op: "match", attribute, query, matchOptions: options };
+    return predicate;
+  }
+  /**
+   * Create a 'matchPhrase' predicate for exact phrase matching.
+   * Matches documents containing the exact phrase (words in order).
+   *
+   * @param attribute - Field to search in
+   * @param query - Phrase to match
+   * @param slop - Word distance tolerance (0 = exact, 1 = allow 1 word between)
+   *
+   * @example
+   * ```typescript
+   * // Exact phrase
+   * Predicates.matchPhrase('body', 'machine learning')
+   *
+   * // With slop (allows "machine deep learning")
+   * Predicates.matchPhrase('body', 'machine learning', 1)
+   * ```
+   */
+  static matchPhrase(attribute, query, slop) {
+    // `slop` is stored as given; when omitted the key is present with value `undefined`.
+    const predicate = { op: "matchPhrase", attribute, query, slop };
+    return predicate;
+  }
+  /**
+   * Create a 'matchPrefix' predicate for prefix matching.
+   * Matches documents where field starts with the given prefix.
+   *
+   * @param attribute - Field to search in
+   * @param prefix - Prefix to match
+   * @param maxExpansions - Maximum number of term expansions
+   *
+   * @example
+   * ```typescript
+   * // Match titles starting with "mach"
+   * Predicates.matchPrefix('title', 'mach')
+   *
+   * // Limit expansions for performance
+   * Predicates.matchPrefix('title', 'mach', 50)
+   * ```
+   */
+  static matchPrefix(attribute, prefix, maxExpansions) {
+    // `maxExpansions` is stored as given; when omitted the key is present with value `undefined`.
+    const predicate = { op: "matchPrefix", attribute, prefix, maxExpansions };
+    return predicate;
+  }
+  /**
+   * Create a multi-field match predicate.
+   * Searches across multiple fields with optional per-field boosting.
+   *
+   * @param attributes - Fields to search in
+   * @param query - Search query string
+   * @param options - Options including per-field boost factors
+   *
+   * @example
+   * ```typescript
+   * // Search title and body
+   * Predicates.multiMatch(['title', 'body'], 'machine learning')
+   *
+   * // With boosting (title 2x more important)
+   * Predicates.multiMatch(['title', 'body'], 'machine learning', {
+   *   boost: { title: 2.0, body: 1.0 }
+   * })
+   * ```
+   */
+  static multiMatch(attributes, query, options) {
+    // One `match` child per field, combined under `or` so a hit in any field qualifies.
+    const children = attributes.map((attr) => {
+      const fieldBoost = options?.boost?.[attr];
+      return {
+        op: "match",
+        attribute: attr,
+        query,
+        // Only a missing boost is skipped. An explicit 0 is a real value and is kept;
+        // a plain truthiness test would silently discard it.
+        matchOptions: fieldBoost != null ? { boost: fieldBoost } : void 0
+      };
+    });
+    return { op: "or", children };
+  }
 };
 function evaluatePredicate(predicate, data) {
   if (!data) return false;
@@ -3595,6 +3684,9 @@ function isSimpleQuery(query) {
 function isLogicalQuery(query) {
   return query.type === "and" || query.type === "or" || query.type === "not";
 }
+function isFTSQuery(query) {
+  // A node is a full-text-search query when its `type` is one of these three literals.
+  const ftsTypes = ["match", "matchPhrase", "matchPrefix"];
+  return ftsTypes.includes(query.type);
+}
 // src/query/indexes/StandingQueryIndex.ts
 var _StandingQueryIndex = class _StandingQueryIndex {
@@ -5853,11 +5945,48 @@ var QueryOptimizer = class {
     if ("indexRegistry" in indexRegistryOrOptions) {
       this.indexRegistry = indexRegistryOrOptions.indexRegistry;
       this.standingQueryRegistry = indexRegistryOrOptions.standingQueryRegistry;
+      this.fullTextIndexes = indexRegistryOrOptions.fullTextIndexes ?? /* @__PURE__ */ new Map();
     } else {
       this.indexRegistry = indexRegistryOrOptions;
       this.standingQueryRegistry = standingQueryRegistry;
+      this.fullTextIndexes = /* @__PURE__ */ new Map();
     }
   }
+  /**
+   * Register a full-text index for a field (Phase 12).
+   *
+   * @param field - Field name
+   * @param index - FullTextIndex instance
+   */
+  registerFullTextIndex(field, index) {
+    // Map#set overwrites, so registering the same field again replaces the earlier index.
+    const { fullTextIndexes } = this;
+    fullTextIndexes.set(field, index);
+  }
+  /**
+   * Unregister a full-text index (Phase 12).
+   *
+   * @param field - Field name
+   */
+  unregisterFullTextIndex(field) {
+    // Map#delete is a no-op for unknown keys; its boolean result is intentionally not returned.
+    const { fullTextIndexes } = this;
+    fullTextIndexes.delete(field);
+  }
+  /**
+   * Get registered full-text index for a field (Phase 12).
+   *
+   * @param field - Field name
+   * @returns FullTextIndex or undefined
+   */
+  getFullTextIndex(field) {
+    // Yields `undefined` when nothing is registered for `field`.
+    const registered = this.fullTextIndexes.get(field);
+    return registered;
+  }
+  /**
+   * Check if a full-text index exists for a field (Phase 12).
+   *
+   * @param field - Field name
+   * @returns True if FTS index exists
+   */
+  hasFullTextIndex(field) {
+    // Pure membership test on the field -> index map.
+    const isRegistered = this.fullTextIndexes.has(field);
+    return isRegistered;
+  }
   /**
    * Optimize a query and return an execution plan.
    *
@@ -5931,12 +6060,151 @@ var QueryOptimizer = class {
   optimizeNode(query) {
     if (isLogicalQuery(query)) {
       return this.optimizeLogical(query);
+    } else if (isFTSQuery(query)) { // FTS nodes are tested after logical nodes and before simple ones
+      return this.optimizeFTS(query); // yields a full-scan step when the field has no full-text index
     } else if (isSimpleQuery(query)) {
       return this.optimizeSimple(query);
     } else {
       return { type: "full-scan", predicate: query };
     }
   }
+  /**
+   * Optimize a full-text search query (Phase 12).
+   */
+  optimizeFTS(query) {
+    // With no full-text index registered for the attribute, the plan is a full scan.
+    const indexed = this.hasFullTextIndex(query.attribute);
+    return indexed ? this.buildFTSScanStep(query) : { type: "full-scan", predicate: query };
+  }
+  /**
+   * Build an FTS scan step from a query node (Phase 12).
+   */
+  buildFTSScanStep(query) {
+    const { attribute: field, type: ftsType } = query;
+    // Only the search text and the options differ between the three FTS node types;
+    // every other field of the resulting step is shared.
+    let text;
+    let options;
+    if (ftsType === "match") {
+      text = query.query;
+      options = query.options;
+    } else if (ftsType === "matchPhrase") {
+      text = query.query;
+      // The phrase slop is carried in the `fuzziness` slot of the step options.
+      options = query.slop !== void 0 ? { fuzziness: query.slop } : void 0;
+    } else if (ftsType === "matchPrefix") {
+      text = query.prefix;
+      // The expansion limit is likewise carried in the `fuzziness` slot.
+      options = query.maxExpansions !== void 0 ? { fuzziness: query.maxExpansions } : void 0;
+    } else {
+      // Reject unknown types before any cost estimation is attempted.
+      throw new Error(`Unknown FTS query type: ${query.type}`);
+    }
+    return {
+      type: "fts-scan",
+      field,
+      query: text,
+      ftsType,
+      options,
+      returnsScored: true,
+      estimatedCost: this.estimateFTSCost(field)
+    };
+  }
+  /**
+   * Estimate cost of FTS query based on index size (Phase 12).
+   */
+  estimateFTSCost(field) {
+    const ftsIndex = this.fullTextIndexes.get(field);
+    // No index registered for the field: report the maximum cost.
+    if (!ftsIndex) return Number.MAX_SAFE_INTEGER;
+    // Fixed base of 50 plus a term that grows with log2 of the size the index reports.
+    const sizeTerm = Math.log2(ftsIndex.getSize() + 1);
+    return 50 + 10 * sizeTerm;
+  }
+  /**
+   * Classify predicates by type for hybrid query planning (Phase 12).
+   *
+   * @param predicates - Array of predicates to classify
+   * @returns Classified predicates
+   */
+  classifyPredicates(predicates) {
+    const exactTypes = ["eq", "neq", "in"];
+    const rangeTypes = ["gt", "gte", "lt", "lte", "between"];
+    const exactPredicates = [];
+    const rangePredicates = [];
+    const ftsPredicates = [];
+    const otherPredicates = [];
+    for (const pred of predicates) {
+      // Default bucket: logical nodes, unrecognised simple types and anything else.
+      let bucket = otherPredicates;
+      if (isFTSQuery(pred)) {
+        bucket = ftsPredicates;
+      } else if (isSimpleQuery(pred)) {
+        if (exactTypes.includes(pred.type)) {
+          bucket = exactPredicates;
+        } else if (rangeTypes.includes(pred.type)) {
+          bucket = rangePredicates;
+        }
+      }
+      bucket.push(pred);
+    }
+    return { exactPredicates, rangePredicates, ftsPredicates, otherPredicates };
+  }
+  /**
+   * Determine fusion strategy based on step types (Phase 12).
+   *
+   * Strategy selection:
+   * - All binary (exact/range with no scores) → 'intersection'
+   * - All scored (FTS) → 'score-filter' (filter by score, sort by score)
+   * - Mixed (binary + scored) → 'rrf' (Reciprocal Rank Fusion)
+   *
+   * @param steps - Plan steps to fuse
+   * @returns Fusion strategy
+   */
+  determineFusionStrategy(steps) {
+    const isScored = (step) => this.stepReturnsScored(step);
+    // No scored step at all (this includes an empty list): plain set intersection.
+    if (!steps.some(isScored)) {
+      return "intersection";
+    }
+    // At least one scored step: a mix with unscored steps uses rank fusion,
+    // otherwise the scores are used directly.
+    const mixed = steps.some((step) => !isScored(step));
+    return mixed ? "rrf" : "score-filter";
+  }
+  /**
+   * Check if a plan step returns scored results (Phase 12).
+   */
+  stepReturnsScored(step) {
+    // FTS scans are always scored; a fusion step reports its own flag; every other step is unscored.
+    if (step.type === "fts-scan") {
+      return true;
+    }
+    if (step.type === "fusion") {
+      return step.returnsScored;
+    }
+    return false;
+  }
   /**
    * Optimize a simple (attribute-based) query.
    */
@@ -6192,6 +6460,18 @@ var QueryOptimizer = class {
         return this.estimateCost(step.source) + 10;
       case "not":
         return this.estimateCost(step.source) + 100;
+      // Phase 12: FTS step types
+      case "fts-scan":
+        return step.estimatedCost;
+      case "fusion":
+        return step.steps.reduce((sum, s) => {
+          const cost = this.estimateCost(s);
+          if (cost === Number.MAX_SAFE_INTEGER) {
+            return Number.MAX_SAFE_INTEGER;
+          }
+          return Math.min(sum + cost, Number.MAX_SAFE_INTEGER);
+        }, 0) + 20;
+      // Fusion overhead
       default:
         return Number.MAX_SAFE_INTEGER;
     }
@@ -6212,6 +6492,12 @@ var QueryOptimizer = class {
         return this.usesIndexes(step.source);
       case "not":
         return this.usesIndexes(step.source);
+      // Phase 12: FTS step types
+      case "fts-scan":
+        return true;
+      // FTS uses FullTextIndex
+      case "fusion":
+        return step.steps.some((s) => this.usesIndexes(s));
       default:
         return false;
     }
@@ -7760,6 +8046,131 @@ var DefaultIndexingStrategy = class {
   }
 };
+// src/search/ReciprocalRankFusion.ts
+var ReciprocalRankFusion = class {
+  /**
+   * @param config - Optional settings; `config.k` is the RRF rank constant
+   *   (60 when `config` or `config.k` is null/undefined).
+   */
+  constructor(config) {
+    this.k = config?.k ?? 60;
+  }
+  /**
+   * Merge multiple ranked result lists using RRF.
+   *
+   * Formula: RRF_score(d) = Σ 1 / (k + rank_i(d)), with ranks counted from 1.
+   *
+   * Implemented as the weighted merge with every weight set to 1, so both
+   * entry points share a single scoring path (multiplying by 1 is exact).
+   *
+   * @param resultSets - Array of ranked result lists from different search methods
+   * @returns Merged results sorted by RRF score (descending)
+   */
+  merge(resultSets) {
+    return this.mergeWeighted(resultSets, resultSets.map(() => 1));
+  }
+  /**
+   * Merge with weighted RRF for different method priorities.
+   *
+   * Weighted formula: RRF_score(d) = Σ weight_i * (1 / (k + rank_i(d)))
+   *
+   * Each merged entry carries the summed score, the contributing `source`
+   * names (sorted, joined with "+") and the last original score seen per source.
+   *
+   * @param resultSets - Array of ranked result lists
+   * @param weights - Weights for each result set (same order as resultSets)
+   * @returns Merged results sorted by weighted RRF score (descending)
+   * @throws Error when `weights` and `resultSets` differ in length
+   *
+   * @example
+   * ```typescript
+   * const rrf = new ReciprocalRankFusion();
+   *
+   * // Prioritize exact matches (weight 2.0) over FTS (weight 1.0)
+   * const merged = rrf.mergeWeighted(
+   *   [exactResults, ftsResults],
+   *   [2.0, 1.0]
+   * );
+   * ```
+   */
+  mergeWeighted(resultSets, weights) {
+    if (weights.length !== resultSets.length) {
+      throw new Error(
+        `Weights array length (${weights.length}) must match resultSets length (${resultSets.length})`
+      );
+    }
+    // docId -> running score, contributing sources and per-source original score.
+    const scoreMap = /* @__PURE__ */ new Map();
+    for (let i = 0; i < resultSets.length; i++) {
+      const resultSet = resultSets[i];
+      const weight = weights[i];
+      // Empty lists contribute nothing; the inner loop simply does not run for them.
+      for (let rank = 0; rank < resultSet.length; rank++) {
+        const { docId, score, source } = resultSet[rank];
+        // `rank` is 0-based here, hence the +1 to get the 1-based rank of the formula.
+        const rrfContribution = weight * (1 / (this.k + rank + 1));
+        const existing = scoreMap.get(docId);
+        if (existing) {
+          existing.rrfScore += rrfContribution;
+          existing.sources.add(source);
+          existing.originalScores[source] = score;
+        } else {
+          scoreMap.set(docId, {
+            rrfScore: rrfContribution,
+            sources: /* @__PURE__ */ new Set([source]),
+            originalScores: { [source]: score }
+          });
+        }
+      }
+    }
+    const merged = Array.from(scoreMap, ([docId, data]) => ({
+      docId,
+      score: data.rrfScore,
+      source: Array.from(data.sources).sort().join("+"),
+      originalScores: data.originalScores
+    }));
+    // Array#sort is stable, so equal scores keep first-seen order.
+    merged.sort((a, b) => b.score - a.score);
+    return merged;
+  }
+  /**
+   * Get the k constant used for RRF calculation.
+   */
+  getK() {
+    return this.k;
+  }
+};
 // src/IndexedLWWMap.ts
 var IndexedLWWMap = class extends LWWMap {
   constructor(hlc, options = {}) {
@@ -10439,6 +10850,7 @@ export {
   QuerySubMessageSchema,
   QueryUnsubMessageSchema,
   RESOLVER_FORBIDDEN_PATTERNS,
+  ReciprocalRankFusion,
   RegisterResolverRequestSchema,
   RegisterResolverResponseSchema,
   Ringbuffer,