@absolutejs/absolute 0.19.0-beta.641 → 0.19.0-beta.642

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -16711,6 +16711,13 @@ var prepareRAGDirectoryDocuments = async (input) => prepareRAGDocuments(await lo
16711
16711
 
16712
16712
  // src/ai/rag/collection.ts
16713
16713
  var DEFAULT_TOP_K2 = 6;
16714
// Internal constants for multivector storage.
// The delimiter is placed between a parent chunk id and a variant id when
// expandChunkForMultivectorStorage builds the chunk id of a variant record.
+ var MULTIVECTOR_VARIANT_CHUNK_DELIMITER = "__mv__";
16715
// Metadata key: on a variant record, holds the chunk id of the chunk it was expanded from.
+ var MULTIVECTOR_PARENT_CHUNK_ID = "absoluteMultivectorParentChunkId";
16716
// Metadata key: the variant's own id.
+ var MULTIVECTOR_VARIANT_ID = "absoluteMultivectorVariantId";
16717
// Metadata key: the variant's label, written only when the label is truthy.
+ var MULTIVECTOR_VARIANT_LABEL = "absoluteMultivectorVariantLabel";
16718
// Metadata key: the variant's text, written only when the text is truthy.
+ var MULTIVECTOR_VARIANT_TEXT = "absoluteMultivectorVariantText";
16719
// Metadata key: the variant's own metadata object, written only when present.
+ var MULTIVECTOR_VARIANT_METADATA = "absoluteMultivectorVariantMetadata";
16720
// Metadata key: set to true on the primary record by ingest and to false on variant records.
// All six metadata keys above are removed again by stripMultivectorInternalMetadata.
+ var MULTIVECTOR_PRIMARY = "absoluteMultivectorPrimary";
16714
16721
  var resolveNativeQueryProfile = (input) => {
16715
16722
  if (!input.profile) {
16716
16723
  return;
@@ -16826,6 +16833,108 @@ var mergeQueryResults = (results) => {
16826
16833
  return left.chunkId.localeCompare(right.chunkId);
16827
16834
  });
16828
16835
  };
16836
/**
 * Looks up the parent chunk id recorded in a result's metadata.
 *
 * @param result Query result; its `metadata` may be missing.
 * @returns The value stored under MULTIVECTOR_PARENT_CHUNK_ID when that value
 *   is a string, otherwise `undefined`.
 */
var getMultivectorParentChunkId = (result) => {
  const parentId = result.metadata?.[MULTIVECTOR_PARENT_CHUNK_ID];
  if (typeof parentId !== "string") {
    return undefined;
  }
  return parentId;
};
16837
/**
 * Returns a shallow copy of `metadata` without the internal multivector
 * bookkeeping keys. The input object is not mutated.
 *
 * @param metadata Metadata object, or a falsy value.
 * @returns A cleaned copy, or `undefined` when `metadata` is falsy.
 */
var stripMultivectorInternalMetadata = (metadata) => {
  if (!metadata) {
    return undefined;
  }
  const internalKeys = [
    MULTIVECTOR_PARENT_CHUNK_ID,
    MULTIVECTOR_VARIANT_ID,
    MULTIVECTOR_VARIANT_LABEL,
    MULTIVECTOR_VARIANT_TEXT,
    MULTIVECTOR_VARIANT_METADATA,
    MULTIVECTOR_PRIMARY
  ];
  const publicMetadata = { ...metadata };
  for (const key of internalKeys) {
    delete publicMetadata[key];
  }
  return publicMetadata;
};
16850
/**
 * Folds variant hits back into one result per parent chunk.
 *
 * Results are grouped by their parent chunk id, falling back to their own
 * `chunkId` when no parent id is recorded. Each group is represented by its
 * highest-scoring member, re-labelled with the group's id, with internal
 * multivector metadata removed and a summary of the matched variants added.
 *
 * @param results Query results to collapse.
 * @returns `collapsed` (one result per group, sorted by descending score and
 *   then by `chunkId`), `collapsedParents` (number of groups containing at
 *   least one variant hit) and `variantHits` (total variant hits).
 */
var collapseMultivectorResults = (results) => {
  // Map keeps groups in first-seen order.
  const buckets = new Map();
  for (const hit of results) {
    const parentId = getMultivectorParentChunkId(hit);
    const groupKey = parentId ?? hit.chunkId;
    // Truthiness check on purpose: an empty-string parent id is not counted as a variant hit.
    const increment = parentId ? 1 : 0;
    const bucket = buckets.get(groupKey);
    if (!bucket) {
      buckets.set(groupKey, {
        parentChunkId: groupKey,
        results: [hit],
        variantHits: increment
      });
    } else {
      bucket.results.push(hit);
      bucket.variantHits += increment;
    }
  }

  let variantHits = 0;
  let collapsedParents = 0;
  const collapsed = [];
  for (const bucket of buckets.values()) {
    variantHits += bucket.variantHits;
    if (bucket.variantHits > 0) {
      collapsedParents += 1;
    }

    // Representative hit: strictly greater score wins, so the earliest hit wins ties.
    let best = bucket.results[0];
    for (let index = 1; index < bucket.results.length; index += 1) {
      const candidate = bucket.results[index];
      if (candidate.score > best.score) {
        best = candidate;
      }
    }

    // Only hits that carry a string variant id are reported as variant matches.
    const variantMatches = [];
    for (const hit of bucket.results) {
      const variantId = hit.metadata?.[MULTIVECTOR_VARIANT_ID];
      if (typeof variantId !== "string") {
        continue;
      }
      const rawLabel = hit.metadata?.[MULTIVECTOR_VARIANT_LABEL];
      variantMatches.push({
        id: variantId,
        label: typeof rawLabel === "string" ? String(rawLabel) : undefined,
        score: hit.score
      });
    }
    variantMatches.sort((left, right) => right.score - left.score);

    const publicMetadata = stripMultivectorInternalMetadata(best.metadata) ?? {};
    const topMatch = variantMatches[0];
    collapsed.push({
      ...best,
      chunkId: bucket.parentChunkId,
      metadata: {
        ...publicMetadata,
        multivectorMatchedVariantCount: variantMatches.length,
        multivectorMatchedVariantId: topMatch?.id,
        multivectorMatchedVariantLabel: topMatch?.label,
        multivectorMatchedVariants: variantMatches.length > 0
          ? variantMatches.map(({ id, label, score }) => ({ id, label, score }))
          : undefined
      }
    });
  }

  // Highest score first; equal scores are ordered by chunk id.
  collapsed.sort((left, right) =>
    right.score === left.score
      ? left.chunkId.localeCompare(right.chunkId)
      : right.score - left.score
  );

  return { collapsed, collapsedParents, variantHits };
};
16914
/**
 * Expands one chunk into the list of records to store: a shallow copy of the
 * chunk itself, followed by one record per embedding variant that has a
 * truthy `id`.
 *
 * @param chunk Chunk with optional `embeddingVariants` and `metadata`.
 * @returns Array whose first element is the primary copy; variants without an
 *   id are skipped.
 */
var expandChunkForMultivectorStorage = (chunk) => {
  const parentMetadata = chunk.metadata ?? {};

  // Builds the stored record for a single variant. The record starts from a
  // spread of the parent chunk, so it inherits every parent field (including
  // `embeddingVariants`) unless overridden below.
  const toVariantRecord = (variant) => {
    const variantMetadata = {
      ...parentMetadata,
      [MULTIVECTOR_PARENT_CHUNK_ID]: chunk.chunkId,
      [MULTIVECTOR_PRIMARY]: false,
      [MULTIVECTOR_VARIANT_ID]: variant.id
    };
    // Optional fields are written only when truthy.
    if (variant.label) {
      variantMetadata[MULTIVECTOR_VARIANT_LABEL] = variant.label;
    }
    if (variant.text) {
      variantMetadata[MULTIVECTOR_VARIANT_TEXT] = variant.text;
    }
    if (variant.metadata) {
      variantMetadata[MULTIVECTOR_VARIANT_METADATA] = variant.metadata;
    }
    return {
      ...chunk,
      chunkId: `${chunk.chunkId}${MULTIVECTOR_VARIANT_CHUNK_DELIMITER}${variant.id}`,
      embedding: variant.embedding,
      metadata: variantMetadata,
      text: variant.text ?? chunk.text
    };
  };

  const records = [{ ...chunk }];
  for (const variant of chunk.embeddingVariants ?? []) {
    if (variant.id) {
      records.push(toVariantRecord(variant));
    }
  }
  return records;
};
16829
16938
  var getRAGSourceDiversityKey = (result) => {
16830
16939
  const documentId = typeof result.metadata?.documentId === "string" ? result.metadata.documentId : undefined;
16831
16940
  return result.source ?? documentId ?? result.title ?? result.chunkId;
@@ -17272,23 +17381,34 @@ var createRAGCollection = (options) => {
17272
17381
  topK: lexicalTopK
17273
17382
  }) ?? Promise.resolve([]) : Promise.resolve([])
17274
17383
  ]);
17384
+ const annotatedLexicalResults = annotateRetrievalQueryOrigin({
17385
+ inputQuery: input.query,
17386
+ query,
17387
+ queryIndex,
17388
+ results: weightQueryResults(lexicalResults2, queryIndex),
17389
+ transformedQuery: transformed.query
17390
+ });
17391
+ const annotatedVectorResults = annotateRetrievalQueryOrigin({
17392
+ inputQuery: input.query,
17393
+ query,
17394
+ queryIndex,
17395
+ results: weightQueryResults(vectorResults2, queryIndex),
17396
+ transformedQuery: transformed.query
17397
+ });
17398
+ const collapsedLexicalResults = collapseMultivectorResults(annotatedLexicalResults);
17399
+ const collapsedVectorResults = collapseMultivectorResults(annotatedVectorResults);
17275
17400
  return {
17276
- lexicalResults: annotateRetrievalQueryOrigin({
17277
- inputQuery: input.query,
17278
- query,
17279
- queryIndex,
17280
- results: weightQueryResults(lexicalResults2, queryIndex),
17281
- transformedQuery: transformed.query
17282
- }),
17283
- vectorResults: annotateRetrievalQueryOrigin({
17284
- inputQuery: input.query,
17285
- query,
17286
- queryIndex,
17287
- results: weightQueryResults(vectorResults2, queryIndex),
17288
- transformedQuery: transformed.query
17289
- })
17401
+ lexicalResults: collapsedLexicalResults.collapsed,
17402
+ lexicalVariantHits: collapsedLexicalResults.variantHits,
17403
+ lexicalCollapsedParents: collapsedLexicalResults.collapsedParents,
17404
+ vectorResults: collapsedVectorResults.collapsed,
17405
+ vectorVariantHits: collapsedVectorResults.variantHits,
17406
+ vectorCollapsedParents: collapsedVectorResults.collapsedParents
17290
17407
  };
17291
17408
  }));
17409
+ const vectorVariantHits = resultGroups.reduce((total, group) => total + group.vectorVariantHits, 0);
17410
+ const lexicalVariantHits = resultGroups.reduce((total, group) => total + group.lexicalVariantHits, 0);
17411
+ const collapsedParents = resultGroups.reduce((total, group) => total + Math.max(group.vectorCollapsedParents, group.lexicalCollapsedParents), 0);
17292
17412
  const vectorResults = mergeQueryResults(resultGroups.flatMap((group) => group.vectorResults));
17293
17413
  if (runVector) {
17294
17414
  const vectorPlanMetadata = buildStoreQueryPlanTraceMetadata(options.store);
@@ -17296,6 +17416,8 @@ var createRAGCollection = (options) => {
17296
17416
  count: vectorResults.length,
17297
17417
  label: "Collected vector candidates",
17298
17418
  metadata: {
17419
+ collapsedParents,
17420
+ multiVectorVariantHits: vectorVariantHits,
17299
17421
  queryCount: searchQueries.length,
17300
17422
  topK: candidateTopK,
17301
17423
  ...vectorPlanMetadata
@@ -17311,6 +17433,8 @@ var createRAGCollection = (options) => {
17311
17433
  count: lexicalResults.length,
17312
17434
  label: "Collected lexical candidates",
17313
17435
  metadata: {
17436
+ collapsedParents,
17437
+ multiVectorVariantHits: lexicalVariantHits,
17314
17438
  queryCount: searchQueries.length,
17315
17439
  topK: lexicalTopK
17316
17440
  },
@@ -17419,6 +17543,12 @@ var createRAGCollection = (options) => {
17419
17543
  queryTransformLabel: transformed.label,
17420
17544
  queryTransformProvider: resolvedQueryTransform?.providerName,
17421
17545
  queryTransformReason: transformed.reason,
17546
+ multiVector: {
17547
+ collapsedParents,
17548
+ configured: vectorVariantHits > 0 || lexicalVariantHits > 0,
17549
+ lexicalVariantHits,
17550
+ vectorVariantHits
17551
+ },
17422
17552
  resultCounts: {
17423
17553
  final: limited.length,
17424
17554
  fused: results.length,
@@ -17475,6 +17605,12 @@ var createRAGCollection = (options) => {
17475
17605
  queryTransformLabel: transformed.label,
17476
17606
  queryTransformProvider: resolvedQueryTransform?.providerName,
17477
17607
  queryTransformReason: transformed.reason,
17608
+ multiVector: {
17609
+ collapsedParents,
17610
+ configured: vectorVariantHits > 0 || lexicalVariantHits > 0,
17611
+ lexicalVariantHits,
17612
+ vectorVariantHits
17613
+ },
17478
17614
  resultCounts: {
17479
17615
  final: filtered.length,
17480
17616
  fused: results.length,
@@ -17502,19 +17638,31 @@ var createRAGCollection = (options) => {
17502
17638
  return result.results;
17503
17639
  };
17504
17640
  const ingest = async (input) => {
17505
- const chunks = await Promise.all(input.chunks.map(async (chunk) => {
17506
- if (chunk.embedding) {
17507
- validateRAGEmbeddingDimensions(chunk.embedding, getExpectedDimensions(), "chunk");
17508
- return chunk;
17509
- }
17510
- return {
17511
- ...chunk,
17512
- embedding: await embed({
17641
+ const chunks = (await Promise.all(input.chunks.map(async (chunk) => {
17642
+ const normalizedEmbedding = chunk.embedding ? (validateRAGEmbeddingDimensions(chunk.embedding, getExpectedDimensions(), "chunk"), chunk.embedding) : await embed({
17643
+ model: options.defaultModel,
17644
+ text: chunk.text
17645
+ }, "chunk");
17646
+ const normalizedVariants = chunk.embeddingVariants ? await Promise.all(chunk.embeddingVariants.map(async (variant) => {
17647
+ const embedding = variant.embedding ? (validateRAGEmbeddingDimensions(variant.embedding, getExpectedDimensions(), "chunk"), variant.embedding) : await embed({
17513
17648
  model: options.defaultModel,
17514
- text: chunk.text
17515
- }, "chunk")
17516
- };
17517
- }));
17649
+ text: variant.text ?? chunk.text
17650
+ }, "chunk");
17651
+ return {
17652
+ ...variant,
17653
+ embedding
17654
+ };
17655
+ })) : undefined;
17656
+ return expandChunkForMultivectorStorage({
17657
+ ...chunk,
17658
+ embedding: normalizedEmbedding,
17659
+ embeddingVariants: normalizedVariants,
17660
+ metadata: {
17661
+ ...chunk.metadata ?? {},
17662
+ [MULTIVECTOR_PRIMARY]: true
17663
+ }
17664
+ });
17665
+ }))).flat();
17518
17666
  await options.store.upsert({ chunks });
17519
17667
  };
17520
17668
  return {
@@ -33909,5 +34057,5 @@ export {
33909
34057
  addRAGEvaluationSuiteCase
33910
34058
  };
33911
34059
 
33912
- //# debugId=27461AB67370F55164756E2164756E21
34060
+ //# debugId=7958E184D4E3DB1764756E2164756E21
33913
34061
  //# sourceMappingURL=index.js.map