@absolutejs/absolute 0.19.0-beta.506 → 0.19.0-beta.508

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2221,10 +2221,31 @@ var extractWeightedLexicalFields = (result) => {
2221
2221
  const source = result.source ?? "";
2222
2222
  const archivePath = typeof metadata.archivePath === "string" ? metadata.archivePath : source.includes("#") ? source.split("#")[1] ?? "" : "";
2223
2223
  const mediaSegments = Array.isArray(metadata.mediaSegments) ? metadata.mediaSegments.map((segment) => segment && typeof segment === "object" ? toFieldText(segment) : "").filter(Boolean).join(" ") : "";
2224
+ const mediaTimestampFocus = metadata.sourceNativeKind === "media_segment" ? [
2225
+ typeof metadata.mediaKind === "string" ? metadata.mediaKind : "",
2226
+ "audio",
2227
+ "video",
2228
+ "media",
2229
+ "timestamp",
2230
+ "segment",
2231
+ typeof metadata.mediaSegmentStartMs === "number" ? `timestamp ${metadata.mediaSegmentStartMs}` : "",
2232
+ typeof metadata.mediaSegmentEndMs === "number" ? `timestamp ${metadata.mediaSegmentEndMs}` : ""
2233
+ ].filter(Boolean).join(" ") : "";
2234
+ const spreadsheetFocus = metadata.sourceNativeKind === "spreadsheet_sheet" ? [
2235
+ "spreadsheet",
2236
+ "workbook",
2237
+ "worksheet",
2238
+ "sheet",
2239
+ typeof metadata.sheetName === "string" ? `sheet named ${metadata.sheetName}` : "",
2240
+ typeof metadata.sheetIndex === "number" ? `worksheet ${metadata.sheetIndex + 1}` : ""
2241
+ ].filter(Boolean).join(" ") : "";
2224
2242
  const metadataFocus = [
2225
2243
  metadata.sourceNativeKind,
2244
+ mediaTimestampFocus,
2245
+ spreadsheetFocus,
2226
2246
  metadata.sheetName,
2227
2247
  metadata.sheetNames,
2248
+ metadata.sheetIndex,
2228
2249
  metadata.slideNumber,
2229
2250
  metadata.slideTitle,
2230
2251
  metadata.slideTitles,
@@ -2248,10 +2269,10 @@ var FIELD_WEIGHTS = {
2248
2269
  archivePath: 4.2,
2249
2270
  chunkText: 1,
2250
2271
  mediaSegments: 3.8,
2251
- metadataFocus: 3.2,
2272
+ metadataFocus: 4.1,
2252
2273
  metadataText: 1.4,
2253
2274
  source: 3.4,
2254
- title: 2
2275
+ title: 2.8
2255
2276
  };
2256
2277
  var getWeightedFieldTokens = (result) => {
2257
2278
  const fields = extractWeightedLexicalFields({
@@ -2435,7 +2456,9 @@ var resolveFileKindBoost = (queryTokens, metadata) => {
2435
2456
  "framework",
2436
2457
  "transcript",
2437
2458
  "audio",
2438
- "video"
2459
+ "video",
2460
+ "timestamp",
2461
+ "segment"
2439
2462
  ])) {
2440
2463
  return 0.75;
2441
2464
  }
@@ -2451,7 +2474,8 @@ var resolveTranscriptBoost = (queryTokens, metadata) => {
2451
2474
  return 0;
2452
2475
  }
2453
2476
  const overlap = queryTokens.filter((token) => segmentText.includes(token)).length;
2454
- return overlap / Math.max(1, queryTokens.length);
2477
+ const timestampBoost = queryTokens.includes("timestamp") ? 0.35 : 0;
2478
+ return Math.min(1, overlap / Math.max(1, queryTokens.length) + timestampBoost);
2455
2479
  };
2456
2480
  var resolveArchiveBoost = (queryTokens, result) => {
2457
2481
  const archivePath = typeof result.metadata?.archivePath === "string" ? result.metadata.archivePath.toLowerCase() : typeof result.source === "string" && result.source.includes("#") ? result.source.split("#")[1]?.toLowerCase() ?? "" : "";
@@ -2626,6 +2650,7 @@ var DOMAIN_EXPANSIONS = {
2626
2650
  video: ["video", "media", "recording", "transcript", "timestamp"]
2627
2651
  };
2628
2652
  var TERM_EXPANSIONS = {
2653
+ audio: ["timestamp", "transcript", "mp3", "speaker"],
2629
2654
  frontend: [
2630
2655
  "frontend",
2631
2656
  "framework",
@@ -2649,7 +2674,9 @@ var TERM_EXPANSIONS = {
2649
2674
  procedure: ["recovery", "runbook"],
2650
2675
  procedur: ["recovery", "runbook"],
2651
2676
  receipt: ["invoice", "ocr", "pdf"],
2677
+ named: ["sheet", "worksheet", "title"],
2652
2678
  sheet: ["worksheet", "workbook", "xlsx"],
2679
+ timestamp: ["audio", "media", "transcript", "segment"],
2653
2680
  transcript: ["audio", "video", "media"],
2654
2681
  workbook: ["sheet", "spreadsheet", "xlsx"]
2655
2682
  };
@@ -2699,6 +2726,22 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
2699
2726
  }
2700
2727
  const expandedTerms = domains.flatMap((domain) => DOMAIN_EXPANSIONS[domain] ?? []);
2701
2728
  const tokenExpansions = tokens.flatMap((token) => TERM_EXPANSIONS[token] ?? []);
2729
+ const spreadsheetNamedVariant = domains.includes("spreadsheet") ? uniqueQueryStrings([
2730
+ ...tokens,
2731
+ "spreadsheet",
2732
+ "workbook",
2733
+ "worksheet",
2734
+ "sheet",
2735
+ "named"
2736
+ ]).join(" ") : "";
2737
+ const mediaTimestampVariant = domains.includes("audio") || domains.includes("video") ? uniqueQueryStrings([
2738
+ ...tokens,
2739
+ "audio",
2740
+ "media",
2741
+ "timestamp",
2742
+ "transcript",
2743
+ "segment"
2744
+ ]).join(" ") : "";
2702
2745
  const rewrittenQuery = uniqueQueryStrings([
2703
2746
  ...tokens,
2704
2747
  ...expandedTerms,
@@ -2709,9 +2752,15 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
2709
2752
  ...DOMAIN_EXPANSIONS[domain] ?? [],
2710
2753
  ...tokenExpansions
2711
2754
  ]).join(" "));
2755
+ if (spreadsheetNamedVariant.length > 0) {
2756
+ variants.push(spreadsheetNamedVariant);
2757
+ }
2758
+ if (mediaTimestampVariant.length > 0) {
2759
+ variants.push(mediaTimestampVariant);
2760
+ }
2712
2761
  return {
2713
2762
  query: rewrittenQuery,
2714
- variants
2763
+ variants: uniqueQueryStrings(variants)
2715
2764
  };
2716
2765
  }
2717
2766
  });
@@ -3020,6 +3069,16 @@ var normalizeWhitespace = (value) => value.replace(/\r\n?/g, `
3020
3069
  `).replace(/\n{3,}/g, `
3021
3070
 
3022
3071
  `).trim();
3072
+ var formatMediaTimestampForIngest = (value) => {
3073
+ if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
3074
+ return;
3075
+ }
3076
+ const totalSeconds = Math.floor(value / 1000);
3077
+ const minutes = Math.floor(totalSeconds / 60);
3078
+ const seconds = totalSeconds % 60;
3079
+ const milliseconds = Math.floor(value % 1000);
3080
+ return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
3081
+ };
3023
3082
  var decodeHtmlEntities = (value) => {
3024
3083
  let output = value;
3025
3084
  for (const [pattern, replacement] of HTML_ENTITY_REPLACEMENTS) {
@@ -3552,6 +3611,7 @@ var createOfficeDocumentExtractor = () => ({
3552
3611
  } else if (extension === ".xlsx" || extension === ".ods") {
3553
3612
  text = spreadsheetText(entries);
3554
3613
  const sheets = spreadsheetSheetTexts(entries);
3614
+ const workbookLabel = input.title ?? input.name ?? input.path ?? input.source ?? DEFAULT_BINARY_NAME;
3555
3615
  officeMetadata = {
3556
3616
  sheetNames: spreadsheetSheetNames(entries)
3557
3617
  };
@@ -3568,9 +3628,9 @@ var createOfficeDocumentExtractor = () => ({
3568
3628
  sheetName: sheet.name
3569
3629
  },
3570
3630
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
3571
- text: normalizeWhitespace(`Spreadsheet sheet ${sheet.name} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
3631
+ text: normalizeWhitespace(`Spreadsheet workbook ${workbookLabel}. ` + `Worksheet ${index + 1}. ` + `Workbook sheet named ${sheet.name}. ` + `Sheet ${sheet.name} from spreadsheet workbook ${workbookLabel}.` + `
3572
3632
  ${sheet.text}`),
3573
- title: input.title ? `${input.title} \xB7 ${sheet.name}` : sheet.name
3633
+ title: input.title ? `${input.title} \xB7 Sheet ${sheet.name}` : `Sheet ${sheet.name}`
3574
3634
  }));
3575
3635
  } else if (extension === ".pptx" || extension === ".odp") {
3576
3636
  text = presentationText(entries);
@@ -3651,6 +3711,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3651
3711
  }
3652
3712
  const startMs = typeof segment.startMs === "number" ? segment.startMs : undefined;
3653
3713
  const endMs = typeof segment.endMs === "number" ? segment.endMs : undefined;
3714
+ const startLabel = formatMediaTimestampForIngest(startMs);
3715
+ const endLabel = formatMediaTimestampForIngest(endMs);
3716
+ const mediaKind = typeof result.metadata?.mediaKind === "string" ? result.metadata.mediaKind : "media";
3654
3717
  segmentDocuments.push({
3655
3718
  chunking: input.chunking,
3656
3719
  contentType: input.contentType,
@@ -3667,9 +3730,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
3667
3730
  speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
3668
3731
  },
3669
3732
  source,
3670
- text: normalizeWhitespace(`Media transcript segment${typeof startMs === "number" ? ` ${startMs}-${endMs ?? startMs}ms` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}.
3733
+ text: normalizeWhitespace(`${mediaKind} transcript segment${startLabel ? ` at timestamp ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}. ` + `${mediaKind} timestamp evidence${startLabel ? ` ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""}.` + `
3671
3734
  ${text}`),
3672
- title: input.title ? `${input.title} \xB7 Segment ${index + 1}` : `Segment ${index + 1}`
3735
+ title: input.title ? `${input.title} \xB7 ${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}` : `${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}`
3673
3736
  });
3674
3737
  }
3675
3738
  const summaryDocument = {
@@ -10202,5 +10265,5 @@ export {
10202
10265
  aiChat
10203
10266
  };
10204
10267
 
10205
- //# debugId=135B1E72591570CC64756E2164756E21
10268
+ //# debugId=B845F096CD7ACDA464756E2164756E21
10206
10269
  //# sourceMappingURL=index.js.map