@absolutejs/absolute 0.19.0-beta.506 → 0.19.0-beta.508
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +73 -10
- package/dist/ai/index.js.map +5 -5
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2221,10 +2221,31 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2221
2221
|
const source = result.source ?? "";
|
|
2222
2222
|
const archivePath = typeof metadata.archivePath === "string" ? metadata.archivePath : source.includes("#") ? source.split("#")[1] ?? "" : "";
|
|
2223
2223
|
const mediaSegments = Array.isArray(metadata.mediaSegments) ? metadata.mediaSegments.map((segment) => segment && typeof segment === "object" ? toFieldText(segment) : "").filter(Boolean).join(" ") : "";
|
|
2224
|
+
const mediaTimestampFocus = metadata.sourceNativeKind === "media_segment" ? [
|
|
2225
|
+
typeof metadata.mediaKind === "string" ? metadata.mediaKind : "",
|
|
2226
|
+
"audio",
|
|
2227
|
+
"video",
|
|
2228
|
+
"media",
|
|
2229
|
+
"timestamp",
|
|
2230
|
+
"segment",
|
|
2231
|
+
typeof metadata.mediaSegmentStartMs === "number" ? `timestamp ${metadata.mediaSegmentStartMs}` : "",
|
|
2232
|
+
typeof metadata.mediaSegmentEndMs === "number" ? `timestamp ${metadata.mediaSegmentEndMs}` : ""
|
|
2233
|
+
].filter(Boolean).join(" ") : "";
|
|
2234
|
+
const spreadsheetFocus = metadata.sourceNativeKind === "spreadsheet_sheet" ? [
|
|
2235
|
+
"spreadsheet",
|
|
2236
|
+
"workbook",
|
|
2237
|
+
"worksheet",
|
|
2238
|
+
"sheet",
|
|
2239
|
+
typeof metadata.sheetName === "string" ? `sheet named ${metadata.sheetName}` : "",
|
|
2240
|
+
typeof metadata.sheetIndex === "number" ? `worksheet ${metadata.sheetIndex + 1}` : ""
|
|
2241
|
+
].filter(Boolean).join(" ") : "";
|
|
2224
2242
|
const metadataFocus = [
|
|
2225
2243
|
metadata.sourceNativeKind,
|
|
2244
|
+
mediaTimestampFocus,
|
|
2245
|
+
spreadsheetFocus,
|
|
2226
2246
|
metadata.sheetName,
|
|
2227
2247
|
metadata.sheetNames,
|
|
2248
|
+
metadata.sheetIndex,
|
|
2228
2249
|
metadata.slideNumber,
|
|
2229
2250
|
metadata.slideTitle,
|
|
2230
2251
|
metadata.slideTitles,
|
|
@@ -2248,10 +2269,10 @@ var FIELD_WEIGHTS = {
|
|
|
2248
2269
|
archivePath: 4.2,
|
|
2249
2270
|
chunkText: 1,
|
|
2250
2271
|
mediaSegments: 3.8,
|
|
2251
|
-
metadataFocus:
|
|
2272
|
+
metadataFocus: 4.1,
|
|
2252
2273
|
metadataText: 1.4,
|
|
2253
2274
|
source: 3.4,
|
|
2254
|
-
title: 2
|
|
2275
|
+
title: 2.8
|
|
2255
2276
|
};
|
|
2256
2277
|
var getWeightedFieldTokens = (result) => {
|
|
2257
2278
|
const fields = extractWeightedLexicalFields({
|
|
@@ -2435,7 +2456,9 @@ var resolveFileKindBoost = (queryTokens, metadata) => {
|
|
|
2435
2456
|
"framework",
|
|
2436
2457
|
"transcript",
|
|
2437
2458
|
"audio",
|
|
2438
|
-
"video"
|
|
2459
|
+
"video",
|
|
2460
|
+
"timestamp",
|
|
2461
|
+
"segment"
|
|
2439
2462
|
])) {
|
|
2440
2463
|
return 0.75;
|
|
2441
2464
|
}
|
|
@@ -2451,7 +2474,8 @@ var resolveTranscriptBoost = (queryTokens, metadata) => {
|
|
|
2451
2474
|
return 0;
|
|
2452
2475
|
}
|
|
2453
2476
|
const overlap = queryTokens.filter((token) => segmentText.includes(token)).length;
|
|
2454
|
-
|
|
2477
|
+
const timestampBoost = queryTokens.includes("timestamp") ? 0.35 : 0;
|
|
2478
|
+
return Math.min(1, overlap / Math.max(1, queryTokens.length) + timestampBoost);
|
|
2455
2479
|
};
|
|
2456
2480
|
var resolveArchiveBoost = (queryTokens, result) => {
|
|
2457
2481
|
const archivePath = typeof result.metadata?.archivePath === "string" ? result.metadata.archivePath.toLowerCase() : typeof result.source === "string" && result.source.includes("#") ? result.source.split("#")[1]?.toLowerCase() ?? "" : "";
|
|
@@ -2626,6 +2650,7 @@ var DOMAIN_EXPANSIONS = {
|
|
|
2626
2650
|
video: ["video", "media", "recording", "transcript", "timestamp"]
|
|
2627
2651
|
};
|
|
2628
2652
|
var TERM_EXPANSIONS = {
|
|
2653
|
+
audio: ["timestamp", "transcript", "mp3", "speaker"],
|
|
2629
2654
|
frontend: [
|
|
2630
2655
|
"frontend",
|
|
2631
2656
|
"framework",
|
|
@@ -2649,7 +2674,9 @@ var TERM_EXPANSIONS = {
|
|
|
2649
2674
|
procedure: ["recovery", "runbook"],
|
|
2650
2675
|
procedur: ["recovery", "runbook"],
|
|
2651
2676
|
receipt: ["invoice", "ocr", "pdf"],
|
|
2677
|
+
named: ["sheet", "worksheet", "title"],
|
|
2652
2678
|
sheet: ["worksheet", "workbook", "xlsx"],
|
|
2679
|
+
timestamp: ["audio", "media", "transcript", "segment"],
|
|
2653
2680
|
transcript: ["audio", "video", "media"],
|
|
2654
2681
|
workbook: ["sheet", "spreadsheet", "xlsx"]
|
|
2655
2682
|
};
|
|
@@ -2699,6 +2726,22 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
|
|
|
2699
2726
|
}
|
|
2700
2727
|
const expandedTerms = domains.flatMap((domain) => DOMAIN_EXPANSIONS[domain] ?? []);
|
|
2701
2728
|
const tokenExpansions = tokens.flatMap((token) => TERM_EXPANSIONS[token] ?? []);
|
|
2729
|
+
const spreadsheetNamedVariant = domains.includes("spreadsheet") ? uniqueQueryStrings([
|
|
2730
|
+
...tokens,
|
|
2731
|
+
"spreadsheet",
|
|
2732
|
+
"workbook",
|
|
2733
|
+
"worksheet",
|
|
2734
|
+
"sheet",
|
|
2735
|
+
"named"
|
|
2736
|
+
]).join(" ") : "";
|
|
2737
|
+
const mediaTimestampVariant = domains.includes("audio") || domains.includes("video") ? uniqueQueryStrings([
|
|
2738
|
+
...tokens,
|
|
2739
|
+
"audio",
|
|
2740
|
+
"media",
|
|
2741
|
+
"timestamp",
|
|
2742
|
+
"transcript",
|
|
2743
|
+
"segment"
|
|
2744
|
+
]).join(" ") : "";
|
|
2702
2745
|
const rewrittenQuery = uniqueQueryStrings([
|
|
2703
2746
|
...tokens,
|
|
2704
2747
|
...expandedTerms,
|
|
@@ -2709,9 +2752,15 @@ var createHeuristicRAGQueryTransform = (options = {}) => createRAGQueryTransform
|
|
|
2709
2752
|
...DOMAIN_EXPANSIONS[domain] ?? [],
|
|
2710
2753
|
...tokenExpansions
|
|
2711
2754
|
]).join(" "));
|
|
2755
|
+
if (spreadsheetNamedVariant.length > 0) {
|
|
2756
|
+
variants.push(spreadsheetNamedVariant);
|
|
2757
|
+
}
|
|
2758
|
+
if (mediaTimestampVariant.length > 0) {
|
|
2759
|
+
variants.push(mediaTimestampVariant);
|
|
2760
|
+
}
|
|
2712
2761
|
return {
|
|
2713
2762
|
query: rewrittenQuery,
|
|
2714
|
-
variants
|
|
2763
|
+
variants: uniqueQueryStrings(variants)
|
|
2715
2764
|
};
|
|
2716
2765
|
}
|
|
2717
2766
|
});
|
|
@@ -3020,6 +3069,16 @@ var normalizeWhitespace = (value) => value.replace(/\r\n?/g, `
|
|
|
3020
3069
|
`).replace(/\n{3,}/g, `
|
|
3021
3070
|
|
|
3022
3071
|
`).trim();
|
|
3072
|
+
var formatMediaTimestampForIngest = (value) => {
|
|
3073
|
+
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
3074
|
+
return;
|
|
3075
|
+
}
|
|
3076
|
+
const totalSeconds = Math.floor(value / 1000);
|
|
3077
|
+
const minutes = Math.floor(totalSeconds / 60);
|
|
3078
|
+
const seconds = totalSeconds % 60;
|
|
3079
|
+
const milliseconds = Math.floor(value % 1000);
|
|
3080
|
+
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
|
|
3081
|
+
};
|
|
3023
3082
|
var decodeHtmlEntities = (value) => {
|
|
3024
3083
|
let output = value;
|
|
3025
3084
|
for (const [pattern, replacement] of HTML_ENTITY_REPLACEMENTS) {
|
|
@@ -3552,6 +3611,7 @@ var createOfficeDocumentExtractor = () => ({
|
|
|
3552
3611
|
} else if (extension === ".xlsx" || extension === ".ods") {
|
|
3553
3612
|
text = spreadsheetText(entries);
|
|
3554
3613
|
const sheets = spreadsheetSheetTexts(entries);
|
|
3614
|
+
const workbookLabel = input.title ?? input.name ?? input.path ?? input.source ?? DEFAULT_BINARY_NAME;
|
|
3555
3615
|
officeMetadata = {
|
|
3556
3616
|
sheetNames: spreadsheetSheetNames(entries)
|
|
3557
3617
|
};
|
|
@@ -3568,9 +3628,9 @@ var createOfficeDocumentExtractor = () => ({
|
|
|
3568
3628
|
sheetName: sheet.name
|
|
3569
3629
|
},
|
|
3570
3630
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}${extension || ".office"}`,
|
|
3571
|
-
text: normalizeWhitespace(`Spreadsheet sheet ${sheet.name}
|
|
3631
|
+
text: normalizeWhitespace(`Spreadsheet workbook ${workbookLabel}. ` + `Worksheet ${index + 1}. ` + `Workbook sheet named ${sheet.name}. ` + `Sheet ${sheet.name} from spreadsheet workbook ${workbookLabel}.` + `
|
|
3572
3632
|
${sheet.text}`),
|
|
3573
|
-
title: input.title ? `${input.title} \xB7 ${sheet.name}` : sheet.name
|
|
3633
|
+
title: input.title ? `${input.title} \xB7 Sheet ${sheet.name}` : `Sheet ${sheet.name}`
|
|
3574
3634
|
}));
|
|
3575
3635
|
} else if (extension === ".pptx" || extension === ".odp") {
|
|
3576
3636
|
text = presentationText(entries);
|
|
@@ -3651,6 +3711,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3651
3711
|
}
|
|
3652
3712
|
const startMs = typeof segment.startMs === "number" ? segment.startMs : undefined;
|
|
3653
3713
|
const endMs = typeof segment.endMs === "number" ? segment.endMs : undefined;
|
|
3714
|
+
const startLabel = formatMediaTimestampForIngest(startMs);
|
|
3715
|
+
const endLabel = formatMediaTimestampForIngest(endMs);
|
|
3716
|
+
const mediaKind = typeof result.metadata?.mediaKind === "string" ? result.metadata.mediaKind : "media";
|
|
3654
3717
|
segmentDocuments.push({
|
|
3655
3718
|
chunking: input.chunking,
|
|
3656
3719
|
contentType: input.contentType,
|
|
@@ -3667,9 +3730,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3667
3730
|
speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
|
|
3668
3731
|
},
|
|
3669
3732
|
source,
|
|
3670
|
-
text: normalizeWhitespace(
|
|
3733
|
+
text: normalizeWhitespace(`${mediaKind} transcript segment${startLabel ? ` at timestamp ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}. ` + `${mediaKind} timestamp evidence${startLabel ? ` ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""}.` + `
|
|
3671
3734
|
${text}`),
|
|
3672
|
-
title: input.title ? `${input.title} \xB7
|
|
3735
|
+
title: input.title ? `${input.title} \xB7 ${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}` : `${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}`
|
|
3673
3736
|
});
|
|
3674
3737
|
}
|
|
3675
3738
|
const summaryDocument = {
|
|
@@ -10202,5 +10265,5 @@ export {
|
|
|
10202
10265
|
aiChat
|
|
10203
10266
|
};
|
|
10204
10267
|
|
|
10205
|
-
//# debugId=
|
|
10268
|
+
//# debugId=B845F096CD7ACDA464756E2164756E21
|
|
10206
10269
|
//# sourceMappingURL=index.js.map
|