@absolutejs/absolute 0.19.0-beta.507 → 0.19.0-beta.508
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.js +32 -5
- package/dist/ai/index.js.map +4 -4
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -2221,6 +2221,16 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2221
2221
|
const source = result.source ?? "";
|
|
2222
2222
|
const archivePath = typeof metadata.archivePath === "string" ? metadata.archivePath : source.includes("#") ? source.split("#")[1] ?? "" : "";
|
|
2223
2223
|
const mediaSegments = Array.isArray(metadata.mediaSegments) ? metadata.mediaSegments.map((segment) => segment && typeof segment === "object" ? toFieldText(segment) : "").filter(Boolean).join(" ") : "";
|
|
2224
|
+
const mediaTimestampFocus = metadata.sourceNativeKind === "media_segment" ? [
|
|
2225
|
+
typeof metadata.mediaKind === "string" ? metadata.mediaKind : "",
|
|
2226
|
+
"audio",
|
|
2227
|
+
"video",
|
|
2228
|
+
"media",
|
|
2229
|
+
"timestamp",
|
|
2230
|
+
"segment",
|
|
2231
|
+
typeof metadata.mediaSegmentStartMs === "number" ? `timestamp ${metadata.mediaSegmentStartMs}` : "",
|
|
2232
|
+
typeof metadata.mediaSegmentEndMs === "number" ? `timestamp ${metadata.mediaSegmentEndMs}` : ""
|
|
2233
|
+
].filter(Boolean).join(" ") : "";
|
|
2224
2234
|
const spreadsheetFocus = metadata.sourceNativeKind === "spreadsheet_sheet" ? [
|
|
2225
2235
|
"spreadsheet",
|
|
2226
2236
|
"workbook",
|
|
@@ -2231,6 +2241,7 @@ var extractWeightedLexicalFields = (result) => {
|
|
|
2231
2241
|
].filter(Boolean).join(" ") : "";
|
|
2232
2242
|
const metadataFocus = [
|
|
2233
2243
|
metadata.sourceNativeKind,
|
|
2244
|
+
mediaTimestampFocus,
|
|
2234
2245
|
spreadsheetFocus,
|
|
2235
2246
|
metadata.sheetName,
|
|
2236
2247
|
metadata.sheetNames,
|
|
@@ -2445,7 +2456,9 @@ var resolveFileKindBoost = (queryTokens, metadata) => {
|
|
|
2445
2456
|
"framework",
|
|
2446
2457
|
"transcript",
|
|
2447
2458
|
"audio",
|
|
2448
|
-
"video"
|
|
2459
|
+
"video",
|
|
2460
|
+
"timestamp",
|
|
2461
|
+
"segment"
|
|
2449
2462
|
])) {
|
|
2450
2463
|
return 0.75;
|
|
2451
2464
|
}
|
|
@@ -2461,7 +2474,8 @@ var resolveTranscriptBoost = (queryTokens, metadata) => {
|
|
|
2461
2474
|
return 0;
|
|
2462
2475
|
}
|
|
2463
2476
|
const overlap = queryTokens.filter((token) => segmentText.includes(token)).length;
|
|
2464
|
-
|
|
2477
|
+
const timestampBoost = queryTokens.includes("timestamp") ? 0.35 : 0;
|
|
2478
|
+
return Math.min(1, overlap / Math.max(1, queryTokens.length) + timestampBoost);
|
|
2465
2479
|
};
|
|
2466
2480
|
var resolveArchiveBoost = (queryTokens, result) => {
|
|
2467
2481
|
const archivePath = typeof result.metadata?.archivePath === "string" ? result.metadata.archivePath.toLowerCase() : typeof result.source === "string" && result.source.includes("#") ? result.source.split("#")[1]?.toLowerCase() ?? "" : "";
|
|
@@ -3055,6 +3069,16 @@ var normalizeWhitespace = (value) => value.replace(/\r\n?/g, `
|
|
|
3055
3069
|
`).replace(/\n{3,}/g, `
|
|
3056
3070
|
|
|
3057
3071
|
`).trim();
|
|
3072
|
+
/**
 * Renders a millisecond offset as a zero-padded `MM:SS.mmm` label.
 *
 * Minutes are not rolled over into hours, so offsets of an hour or more
 * produce a minutes field of 60 or greater.
 *
 * @param {unknown} value - Offset in milliseconds.
 * @returns {string | undefined} The formatted label, or `undefined` when
 *   `value` is not a finite, non-negative number.
 */
var formatMediaTimestampForIngest = (value) => {
  const isUsableOffset = typeof value === "number" && Number.isFinite(value) && value >= 0;
  if (!isUsableOffset) {
    return undefined;
  }
  const zeroPad = (part, width) => String(part).padStart(width, "0");
  const wholeSeconds = Math.floor(value / 1000);
  const minutePart = zeroPad(Math.floor(wholeSeconds / 60), 2);
  const secondPart = zeroPad(wholeSeconds % 60, 2);
  // Fractional milliseconds are truncated, never rounded up into the next second.
  const millisPart = zeroPad(Math.floor(value % 1000), 3);
  return `${minutePart}:${secondPart}.${millisPart}`;
};
|
|
3058
3082
|
var decodeHtmlEntities = (value) => {
|
|
3059
3083
|
let output = value;
|
|
3060
3084
|
for (const [pattern, replacement] of HTML_ENTITY_REPLACEMENTS) {
|
|
@@ -3687,6 +3711,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3687
3711
|
}
|
|
3688
3712
|
const startMs = typeof segment.startMs === "number" ? segment.startMs : undefined;
|
|
3689
3713
|
const endMs = typeof segment.endMs === "number" ? segment.endMs : undefined;
|
|
3714
|
+
const startLabel = formatMediaTimestampForIngest(startMs);
|
|
3715
|
+
const endLabel = formatMediaTimestampForIngest(endMs);
|
|
3716
|
+
const mediaKind = typeof result.metadata?.mediaKind === "string" ? result.metadata.mediaKind : "media";
|
|
3690
3717
|
segmentDocuments.push({
|
|
3691
3718
|
chunking: input.chunking,
|
|
3692
3719
|
contentType: input.contentType,
|
|
@@ -3703,9 +3730,9 @@ var createRAGMediaFileExtractor = (transcriber) => ({
|
|
|
3703
3730
|
speaker: typeof segment.speaker === "string" ? segment.speaker : undefined
|
|
3704
3731
|
},
|
|
3705
3732
|
source,
|
|
3706
|
-
text: normalizeWhitespace(
|
|
3733
|
+
text: normalizeWhitespace(`${mediaKind} transcript segment${startLabel ? ` at timestamp ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""} from ${input.title ?? input.name ?? input.path ?? DEFAULT_BINARY_NAME}. ` + `${mediaKind} timestamp evidence${startLabel ? ` ${startLabel}${endLabel ? ` to ${endLabel}` : ""}` : ""}.` + `
|
|
3707
3734
|
${text}`),
|
|
3708
|
-
title: input.title ? `${input.title} \xB7
|
|
3735
|
+
title: input.title ? `${input.title} \xB7 ${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}` : `${mediaKind[0]?.toUpperCase() + mediaKind.slice(1)} segment ${index + 1}`
|
|
3709
3736
|
});
|
|
3710
3737
|
}
|
|
3711
3738
|
const summaryDocument = {
|
|
@@ -10238,5 +10265,5 @@ export {
|
|
|
10238
10265
|
aiChat
|
|
10239
10266
|
};
|
|
10240
10267
|
|
|
10241
|
-
//# debugId=
|
|
10268
|
+
//# debugId=B845F096CD7ACDA464756E2164756E21
|
|
10242
10269
|
//# sourceMappingURL=index.js.map
|