@absolutejs/absolute 0.19.0-beta.492 → 0.19.0-beta.493

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2161,10 +2161,66 @@ var collectMetadataStrings = (value) => {
2161
2161
  }
2162
2162
  return [];
2163
2163
  };
2164
/**
 * Expands a source path or identifier into space-separated words for lexical
 * matching.
 *
 * Runs of `#`, `/`, `_`, `.` and `-` collapse into a single space, after which
 * a handful of file extensions that stand alone as words are replaced by
 * descriptive terms (for example `md` becomes `markdown`).
 *
 * Extension matching is case-insensitive so that `README.MD` expands exactly
 * like `readme.md`; callers lowercase the text only after this expansion, so a
 * case-sensitive match would silently skip upper-case extensions.
 *
 * @param {string} source - Source path or identifier to expand.
 * @returns {string} The expanded, space-separated text.
 */
var normalizeSourceForLexical = (source) =>
  source
    .replace(/[#/_.-]+/g, " ")
    .replace(/\bmd\b/gi, "markdown")
    .replace(/\bpptx\b/gi, "presentation")
    .replace(/\bxlsx\b/gi, "spreadsheet workbook sheet")
    .replace(/\bmp3\b/gi, "audio transcript media")
    .replace(/\bmp4\b/gi, "video transcript media")
    .replace(/\bzip\b/gi, "archive bundle");
2165
/**
 * Flattens a value into one space-separated string.
 *
 * The strings are gathered by `collectMetadataStrings` (defined elsewhere in
 * this file); falsy entries are dropped before joining.
 *
 * @param {unknown} value - Value handed to `collectMetadataStrings`.
 * @returns {string} The remaining strings joined with single spaces.
 */
var toFieldText = (value) => {
  const collected = collectMetadataStrings(value);
  return collected.filter((entry) => Boolean(entry)).join(" ");
};
2166
/**
 * Computes the share of query tokens that also occur among the tokens of
 * `text`.
 *
 * The text is lowercased and split with `tokenize` (defined elsewhere in this
 * file). Every query token found in the text counts once per occurrence in
 * `queryTokens`, and the count is divided by the number of query tokens.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {string} text - Field text to check against.
 * @returns {number} Matched query tokens divided by the query token count, or
 *   0 when the text is empty or yields no tokens.
 */
var scoreTokenCoverage = (queryTokens, text) => {
  const lowered = text.toLowerCase();
  if (lowered.length === 0) {
    return 0;
  }

  const textTokens = tokenize(lowered);
  if (textTokens.length === 0) {
    return 0;
  }

  const vocabulary = new Set(textTokens);
  const matched = queryTokens.filter((candidate) => vocabulary.has(candidate));

  // Math.max keeps the divisor at 1 for an empty query token list.
  const divisor = Math.max(1, queryTokens.length);
  return matched.length / divisor;
};
2179
/**
 * Reports whether the tokenized query occurs as a contiguous run of whole
 * tokens inside the tokenized text.
 *
 * Both inputs are split with `tokenize` (defined elsewhere in this file) and
 * re-joined with single spaces before comparison.
 *
 * @param {string} query - Raw query string.
 * @param {string} text - Field text to search.
 * @returns {number} 1 when the query phrase is found, otherwise 0 (including
 *   when either side yields no tokens).
 */
var scorePhraseMatch = (query, text) => {
  const queryPhrase = tokenize(query).join(" ");
  const textPhrase = tokenize(text).join(" ");
  if (queryPhrase.length === 0 || textPhrase.length === 0) {
    return 0;
  }

  // Pad both sides with a space so the phrase has to line up with token
  // boundaries. A bare substring test would report a phrase hit for the query
  // "port" inside the text token "report".
  // NOTE(review): assumes tokens never contain spaces - confirm against tokenize.
  return ` ${textPhrase} `.includes(` ${queryPhrase} `) ? 1 : 0;
};
2187
/**
 * Scores one text field as a weighted sum of token coverage and phrase match.
 *
 * @param {object} options
 * @param {number} options.coverageWeight - Multiplier for the `scoreTokenCoverage` result.
 * @param {number} options.phraseWeight - Multiplier for the `scorePhraseMatch` result.
 * @param {string} options.query - Raw query string.
 * @param {string[]} options.queryTokens - Tokens of the query.
 * @param {string} options.text - Field text being scored.
 * @returns {number} Coverage times `coverageWeight` plus phrase match times `phraseWeight`.
 */
var scoreWeightedField = ({
  coverageWeight,
  phraseWeight,
  query,
  queryTokens,
  text
}) => {
  const coveragePart = scoreTokenCoverage(queryTokens, text) * coverageWeight;
  const phrasePart = scorePhraseMatch(query, text) * phraseWeight;
  return coveragePart + phrasePart;
};
2194
/**
 * Splits a retrieval result into the separate text fields that are scored
 * with individual weights.
 *
 * `title`, `source` and `chunkText` are coerced to strings (non-strings become
 * `""`) because the field scorers call string methods such as
 * `text.toLowerCase()` on them; a result without `chunkText` would otherwise
 * throw during scoring.
 *
 * @param {object} result - Retrieval result; reads `metadata`, `source`,
 *   `chunkText` and `title`.
 * @returns {{archivePath: string, chunkText: string, mediaSegments: string,
 *   metadataFocus: string, metadataText: string, source: string, title: string}}
 *   The per-field text, each defaulting to an empty string.
 */
var extractWeightedLexicalFields = (result) => {
  const asText = (value) => (typeof value === "string" ? value : "");

  const metadata = result.metadata ?? {};
  const source = asText(result.source);

  // Prefer an explicit metadata.archivePath; otherwise fall back to the part
  // of the source between the first and second "#".
  let archivePath = "";
  if (typeof metadata.archivePath === "string") {
    archivePath = metadata.archivePath;
  } else if (source.includes("#")) {
    archivePath = source.split("#")[1] ?? "";
  }

  // Only object-valued segments contribute text; anything else is dropped.
  const mediaSegments = Array.isArray(metadata.mediaSegments)
    ? metadata.mediaSegments
        .map((segment) => (segment && typeof segment === "object" ? toFieldText(segment) : ""))
        .filter(Boolean)
        .join(" ")
    : "";

  // Selected metadata keys that get their own (separately weighted) field.
  const metadataFocus = [
    metadata.sheetName,
    metadata.sheetNames,
    metadata.slideTitle,
    metadata.slideTitles,
    metadata.threadTopic,
    metadata.speaker,
    metadata.fileKind,
    metadata.transcriptSource,
    metadata.archiveType
  ]
    .flatMap((value) => collectMetadataStrings(value))
    .join(" ");

  return {
    archivePath,
    chunkText: asText(result.chunkText),
    mediaSegments,
    metadataFocus,
    metadataText: toFieldText(metadata),
    source: source ? normalizeSourceForLexical(source) : "",
    title: asText(result.title)
  };
};
2164
2220
  var buildRAGLexicalHaystack = (result) => [
2165
2221
  result.title,
2166
2222
  result.source,
2167
- typeof result.source === "string" ? result.source.replace(/[#/_.-]+/g, " ").replace(/\bmd\b/g, "markdown").replace(/\bpptx\b/g, "presentation").replace(/\bxlsx\b/g, "spreadsheet workbook sheet").replace(/\bmp3\b/g, "audio transcript media").replace(/\bmp4\b/g, "video transcript media").replace(/\bzip\b/g, "archive bundle") : undefined,
2223
+ typeof result.source === "string" ? normalizeSourceForLexical(result.source) : undefined,
2168
2224
  result.chunkText,
2169
2225
  ...collectMetadataStrings(result.metadata)
2170
2226
  ].filter((value) => Boolean(value)).join(" ");
@@ -2173,20 +2229,67 @@ var scoreRAGLexicalMatch = (query, result) => {
2173
2229
  if (queryTokens.length === 0) {
2174
2230
  return 0;
2175
2231
  }
2232
+ const fields = extractWeightedLexicalFields(result);
2176
2233
  const haystack = buildRAGLexicalHaystack(result).toLowerCase();
2177
- const haystackTokens = tokenize(haystack);
2178
- const haystackSet = new Set(haystackTokens);
2179
- const overlap = queryTokens.filter((token) => haystackSet.has(token)).length;
2180
- if (overlap === 0) {
2234
+ const overallCoverage = scoreTokenCoverage(queryTokens, haystack);
2235
+ if (overallCoverage === 0) {
2181
2236
  return 0;
2182
2237
  }
2183
- const exactPhraseBoost = haystack.includes(query.toLowerCase()) ? 1 : 0;
2184
- const sourceBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
2185
- const coverageBoost = overlap / queryTokens.length;
2238
+ const titleScore = scoreWeightedField({
2239
+ coverageWeight: 1.8,
2240
+ phraseWeight: 1.2,
2241
+ query,
2242
+ queryTokens,
2243
+ text: fields.title
2244
+ });
2245
+ const sourceScore = scoreWeightedField({
2246
+ coverageWeight: 2.6,
2247
+ phraseWeight: 1.4,
2248
+ query,
2249
+ queryTokens,
2250
+ text: fields.source
2251
+ });
2252
+ const metadataFocusScore = scoreWeightedField({
2253
+ coverageWeight: 2.8,
2254
+ phraseWeight: 1.6,
2255
+ query,
2256
+ queryTokens,
2257
+ text: fields.metadataFocus
2258
+ });
2259
+ const archivePathScore = scoreWeightedField({
2260
+ coverageWeight: 3.2,
2261
+ phraseWeight: 2.2,
2262
+ query,
2263
+ queryTokens,
2264
+ text: fields.archivePath
2265
+ });
2266
+ const mediaSegmentScore = scoreWeightedField({
2267
+ coverageWeight: 3,
2268
+ phraseWeight: 1.8,
2269
+ query,
2270
+ queryTokens,
2271
+ text: fields.mediaSegments
2272
+ });
2273
+ const metadataScore = scoreWeightedField({
2274
+ coverageWeight: 1.2,
2275
+ phraseWeight: 0.8,
2276
+ query,
2277
+ queryTokens,
2278
+ text: fields.metadataText
2279
+ });
2280
+ const chunkScore = scoreWeightedField({
2281
+ coverageWeight: 0.9,
2282
+ phraseWeight: 0.6,
2283
+ query,
2284
+ queryTokens,
2285
+ text: fields.chunkText
2286
+ });
2287
+ const exactPhraseBoost = scorePhraseMatch(query, haystack);
2288
+ const coverageBoost = overallCoverage;
2186
2289
  const fileKindBoost = resolveFileKindBoost(queryTokens, result.metadata);
2187
2290
  const transcriptBoost = resolveTranscriptBoost(queryTokens, result.metadata);
2188
2291
  const archiveBoost = resolveArchiveBoost(queryTokens, result);
2189
- return coverageBoost + exactPhraseBoost + sourceBoost + fileKindBoost + transcriptBoost + archiveBoost;
2292
+ return titleScore + sourceScore + metadataFocusScore + archivePathScore + mediaSegmentScore + metadataScore + chunkScore + coverageBoost + exactPhraseBoost + fileKindBoost + transcriptBoost + archiveBoost;
2190
2293
  };
2191
2294
  var hasAnyToken = (tokens, values) => values.some((value) => tokens.includes(value));
2192
2295
  var resolveFileKindBoost = (queryTokens, metadata) => {
@@ -8600,5 +8703,5 @@ export {
8600
8703
  aiChat
8601
8704
  };
8602
8705
 
8603
- //# debugId=8B383E0793D06CEF64756E2164756E21
8706
+ //# debugId=36F6407CE8163A4F64756E2164756E21
8604
8707
  //# sourceMappingURL=index.js.map