@absolutejs/absolute 0.19.0-beta.423 → 0.19.0-beta.424

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -2102,6 +2102,8 @@ var aiChat = (config) => {
2102
2102
  import { Elysia as Elysia2 } from "elysia";
2103
2103
 
2104
2104
  // src/ai/rag/ingestion.ts
2105
+ import { readdir, readFile } from "fs/promises";
2106
+ import { basename, extname, join, relative, resolve } from "path";
2105
2107
  var DEFAULT_MAX_CHUNK_LENGTH = 900;
2106
2108
  var DEFAULT_CHUNK_OVERLAP = 120;
2107
2109
  var DEFAULT_MIN_CHUNK_LENGTH = 80;
@@ -2147,6 +2149,35 @@ var stripMarkdown = (value) => {
2147
2149
  `);
2148
2150
  return normalizeWhitespace(stripped);
2149
2151
  };
2152
/**
 * Splits a Markdown document into heading-delimited sections.
 *
 * A new section starts at every ATX heading line (`#` .. `######`), except
 * for lines that sit inside a fenced code block: a shell or Python comment
 * such as `# install deps` inside a fence is content, not a heading, and
 * splitting there would also leave each half with an unbalanced fence.
 *
 * @param value Raw Markdown source.
 * @returns The non-empty sections after markdown stripping and whitespace
 *          normalization, in document order.
 */
var markdownStructureUnits = (value) => {
  const lines = value.replace(/\r\n?/g, "\n").split("\n");
  const sections = [];
  let current = [];
  // Marker (e.g. "```" or "~~~~") of the fence we are currently inside, or null.
  let openFence = null;
  for (const line of lines) {
    const fence = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
      } else if (marker[0] === openFence[0] && marker.length >= openFence.length && fence[2].trim() === "") {
        // A closing fence uses the same character, is at least as long as the
        // opening fence, and carries no info string.
        openFence = null;
      }
    }
    const startsSection = openFence === null && /^\s*#{1,6}\s+/.test(line);
    if (startsSection && current.length > 0) {
      sections.push(current.join("\n"));
      current = [];
    }
    current.push(line);
  }
  if (current.length > 0) {
    sections.push(current.join("\n"));
  }
  return sections.map((section) => stripMarkdown(section)).map((section) => normalizeWhitespace(section)).filter(Boolean);
};
2172
/**
 * Splits an HTML document into sections at structural elements
 * (section, article, main, aside, nav, h1-h6).
 *
 * Opening tags are replaced by a sentinel marker before the markup is
 * stripped, so the boundaries survive tag removal; the stripped text is then
 * split on the sentinel.
 *
 * @param value Raw HTML source.
 * @returns The non-empty, whitespace-normalized sections in document order.
 */
var htmlStructureUnits = (value) => {
  const sectionBreak = "__ABS_SECTION_BREAK__";
  // The lookahead requires the tag name to end at whitespace, "/" or ">", so
  // custom elements such as <main-nav> are not treated as section boundaries.
  const openingTag = /<(section|article|main|aside|nav|h[1-6])(?=[\s/>])[^>]*>/gi;
  // End tags may legally carry whitespace before ">" (e.g. "</h2 >").
  const closingTag = /<\/(section|article|main|aside|nav|h[1-6])\s*>/gi;
  const marked = value.replace(openingTag, `\n\n${sectionBreak} `).replace(closingTag, "\n\n");
  const normalized = stripHtml(marked);
  return normalized.split(/__ABS_SECTION_BREAK__/).map((section) => normalizeWhitespace(section)).filter(Boolean);
};
2150
2181
  var inferFormat = (document) => {
2151
2182
  if (document.format) {
2152
2183
  return document.format;
@@ -2192,6 +2223,21 @@ var fixedUnits = (text, maxChunkLength) => {
2192
2223
  }
2193
2224
  return units;
2194
2225
  };
2226
/**
 * Picks chunking units based on the document's format.
 *
 * Markdown and HTML documents are split along their structure using the raw
 * `document.text`; every other format, and any structured document that
 * yields no sections, falls back to paragraph units of the normalized text.
 *
 * @param document Source document; only `document.text` is read here.
 * @param format Document format ("markdown", "html", "text", ...).
 * @param normalizedText Already-normalized plain text of the document.
 * @returns The list of text units to feed into chunk assembly.
 */
var sourceAwareUnits = (document, format, normalizedText) => {
  let structured = [];
  if (format === "markdown") {
    structured = markdownStructureUnits(document.text);
  } else if (format === "html") {
    structured = htmlStructureUnits(document.text);
  }
  if (structured.length > 0) {
    return structured;
  }
  return paragraphUnits(normalizedText);
};
2195
2241
  var overlapTail = (value, overlap) => {
2196
2242
  if (overlap <= 0 || value.length <= overlap) {
2197
2243
  return value;
@@ -2256,11 +2302,11 @@ var resolveChunkingOptions = (document, defaults) => {
2256
2302
  strategy
2257
2303
  };
2258
2304
  };
2259
- var createChunkTexts = (text, options) => {
2305
+ var createChunkTexts = (document, format, text, options) => {
2260
2306
  if (text.length <= options.maxChunkLength) {
2261
2307
  return [text];
2262
2308
  }
2263
- const units = options.strategy === "fixed" ? fixedUnits(text, options.maxChunkLength) : options.strategy === "sentences" ? sentenceUnits(text) : paragraphUnits(text);
2309
+ const units = options.strategy === "fixed" ? fixedUnits(text, options.maxChunkLength) : options.strategy === "source_aware" ? sourceAwareUnits(document, format, text) : options.strategy === "sentences" ? sentenceUnits(text) : paragraphUnits(text);
2264
2310
  return chunkFromUnits(units, options.maxChunkLength, options.chunkOverlap, options.minChunkLength);
2265
2311
  };
2266
2312
  var prepareRAGDocument = (document, defaultChunking) => {
@@ -2277,7 +2323,7 @@ var prepareRAGDocument = (document, defaultChunking) => {
2277
2323
  source,
2278
2324
  title
2279
2325
  };
2280
- const chunkTexts = createChunkTexts(normalizedText, chunking);
2326
+ const chunkTexts = createChunkTexts(document, format, normalizedText, chunking);
2281
2327
  const chunks = chunkTexts.map((text, index) => ({
2282
2328
  chunkId: `${documentId}:${String(index + 1).padStart(3, "0")}`,
2283
2329
  metadata: {
@@ -2300,9 +2346,81 @@ var prepareRAGDocument = (document, defaultChunking) => {
2300
2346
  };
2301
2347
  };
2302
2348
  var prepareRAGDocuments = (input) => input.documents.map((document) => prepareRAGDocument(document, input.defaultChunking));
2349
/**
 * Infers a document format from a file path's extension (case-insensitive).
 *
 * `.md`, `.mdx` and `.markdown` map to "markdown"; `.html` and `.htm` map to
 * "html"; anything else, including paths without an extension, is "text".
 *
 * @param path File path or file name.
 * @returns "markdown", "html" or "text".
 */
var inferFormatFromPath = (path) => {
  switch (extname(path).toLowerCase()) {
    case ".md":
    case ".mdx":
    case ".markdown":
      return "markdown";
    case ".html":
    case ".htm":
      return "html";
    default:
      return "text";
  }
};
2359
/**
 * Reads a file as UTF-8 and returns it as a document object.
 *
 * All fields of `input` are carried over. `format` defaults to the format
 * inferred from the path, `source` defaults to the path itself, and `text`
 * is always the file's contents.
 *
 * @param input Descriptor with a `path` and optional `format` / `source`
 *              plus any additional fields to carry over.
 * @returns A promise for the loaded document.
 */
var loadRAGDocumentFile = async (input) => {
  const { path } = input;
  const contents = await readFile(path, "utf8");
  const format = input.format ?? inferFormatFromPath(path);
  const source = input.source ?? path;
  return { ...input, format, source, text: contents };
};
2368
/**
 * Loads a document from disk and prepares it in one step.
 *
 * @param input File descriptor passed to `loadRAGDocumentFile`.
 * @param defaultChunking Chunking defaults forwarded to `prepareRAGDocument`.
 */
var prepareRAGDocumentFile = async (input, defaultChunking) => {
  const document = await loadRAGDocumentFile(input);
  return prepareRAGDocument(document, defaultChunking);
};
2369
// File extensions picked up by directory ingestion when the caller does not
// supply `includeExtensions`.
var DEFAULT_DIRECTORY_EXTENSIONS = [".txt", ".md", ".mdx", ".html", ".htm"];
2376
/**
 * Lists the files under `directory` whose lower-cased extension is in
 * `includeExtensions`.
 *
 * Results are accumulated into a single array and sorted once at the end.
 * Matches are pushed one at a time rather than spread as call arguments, so
 * very large trees cannot exceed the engine's argument limit.
 *
 * @param directory Directory to scan.
 * @param recursive Whether to descend into sub-directories.
 * @param includeExtensions Set of lower-case extensions including the dot.
 * @returns A promise for the matching paths (each `directory` joined with
 *          the entry's relative location), sorted with the default string
 *          ordering. Entries that are neither directories nor regular files
 *          are ignored.
 */
var collectDirectoryFiles = async (directory, recursive, includeExtensions) => {
  const files = [];
  const walk = async (current) => {
    const entries = await readdir(current, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = join(current, entry.name);
      if (entry.isDirectory()) {
        if (recursive) {
          await walk(fullPath);
        }
        continue;
      }
      if (!entry.isFile()) {
        continue;
      }
      if (includeExtensions.has(extname(entry.name).toLowerCase())) {
        files.push(fullPath);
      }
    }
  };
  await walk(directory);
  return files.sort();
};
2397
/**
 * Loads every matching file under `input.directory` as a document.
 *
 * Files are read in bounded batches rather than all at once, so large
 * corpora do not exhaust the process's file-descriptor limit (EMFILE).
 * Document order follows the order returned by `collectDirectoryFiles`.
 *
 * @param input Options:
 *   - `directory`: root directory; resolved to an absolute path.
 *   - `includeExtensions`: extensions to load, with or without a leading
 *     dot (case-insensitive); defaults to `DEFAULT_DIRECTORY_EXTENSIONS`.
 *   - `recursive`: descend into sub-directories unless explicitly `false`.
 *   - `baseMetadata`: metadata merged into every document; `fileName` and
 *     `relativePath` are set after it and override same-named keys.
 *   - `defaultChunking`: passed through unchanged on the result.
 * @returns A promise for `{ defaultChunking, documents }`. Each document's
 *          `source` is its path relative to the root with forward slashes.
 */
var loadRAGDocumentsFromDirectory = async (input) => {
  // Upper bound on files being read at the same time.
  const READ_BATCH_SIZE = 64;
  const root = resolve(input.directory);
  const includeExtensions = new Set((input.includeExtensions ?? DEFAULT_DIRECTORY_EXTENSIONS).map((entry) => entry.startsWith(".") ? entry.toLowerCase() : `.${entry.toLowerCase()}`));
  const files = await collectDirectoryFiles(root, input.recursive !== false, includeExtensions);
  const documents = [];
  for (let offset = 0; offset < files.length; offset += READ_BATCH_SIZE) {
    const batch = files.slice(offset, offset + READ_BATCH_SIZE);
    const loadedBatch = await Promise.all(batch.map((path) => {
      const source = relative(root, path).replace(/\\/g, "/");
      return loadRAGDocumentFile({
        metadata: {
          ...input.baseMetadata ?? {},
          fileName: basename(path),
          relativePath: source
        },
        path,
        source
      });
    }));
    for (const loaded of loadedBatch) {
      documents.push(loaded);
    }
  }
  return {
    defaultChunking: input.defaultChunking,
    documents
  };
};
2419
/**
 * Loads all matching documents from a directory and prepares them.
 *
 * @param input Options accepted by `loadRAGDocumentsFromDirectory`.
 */
var prepareRAGDirectoryDocuments = async (input) => {
  const loaded = await loadRAGDocumentsFromDirectory(input);
  return prepareRAGDocuments(loaded);
};
2303
2420
  var buildRAGUpsertInputFromDocuments = (input) => ({
2304
2421
  chunks: prepareRAGDocuments(input).flatMap((document) => document.chunks)
2305
2422
  });
2423
/**
 * Loads all matching documents from a directory and builds the upsert
 * payload from them.
 *
 * @param input Options accepted by `loadRAGDocumentsFromDirectory`.
 */
var buildRAGUpsertInputFromDirectory = async (input) => {
  const loaded = await loadRAGDocumentsFromDirectory(input);
  return buildRAGUpsertInputFromDocuments(loaded);
};
2306
2424
 
2307
2425
  // src/ai/rag/collection.ts
2308
2426
  var DEFAULT_TOP_K = 6;
@@ -2895,7 +3013,7 @@ import { existsSync as existsSync2 } from "fs";
2895
3013
  import { existsSync, readFileSync } from "fs";
2896
3014
  import { createRequire } from "module";
2897
3015
  import { arch, platform } from "os";
2898
- import { dirname, join } from "path";
3016
+ import { dirname, join as join2 } from "path";
2899
3017
  var require2 = createRequire(import.meta.url);
2900
3018
  var PLATFORM_PACKAGE_MAP = {
2901
3019
  "darwin-arm64": {
@@ -2943,7 +3061,7 @@ var resolveAbsoluteSQLiteVec = () => {
2943
3061
  try {
2944
3062
  const packageJsonPath = require2.resolve(`${packageInfo.packageName}/package.json`);
2945
3063
  const packageRoot = dirname(packageJsonPath);
2946
- const libraryPath = join(packageRoot, packageInfo.libraryFile);
3064
+ const libraryPath = join2(packageRoot, packageInfo.libraryFile);
2947
3065
  const packageVersion = readPackageVersion(packageJsonPath);
2948
3066
  if (!existsSync(libraryPath)) {
2949
3067
  return {
@@ -3659,7 +3777,9 @@ export {
3659
3777
  ragChat,
3660
3778
  querySimilarity,
3661
3779
  prepareRAGDocuments,
3780
+ prepareRAGDocumentFile,
3662
3781
  prepareRAGDocument,
3782
+ prepareRAGDirectoryDocuments,
3663
3783
  parseAIMessage,
3664
3784
  openaiResponses,
3665
3785
  openaiCompatible,
@@ -3667,6 +3787,8 @@ export {
3667
3787
  moonshot,
3668
3788
  mistralai,
3669
3789
  meta,
3790
+ loadRAGDocumentsFromDirectory,
3791
+ loadRAGDocumentFile,
3670
3792
  ingestRAGDocuments,
3671
3793
  ingestDocuments,
3672
3794
  google,
@@ -3681,9 +3803,10 @@ export {
3681
3803
  createInMemoryRAGStore,
3682
3804
  createConversationManager,
3683
3805
  buildRAGUpsertInputFromDocuments,
3806
+ buildRAGUpsertInputFromDirectory,
3684
3807
  alibaba,
3685
3808
  aiChat
3686
3809
  };
3687
3810
 
3688
- //# debugId=D8D946A65C6DB67B64756E2164756E21
3811
+ //# debugId=1F6C91DA7227AEA564756E2164756E21
3689
3812
  //# sourceMappingURL=index.js.map