unrag 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -4,7 +4,15 @@
4
4
  import { intro, outro as outro3 } from "@clack/prompts";
5
5
 
6
6
  // cli/commands/init.ts
7
- import { cancel as cancel2, isCancel as isCancel2, outro, select, text } from "@clack/prompts";
7
+ import {
8
+ cancel as cancel2,
9
+ confirm as confirm2,
10
+ groupMultiselect,
11
+ isCancel as isCancel2,
12
+ outro,
13
+ select,
14
+ text
15
+ } from "@clack/prompts";
8
16
  import path5 from "node:path";
9
17
  import { fileURLToPath } from "node:url";
10
18
 
@@ -71,8 +79,39 @@ var writeText = async (filePath, content) => {
71
79
  await ensureDir(path2.dirname(filePath));
72
80
  await writeFile(filePath, content, "utf8");
73
81
  };
82
+ var EXTRACTOR_FACTORY = {
83
+ "pdf-llm": "createPdfLlmExtractor",
84
+ "pdf-text-layer": "createPdfTextLayerExtractor",
85
+ "pdf-ocr": "createPdfOcrExtractor",
86
+ "image-ocr": "createImageOcrExtractor",
87
+ "image-caption-llm": "createImageCaptionLlmExtractor",
88
+ "audio-transcribe": "createAudioTranscribeExtractor",
89
+ "video-transcribe": "createVideoTranscribeExtractor",
90
+ "video-frames": "createVideoFramesExtractor",
91
+ "file-text": "createFileTextExtractor",
92
+ "file-docx": "createFileDocxExtractor",
93
+ "file-pptx": "createFilePptxExtractor",
94
+ "file-xlsx": "createFileXlsxExtractor"
95
+ };
96
+ var EXTRACTOR_FLAG_KEYS = {
97
+ "pdf-text-layer": ["pdf_textLayer"],
98
+ "pdf-llm": ["pdf_llmExtraction"],
99
+ "pdf-ocr": ["pdf_ocr"],
100
+ "image-ocr": ["image_ocr"],
101
+ "image-caption-llm": ["image_captionLlm"],
102
+ "audio-transcribe": ["audio_transcription"],
103
+ "video-transcribe": ["video_transcription"],
104
+ "video-frames": ["video_frames"],
105
+ "file-text": ["file_text"],
106
+ "file-docx": ["file_docx"],
107
+ "file-pptx": ["file_pptx"],
108
+ "file-xlsx": ["file_xlsx"]
109
+ };
110
+ var ALL_FLAG_KEYS = Array.from(new Set(Object.values(EXTRACTOR_FLAG_KEYS).flat())).sort();
74
111
  var renderUnragConfig = (content, selection) => {
75
112
  const installImportBase = `./${selection.installDir.replace(/\\/g, "/")}`;
113
+ const richMedia = selection.richMedia ?? { enabled: false, extractors: [] };
114
+ const selectedExtractors = Array.from(new Set(richMedia.extractors ?? [])).sort();
76
115
  const baseImports = [
77
116
  `import { defineUnragConfig } from "${installImportBase}/core";`
78
117
  ];
@@ -88,22 +127,40 @@ var renderUnragConfig = (content, selection) => {
88
127
  storeImports.push(`import { createPrismaVectorStore } from "${installImportBase}/store/prisma";`, `import { PrismaClient } from "@prisma/client";`);
89
128
  storeCreateLines.push(` const prisma = (globalThis as any).__unragPrisma ?? new PrismaClient();`, ` (globalThis as any).__unragPrisma = prisma;`, ` const store = createPrismaVectorStore(prisma);`);
90
129
  }
91
- const importsBlock = [...baseImports, ...storeImports].join(`
130
+ const extractorImports = [];
131
+ if (richMedia.enabled && selectedExtractors.length > 0) {
132
+ for (const ex of selectedExtractors) {
133
+ const factory = EXTRACTOR_FACTORY[ex];
134
+ extractorImports.push(`import { ${factory} } from "${installImportBase}/extractors/${ex}";`);
135
+ }
136
+ }
137
+ const importsBlock = [...baseImports, ...storeImports, ...extractorImports].join(`
92
138
  `);
93
139
  const createEngineBlock = [
94
140
  `export function createUnragEngine() {`,
95
141
  ...storeCreateLines,
96
142
  ``,
97
143
  ` return unrag.createEngine({ store });`,
98
- `}`,
99
- ``,
100
- `export async function retrieve(query: string) {`,
101
- ` const engine = createUnragEngine();`,
102
- ` return engine.retrieve({ query, topK: unrag.defaults.retrieval.topK });`,
103
144
  `}`
104
145
  ].join(`
105
146
  `);
106
- return content.replace("// __UNRAG_IMPORTS__", importsBlock).replace("// __UNRAG_CREATE_ENGINE__", createEngineBlock);
147
+ let out = content.replace("// __UNRAG_IMPORTS__", importsBlock).replace("// __UNRAG_CREATE_ENGINE__", createEngineBlock);
148
+ out = out.replace('type: "text", // __UNRAG_EMBEDDING_TYPE__', richMedia.enabled ? 'type: "multimodal",' : 'type: "text",').replace('model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__', richMedia.enabled ? 'model: "cohere/embed-v4.0",' : 'model: "openai/text-embedding-3-small",');
149
+ const enabledFlagKeys = new Set;
150
+ if (richMedia.enabled) {
151
+ for (const ex of selectedExtractors) {
152
+ for (const k of EXTRACTOR_FLAG_KEYS[ex] ?? []) {
153
+ enabledFlagKeys.add(k);
154
+ }
155
+ }
156
+ }
157
+ for (const k of ALL_FLAG_KEYS) {
158
+ out = out.replace(`enabled: false, // __UNRAG_FLAG_${k}__`, `enabled: ${enabledFlagKeys.has(k) ? "true" : "false"},`);
159
+ }
160
+ const extractorLines = richMedia.enabled && selectedExtractors.length > 0 ? selectedExtractors.map((ex) => ` ${EXTRACTOR_FACTORY[ex]}(),`).join(`
161
+ `) : "";
162
+ out = out.replace(" // __UNRAG_EXTRACTORS__", extractorLines);
163
+ return out;
107
164
  };
108
165
  var renderDocs = (content, selection) => {
109
166
  const notes = [];
@@ -272,28 +329,34 @@ async function copyExtractorFiles(selection) {
272
329
  const destRootAbs = path2.join(installBaseAbs, "extractors", selection.extractor);
273
330
  const sharedDestRootAbs = path2.join(installBaseAbs, "extractors", "_shared");
274
331
  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
332
+ const shouldWrite = async (src, dest) => {
333
+ if (!await exists(dest))
334
+ return true;
335
+ if (nonInteractive)
336
+ return false;
337
+ try {
338
+ const [srcRaw, destRaw] = await Promise.all([readText(src), readText(dest)]);
339
+ if (srcRaw === destRaw)
340
+ return false;
341
+ } catch {}
342
+ const answer = await confirm({
343
+ message: `Overwrite ${path2.relative(selection.projectRoot, dest)}?`,
344
+ initialValue: false
345
+ });
346
+ if (isCancel(answer)) {
347
+ cancel("Cancelled.");
348
+ return false;
349
+ }
350
+ return Boolean(answer);
351
+ };
275
352
  for (const src of extractorFiles) {
276
353
  if (!await exists(src)) {
277
354
  throw new Error(`Registry file missing: ${src}`);
278
355
  }
279
356
  const rel = path2.relative(extractorRegistryAbs, src);
280
357
  const dest = path2.join(destRootAbs, rel);
281
- if (await exists(dest)) {
282
- if (nonInteractive) {
283
- continue;
284
- }
285
- const answer = await confirm({
286
- message: `Overwrite ${path2.relative(selection.projectRoot, dest)}?`,
287
- initialValue: false
288
- });
289
- if (isCancel(answer)) {
290
- cancel("Cancelled.");
291
- return;
292
- }
293
- if (!answer) {
294
- continue;
295
- }
296
- }
358
+ if (!await shouldWrite(src, dest))
359
+ continue;
297
360
  const raw = await readText(src);
298
361
  await writeText(dest, raw);
299
362
  }
@@ -303,22 +366,8 @@ async function copyExtractorFiles(selection) {
303
366
  }
304
367
  const rel = path2.relative(sharedRegistryAbs, src);
305
368
  const dest = path2.join(sharedDestRootAbs, rel);
306
- if (await exists(dest)) {
307
- if (nonInteractive) {
308
- continue;
309
- }
310
- const answer = await confirm({
311
- message: `Overwrite ${path2.relative(selection.projectRoot, dest)}?`,
312
- initialValue: false
313
- });
314
- if (isCancel(answer)) {
315
- cancel("Cancelled.");
316
- return;
317
- }
318
- if (!answer) {
319
- continue;
320
- }
321
- }
369
+ if (!await shouldWrite(src, dest))
370
+ continue;
322
371
  const raw = await readText(src);
323
372
  await writeText(dest, raw);
324
373
  }
@@ -552,9 +601,93 @@ var parseInitArgs = (args) => {
552
601
  }
553
602
  continue;
554
603
  }
604
+ if (a === "--rich-media") {
605
+ out.richMedia = true;
606
+ continue;
607
+ }
608
+ if (a === "--no-rich-media") {
609
+ out.richMedia = false;
610
+ continue;
611
+ }
612
+ if (a === "--extractors") {
613
+ const v = args[i + 1];
614
+ if (v) {
615
+ out.extractors = v.split(",").map((s) => s.trim()).filter(Boolean);
616
+ i++;
617
+ }
618
+ continue;
619
+ }
555
620
  }
556
621
  return out;
557
622
  };
623
+ var DEFAULT_RICH_MEDIA_EXTRACTORS = ["pdf-text-layer", "file-text"];
624
+ var EXTRACTOR_OPTIONS = [
625
+ {
626
+ group: "PDF",
627
+ value: "pdf-text-layer",
628
+ label: `pdf-text-layer (Fast/cheap extraction via PDF text layer)`,
629
+ hint: "recommended"
630
+ },
631
+ {
632
+ group: "PDF",
633
+ value: "pdf-llm",
634
+ label: `pdf-llm (LLM-based PDF extraction; higher cost)`
635
+ },
636
+ {
637
+ group: "PDF",
638
+ value: "pdf-ocr",
639
+ label: `pdf-ocr (OCR scanned PDFs; requires native binaries)`,
640
+ hint: "worker-only"
641
+ },
642
+ {
643
+ group: "Image",
644
+ value: "image-ocr",
645
+ label: `image-ocr (Extract text from images via vision LLM)`
646
+ },
647
+ {
648
+ group: "Image",
649
+ value: "image-caption-llm",
650
+ label: `image-caption-llm (Generate captions for images via vision LLM)`
651
+ },
652
+ {
653
+ group: "Audio",
654
+ value: "audio-transcribe",
655
+ label: `audio-transcribe (Speech-to-text transcription)`
656
+ },
657
+ {
658
+ group: "Video",
659
+ value: "video-transcribe",
660
+ label: `video-transcribe (Transcribe video audio track)`
661
+ },
662
+ {
663
+ group: "Video",
664
+ value: "video-frames",
665
+ label: `video-frames (Sample frames + analyze via vision LLM; requires ffmpeg)`,
666
+ hint: "worker-only"
667
+ },
668
+ {
669
+ group: "Files",
670
+ value: "file-text",
671
+ label: `file-text (Extract text/markdown/json/html from common text files)`,
672
+ hint: "recommended"
673
+ },
674
+ {
675
+ group: "Files",
676
+ value: "file-docx",
677
+ label: `file-docx (Extract text from .docx files)`
678
+ },
679
+ {
680
+ group: "Files",
681
+ value: "file-pptx",
682
+ label: `file-pptx (Extract text from .pptx slides)`
683
+ },
684
+ {
685
+ group: "Files",
686
+ value: "file-xlsx",
687
+ label: `file-xlsx (Extract tables from .xlsx spreadsheets)`
688
+ }
689
+ ];
690
+ var AVAILABLE_EXTRACTORS = new Set(EXTRACTOR_OPTIONS.map((o) => o.value));
558
691
  async function initCommand(args) {
559
692
  const root = await tryFindProjectRoot(process.cwd());
560
693
  if (!root) {
@@ -623,17 +756,71 @@ async function initCommand(args) {
623
756
  return;
624
757
  }
625
758
  const aliasBase = String(aliasAnswer).trim();
759
+ if (parsed.richMedia === false && (parsed.extractors ?? []).length > 0) {
760
+ throw new Error('Cannot use "--no-rich-media" together with "--extractors".');
761
+ }
762
+ const extractorsFromArgs = (parsed.extractors ?? []).filter((x) => AVAILABLE_EXTRACTORS.has(x)).sort();
763
+ const richMediaAnswer = extractorsFromArgs.length > 0 ? true : typeof parsed.richMedia === "boolean" ? parsed.richMedia : nonInteractive ? false : await confirm2({
764
+ message: "Enable rich media ingestion (PDF/images/audio/video/files)? This also enables multimodal image embeddings (you can change this later).",
765
+ initialValue: false
766
+ });
767
+ if (isCancel2(richMediaAnswer)) {
768
+ cancel2("Cancelled.");
769
+ return;
770
+ }
771
+ const richMediaEnabled = Boolean(richMediaAnswer);
772
+ const selectedExtractorsAnswer = richMediaEnabled || extractorsFromArgs.length > 0 ? nonInteractive ? extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS : await groupMultiselect({
773
+ message: "Select extractors to enable (space to toggle, enter to confirm)",
774
+ options: EXTRACTOR_OPTIONS.reduce((acc, opt) => {
775
+ acc[opt.group] ??= [];
776
+ acc[opt.group].push({
777
+ value: opt.value,
778
+ label: opt.label,
779
+ ...opt.hint ? { hint: opt.hint } : {}
780
+ });
781
+ return acc;
782
+ }, {}),
783
+ initialValues: extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS,
784
+ required: false
785
+ }) : [];
786
+ if (isCancel2(selectedExtractorsAnswer)) {
787
+ cancel2("Cancelled.");
788
+ return;
789
+ }
790
+ const selectedExtractors = Array.from(new Set(Array.isArray(selectedExtractorsAnswer) ? selectedExtractorsAnswer : [])).sort();
626
791
  const selection = {
627
792
  installDir,
628
793
  storeAdapter: storeAdapterAnswer,
629
794
  projectRoot: root,
630
795
  registryRoot,
631
- aliasBase
796
+ aliasBase,
797
+ richMedia: richMediaEnabled ? {
798
+ enabled: true,
799
+ extractors: selectedExtractors
800
+ } : { enabled: false, extractors: [] }
632
801
  };
633
802
  await copyRegistryFiles(selection);
803
+ if (richMediaEnabled && selectedExtractors.length > 0) {
804
+ for (const extractor of selectedExtractors) {
805
+ await copyExtractorFiles({
806
+ projectRoot: root,
807
+ registryRoot,
808
+ installDir,
809
+ extractor,
810
+ yes: nonInteractive
811
+ });
812
+ }
813
+ }
634
814
  const pkg = await readPackageJson(root);
635
815
  const { deps, devDeps } = depsForAdapter(storeAdapterAnswer);
636
- const merged = mergeDeps(pkg, deps, devDeps);
816
+ const extractorDeps = {};
817
+ const extractorDevDeps = {};
818
+ for (const ex of selectedExtractors) {
819
+ const r = depsForExtractor(ex);
820
+ Object.assign(extractorDeps, r.deps);
821
+ Object.assign(extractorDevDeps, r.devDeps);
822
+ }
823
+ const merged = mergeDeps(pkg, { ...deps, ...extractorDeps }, { ...devDeps, ...extractorDevDeps });
637
824
  if (merged.changes.length > 0) {
638
825
  await writePackageJson(root, merged.pkg);
639
826
  }
@@ -643,7 +830,10 @@ async function initCommand(args) {
643
830
  aliasBase,
644
831
  version: CONFIG_VERSION,
645
832
  connectors: existing?.connectors ?? [],
646
- extractors: existing?.extractors ?? []
833
+ extractors: Array.from(new Set([
834
+ ...existing?.extractors ?? [],
835
+ ...richMediaEnabled ? selectedExtractors : []
836
+ ])).sort()
647
837
  };
648
838
  await writeJsonFile(path5.join(root, CONFIG_FILE), config);
649
839
  const pm = await detectPackageManager(root);
@@ -657,6 +847,11 @@ async function initCommand(args) {
657
847
  `- Docs: ${path5.join(installDir, "unrag.md")}`,
658
848
  `- Config: unrag.config.ts`,
659
849
  `- Imports: ${aliasBase}/* and ${aliasBase}/config`,
850
+ "",
851
+ `- Rich media: ${richMediaEnabled ? "enabled" : "disabled"}`,
852
+ richMediaEnabled ? `- Embeddings: multimodal enabled (images can be embedded directly)` : `- Embeddings: text-only (no direct image embedding)`,
853
+ richMediaEnabled ? `- Extractors: ${selectedExtractors.length > 0 ? selectedExtractors.join(", ") : "none"}` : "",
854
+ richMediaEnabled ? ` Tip: you can tweak extractors + assetProcessing flags in unrag.config.ts later.` : ` Tip: re-run \`unrag init --rich-media\` (or edit unrag.config.ts) to enable rich media later.`,
660
855
  isNext ? tsconfigResult.changed ? `- Next.js: updated ${tsconfigResult.file} (added aliases)` : `- Next.js: no tsconfig changes needed` : `- Next.js: not detected`,
661
856
  "",
662
857
  merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
@@ -685,7 +880,7 @@ function docsUrl(siteRelativePath) {
685
880
  var CONFIG_FILE2 = "unrag.json";
686
881
  var __filename3 = fileURLToPath2(import.meta.url);
687
882
  var __dirname3 = path6.dirname(__filename3);
688
- var AVAILABLE_EXTRACTORS = [
883
+ var AVAILABLE_EXTRACTORS2 = [
689
884
  "pdf-llm",
690
885
  "pdf-text-layer",
691
886
  "pdf-ocr",
@@ -738,7 +933,7 @@ async function addCommand(args) {
738
933
  " unrag add extractor <name>",
739
934
  "",
740
935
  "Available connectors: notion",
741
- `Available extractors: ${AVAILABLE_EXTRACTORS.join(", ")}`
936
+ `Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`
742
937
  ].join(`
743
938
  `));
744
939
  return;
@@ -790,10 +985,10 @@ Available connectors: notion`);
790
985
  return;
791
986
  }
792
987
  const extractor = name;
793
- if (!extractor || !AVAILABLE_EXTRACTORS.includes(extractor)) {
988
+ if (!extractor || !AVAILABLE_EXTRACTORS2.includes(extractor)) {
794
989
  outro2(`Unknown extractor: ${name}
795
990
 
796
- Available extractors: ${AVAILABLE_EXTRACTORS.join(", ")}`);
991
+ Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`);
797
992
  return;
798
993
  }
799
994
  await copyExtractorFiles({
@@ -844,10 +1039,15 @@ function renderHelp() {
844
1039
  " --store <adapter> drizzle | prisma | raw-sql",
845
1040
  " --dir <path> Install directory (alias: --install-dir)",
846
1041
  " --alias <@name> Import alias base (e.g. @unrag)",
1042
+ " --rich-media Enable rich media setup (also enables multimodal embeddings)",
1043
+ " --no-rich-media Disable rich media setup",
1044
+ " --extractors <list> Comma-separated extractors (implies --rich-media)",
847
1045
  "",
848
1046
  "Examples:",
849
1047
  " bunx unrag@latest init",
850
1048
  " bunx unrag@latest init --yes --store drizzle --dir lib/unrag --alias @unrag",
1049
+ " bunx unrag@latest init --yes --rich-media",
1050
+ " bunx unrag@latest init --yes --extractors pdf-text-layer,file-text",
851
1051
  " bunx unrag add notion --yes",
852
1052
  "",
853
1053
  "Docs:",
package/package.json CHANGED
@@ -2,10 +2,11 @@
2
2
  "name": "unrag",
3
3
  "type": "module",
4
4
  "repository": "https://github.com/BetterStacks/unrag",
5
+ "homepage": "https://unrag.dev",
5
6
  "bin": {
6
7
  "unrag": "./dist/cli/index.js"
7
8
  },
8
- "version": "0.2.3",
9
+ "version": "0.2.4",
9
10
  "private": false,
10
11
  "license": "Apache-2.0",
11
12
  "devDependencies": {
@@ -11,6 +11,8 @@
11
11
  * treated like vendored source code.
12
12
  */
13
13
 
14
+ // @ts-nocheck
15
+
14
16
  // __UNRAG_IMPORTS__
15
17
 
16
18
  export const unrag = defineUnragConfig({
@@ -26,8 +28,8 @@ export const unrag = defineUnragConfig({
26
28
  embedding: {
27
29
  provider: "ai",
28
30
  config: {
29
- type: "text",
30
- model: "openai/text-embedding-3-small",
31
+ type: "text", // __UNRAG_EMBEDDING_TYPE__
32
+ model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__
31
33
  timeoutMs: 15_000,
32
34
  },
33
35
  },
@@ -52,13 +54,15 @@ export const unrag = defineUnragConfig({
52
54
  * - `import { createPdfLlmExtractor } from "./lib/unrag/extractors/pdf-llm";`
53
55
  * - `extractors: [createPdfLlmExtractor()]`
54
56
  */
55
- extractors: [],
57
+ extractors: [
58
+ // __UNRAG_EXTRACTORS__
59
+ ],
56
60
  /**
57
61
  * Rich media processing controls.
58
62
  *
59
63
  * Notes:
60
- * - The library defaults are cost-safe (PDF LLM extraction is off).
61
- * - This generated config opts you into PDF extraction for convenience.
64
+ * - This generated config is cost-safe by default (all extraction is off).
65
+ * - `unrag init` can enable rich media + multimodal embeddings for you.
62
66
  * - Tighten fetch allowlists/limits in production if you ingest URL-based assets.
63
67
  */
64
68
  assetProcessing: {
@@ -74,14 +78,14 @@ export const unrag = defineUnragConfig({
74
78
  pdf: {
75
79
  // Fast/cheap text-layer extraction (requires installing a PDF text-layer extractor module).
76
80
  textLayer: {
77
- enabled: false,
81
+ enabled: false, // __UNRAG_FLAG_pdf_textLayer__
78
82
  maxBytes: 15 * 1024 * 1024,
79
83
  maxOutputChars: 200_000,
80
84
  minChars: 200,
81
85
  // maxPages: 200,
82
86
  },
83
87
  llmExtraction: {
84
- enabled: true,
88
+ enabled: false, // __UNRAG_FLAG_pdf_llmExtraction__
85
89
  model: "google/gemini-2.0-flash",
86
90
  prompt:
87
91
  "Extract all readable text from this PDF as faithfully as possible. Preserve structure with headings and lists when obvious. Output plain text or markdown only. Do not add commentary.",
@@ -91,7 +95,7 @@ export const unrag = defineUnragConfig({
91
95
  },
92
96
  // Worker-only OCR pipelines typically require native binaries (poppler/tesseract) or external services.
93
97
  ocr: {
94
- enabled: false,
98
+ enabled: false, // __UNRAG_FLAG_pdf_ocr__
95
99
  maxBytes: 15 * 1024 * 1024,
96
100
  maxOutputChars: 200_000,
97
101
  minChars: 200,
@@ -104,7 +108,7 @@ export const unrag = defineUnragConfig({
104
108
  },
105
109
  image: {
106
110
  ocr: {
107
- enabled: false,
111
+ enabled: false, // __UNRAG_FLAG_image_ocr__
108
112
  model: "google/gemini-2.0-flash",
109
113
  prompt:
110
114
  "Extract all readable text from this image as faithfully as possible. Output plain text only. Do not add commentary.",
@@ -113,7 +117,7 @@ export const unrag = defineUnragConfig({
113
117
  maxOutputChars: 50_000,
114
118
  },
115
119
  captionLlm: {
116
- enabled: false,
120
+ enabled: false, // __UNRAG_FLAG_image_captionLlm__
117
121
  model: "google/gemini-2.0-flash",
118
122
  prompt:
119
123
  "Write a concise, information-dense caption for this image. Include names, numbers, and labels if visible. Output plain text only.",
@@ -124,7 +128,7 @@ export const unrag = defineUnragConfig({
124
128
  },
125
129
  audio: {
126
130
  transcription: {
127
- enabled: false,
131
+ enabled: false, // __UNRAG_FLAG_audio_transcription__
128
132
  model: "openai/whisper-1",
129
133
  timeoutMs: 120_000,
130
134
  maxBytes: 25 * 1024 * 1024,
@@ -132,13 +136,13 @@ export const unrag = defineUnragConfig({
132
136
  },
133
137
  video: {
134
138
  transcription: {
135
- enabled: false,
139
+ enabled: false, // __UNRAG_FLAG_video_transcription__
136
140
  model: "openai/whisper-1",
137
141
  timeoutMs: 120_000,
138
142
  maxBytes: 50 * 1024 * 1024,
139
143
  },
140
144
  frames: {
141
- enabled: false,
145
+ enabled: false, // __UNRAG_FLAG_video_frames__
142
146
  sampleFps: 0.2,
143
147
  maxFrames: 50,
144
148
  // ffmpegPath: "/usr/bin/ffmpeg",
@@ -151,10 +155,30 @@ export const unrag = defineUnragConfig({
151
155
  },
152
156
  },
153
157
  file: {
154
- text: { enabled: false, maxBytes: 5 * 1024 * 1024, maxOutputChars: 200_000, minChars: 50 },
155
- docx: { enabled: false, maxBytes: 15 * 1024 * 1024, maxOutputChars: 200_000, minChars: 50 },
156
- pptx: { enabled: false, maxBytes: 30 * 1024 * 1024, maxOutputChars: 200_000, minChars: 50 },
157
- xlsx: { enabled: false, maxBytes: 30 * 1024 * 1024, maxOutputChars: 200_000, minChars: 50 },
158
+ text: {
159
+ enabled: false, // __UNRAG_FLAG_file_text__
160
+ maxBytes: 5 * 1024 * 1024,
161
+ maxOutputChars: 200_000,
162
+ minChars: 50,
163
+ },
164
+ docx: {
165
+ enabled: false, // __UNRAG_FLAG_file_docx__
166
+ maxBytes: 15 * 1024 * 1024,
167
+ maxOutputChars: 200_000,
168
+ minChars: 50,
169
+ },
170
+ pptx: {
171
+ enabled: false, // __UNRAG_FLAG_file_pptx__
172
+ maxBytes: 30 * 1024 * 1024,
173
+ maxOutputChars: 200_000,
174
+ minChars: 50,
175
+ },
176
+ xlsx: {
177
+ enabled: false, // __UNRAG_FLAG_file_xlsx__
178
+ maxBytes: 30 * 1024 * 1024,
179
+ maxOutputChars: 200_000,
180
+ minChars: 50,
181
+ },
158
182
  },
159
183
  },
160
184
  },