unrag 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/cli/index.js +611 -174
  2. package/package.json +12 -6
  3. package/registry/config/unrag.config.ts +9 -8
  4. package/registry/connectors/google-drive/_api-types.ts +60 -0
  5. package/registry/connectors/google-drive/client.ts +99 -38
  6. package/registry/connectors/google-drive/sync.ts +97 -69
  7. package/registry/connectors/google-drive/types.ts +76 -37
  8. package/registry/connectors/notion/client.ts +12 -3
  9. package/registry/connectors/notion/render.ts +62 -23
  10. package/registry/connectors/notion/sync.ts +30 -23
  11. package/registry/core/assets.ts +11 -10
  12. package/registry/core/config.ts +10 -25
  13. package/registry/core/context-engine.ts +71 -2
  14. package/registry/core/deep-merge.ts +45 -0
  15. package/registry/core/ingest.ts +117 -44
  16. package/registry/core/types.ts +96 -2
  17. package/registry/docs/unrag.md +6 -1
  18. package/registry/embedding/_shared.ts +25 -0
  19. package/registry/embedding/ai.ts +8 -68
  20. package/registry/embedding/azure.ts +88 -0
  21. package/registry/embedding/bedrock.ts +88 -0
  22. package/registry/embedding/cohere.ts +88 -0
  23. package/registry/embedding/google.ts +102 -0
  24. package/registry/embedding/mistral.ts +71 -0
  25. package/registry/embedding/ollama.ts +90 -0
  26. package/registry/embedding/openai.ts +88 -0
  27. package/registry/embedding/openrouter.ts +127 -0
  28. package/registry/embedding/together.ts +77 -0
  29. package/registry/embedding/vertex.ts +111 -0
  30. package/registry/embedding/voyage.ts +169 -0
  31. package/registry/extractors/audio-transcribe/index.ts +39 -23
  32. package/registry/extractors/file-docx/index.ts +8 -1
  33. package/registry/extractors/file-pptx/index.ts +22 -1
  34. package/registry/extractors/file-xlsx/index.ts +24 -1
  35. package/registry/extractors/image-caption-llm/index.ts +8 -3
  36. package/registry/extractors/image-ocr/index.ts +9 -4
  37. package/registry/extractors/pdf-llm/index.ts +9 -4
  38. package/registry/extractors/pdf-text-layer/index.ts +23 -2
  39. package/registry/extractors/video-frames/index.ts +8 -3
  40. package/registry/extractors/video-transcribe/index.ts +40 -24
  41. package/registry/manifest.json +346 -0
  42. package/registry/store/drizzle-postgres-pgvector/store.ts +26 -6
package/dist/cli/index.js CHANGED
@@ -13,7 +13,7 @@ import {
13
13
  select,
14
14
  text
15
15
  } from "@clack/prompts";
16
- import path5 from "node:path";
16
+ import path6 from "node:path";
17
17
  import { fileURLToPath } from "node:url";
18
18
 
19
19
  // cli/lib/registry.ts
@@ -108,10 +108,31 @@ var EXTRACTOR_FLAG_KEYS = {
108
108
  "file-xlsx": ["file_xlsx"]
109
109
  };
110
110
  var ALL_FLAG_KEYS = Array.from(new Set(Object.values(EXTRACTOR_FLAG_KEYS).flat())).sort();
111
/**
 * Indents a multi-line string.
 *
 * Every non-empty line of `text` is prefixed with `spaces` space characters.
 * Empty lines are passed through unchanged so that no whitespace-only lines
 * are produced.
 *
 * @param {string} text - Newline-separated text to indent.
 * @param {number} spaces - Number of spaces to prepend to each non-empty line.
 * @returns {string} The indented text, joined with "\n".
 */
var indentBlock = (text, spaces) => {
  const prefix = " ".repeat(spaces);
  const indented = [];
  for (const line of text.split("\n")) {
    indented.push(line === "" ? line : prefix + line);
  }
  return indented.join("\n");
};
117
/**
 * Replaces a marker-delimited region of `content`, whole lines at a time.
 *
 * The replaced region starts at the beginning of the line that contains
 * `startMarker` and ends just after the newline of the line that contains
 * `endMarker` (or at end of input when that line has no trailing newline).
 *
 * @param {string} content - Text to edit.
 * @param {string} startMarker - Substring identifying the first line of the region.
 * @param {string} endMarker - Substring identifying the last line of the region.
 * @param {string} replacement - Text inserted in place of the region.
 * @returns {string} The edited text, or `content` unchanged when the start
 *   marker is missing or no end marker follows it.
 */
var replaceBetweenMarkers = (content, startMarker, endMarker, replacement) => {
  const startIdx = content.indexOf(startMarker);
  if (startIdx < 0) {
    return content;
  }
  // Look for the end marker from the start marker onward. Searching from
  // offset 0 would let a stray earlier occurrence of the end marker hide a
  // perfectly valid start…end pair and silently skip the replacement.
  const endIdx = content.indexOf(endMarker, startIdx);
  if (endIdx < 0) {
    return content;
  }
  // Widen the span to whole lines so the comment syntax around the markers
  // is removed together with the markers themselves.
  const startLineStart = content.lastIndexOf("\n", startIdx);
  const start = startLineStart < 0 ? 0 : startLineStart + 1;
  const endLineEnd = content.indexOf("\n", endIdx);
  const end = endLineEnd < 0 ? content.length : endLineEnd + 1;
  return content.slice(0, start) + replacement + content.slice(end);
};
111
130
  var renderUnragConfig = (content, selection) => {
112
131
  const installImportBase = `./${selection.installDir.replace(/\\/g, "/")}`;
113
132
  const richMedia = selection.richMedia ?? { enabled: false, extractors: [] };
114
133
  const selectedExtractors = Array.from(new Set(richMedia.extractors ?? [])).sort();
134
+ const preset = selection.presetConfig;
135
+ const embeddingProvider = selection.embeddingProvider ?? (typeof preset?.embedding?.provider === "string" ? preset.embedding.provider : undefined) ?? "ai";
115
136
  const baseImports = [
116
137
  `import { defineUnragConfig } from "${installImportBase}/core";`
117
138
  ];
@@ -145,17 +166,109 @@ var renderUnragConfig = (content, selection) => {
145
166
  ].join(`
146
167
  `);
147
168
  let out = content.replace("// __UNRAG_IMPORTS__", importsBlock).replace("// __UNRAG_CREATE_ENGINE__", createEngineBlock);
148
- out = out.replace('type: "text", // __UNRAG_EMBEDDING_TYPE__', richMedia.enabled ? 'type: "multimodal",' : 'type: "text",').replace('model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__', richMedia.enabled ? 'model: "cohere/embed-v4.0",' : 'model: "openai/text-embedding-3-small",');
149
- const enabledFlagKeys = new Set;
150
- if (richMedia.enabled) {
151
- for (const ex of selectedExtractors) {
152
- for (const k of EXTRACTOR_FLAG_KEYS[ex] ?? []) {
153
- enabledFlagKeys.add(k);
154
- }
169
+ const presetChunkSize = preset?.defaults?.chunking?.chunkSize;
170
+ const presetChunkOverlap = preset?.defaults?.chunking?.chunkOverlap;
171
+ const presetTopK = preset?.defaults?.retrieval?.topK;
172
+ if (typeof presetChunkSize === "number") {
173
+ out = out.replace("chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__", `chunkSize: ${presetChunkSize},`);
174
+ } else {
175
+ out = out.replace("chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__", "chunkSize: 200,");
176
+ }
177
+ if (typeof presetChunkOverlap === "number") {
178
+ out = out.replace("chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__", `chunkOverlap: ${presetChunkOverlap},`);
179
+ } else {
180
+ out = out.replace("chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__", "chunkOverlap: 40,");
181
+ }
182
+ if (typeof presetTopK === "number") {
183
+ out = out.replace("topK: 8, // __UNRAG_DEFAULT_topK__", `topK: ${presetTopK},`);
184
+ } else {
185
+ out = out.replace("topK: 8, // __UNRAG_DEFAULT_topK__", "topK: 8,");
186
+ }
187
+ const presetEmbeddingType = preset?.embedding?.config?.type;
188
+ const presetEmbeddingModel = preset?.embedding?.config?.model;
189
+ const presetEmbeddingTimeoutMs = preset?.embedding?.config?.timeoutMs;
190
+ const providerLine = ` provider: "${embeddingProvider}",`;
191
+ out = out.replace(/^\s*provider:\s*".*?",\s*$/m, providerLine);
192
+ const defaultModelByProvider = {
193
+ ai: "openai/text-embedding-3-small",
194
+ openai: "text-embedding-3-small",
195
+ google: "gemini-embedding-001",
196
+ openrouter: "text-embedding-3-small",
197
+ azure: "text-embedding-3-small",
198
+ vertex: "text-embedding-004",
199
+ bedrock: "amazon.titan-embed-text-v2:0",
200
+ cohere: "embed-english-v3.0",
201
+ mistral: "mistral-embed",
202
+ together: "togethercomputer/m2-bert-80M-2k-retrieval",
203
+ ollama: "nomic-embed-text",
204
+ voyage: "voyage-3.5-lite",
205
+ custom: "openai/text-embedding-3-small"
206
+ };
207
+ const resolvedEmbeddingModel = (() => {
208
+ if (typeof presetEmbeddingModel === "string" && presetEmbeddingModel.trim().length > 0) {
209
+ return presetEmbeddingModel.trim();
210
+ }
211
+ if (embeddingProvider === "ai" && presetEmbeddingType === "multimodal") {
212
+ return "cohere/embed-v4.0";
213
+ }
214
+ return defaultModelByProvider[embeddingProvider] ?? "openai/text-embedding-3-small";
215
+ })();
216
+ const normalizeModelForProvider = (model) => {
217
+ if (embeddingProvider === "ai")
218
+ return model;
219
+ const prefix = `${embeddingProvider}/`;
220
+ return model.startsWith(prefix) ? model.slice(prefix.length) : model;
221
+ };
222
+ const nextModel = normalizeModelForProvider(resolvedEmbeddingModel);
223
+ out = out.replace('model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__', `model: ${JSON.stringify(nextModel)},`);
224
+ if (presetEmbeddingType === "multimodal") {
225
+ if (!out.includes('type: "multimodal"') && !out.includes('type: "text"')) {
226
+ out = out.replace(`config: {
227
+ model:`, `config: {
228
+ type: "multimodal",
229
+ model:`);
230
+ } else {
231
+ out = out.replace(/^\s*type:\s*".*?",\s*$/m, ` type: "multimodal",`);
155
232
  }
156
233
  }
157
- for (const k of ALL_FLAG_KEYS) {
158
- out = out.replace(`enabled: false, // __UNRAG_FLAG_${k}__`, `enabled: ${enabledFlagKeys.has(k) ? "true" : "false"},`);
234
+ if (typeof presetEmbeddingTimeoutMs === "number") {
235
+ out = out.replace("timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__", `timeoutMs: ${presetEmbeddingTimeoutMs},`);
236
+ } else {
237
+ out = out.replace("timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__", "timeoutMs: 15_000,");
238
+ }
239
+ const presetStoreChunkContent = preset?.engine?.storage?.storeChunkContent;
240
+ const presetStoreDocumentContent = preset?.engine?.storage?.storeDocumentContent;
241
+ if (typeof presetStoreChunkContent === "boolean") {
242
+ out = out.replace("storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__", `storeChunkContent: ${presetStoreChunkContent},`);
243
+ } else {
244
+ out = out.replace("storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__", "storeChunkContent: true,");
245
+ }
246
+ if (typeof presetStoreDocumentContent === "boolean") {
247
+ out = out.replace("storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__", `storeDocumentContent: ${presetStoreDocumentContent},`);
248
+ } else {
249
+ out = out.replace("storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__", "storeDocumentContent: true,");
250
+ }
251
+ const assetProcessingOverride = preset?.engine?.assetProcessing;
252
+ if (assetProcessingOverride && typeof assetProcessingOverride === "object") {
253
+ const json = JSON.stringify(assetProcessingOverride, null, 2);
254
+ const block = ` assetProcessing: ${indentBlock(json, 2).trimStart()},
255
+ `;
256
+ out = replaceBetweenMarkers(out, "__UNRAG_ASSET_PROCESSING_BLOCK_START__", "__UNRAG_ASSET_PROCESSING_BLOCK_END__", block);
257
+ } else {
258
+ out = out.replace("// __UNRAG_ASSET_PROCESSING_BLOCK_START__", "").replace("// __UNRAG_ASSET_PROCESSING_BLOCK_END__", "");
259
+ }
260
+ if (!(assetProcessingOverride && typeof assetProcessingOverride === "object")) {
261
+ const enabledFlagKeys = new Set;
262
+ if (richMedia.enabled) {
263
+ for (const ex of selectedExtractors) {
264
+ for (const k of EXTRACTOR_FLAG_KEYS[ex] ?? []) {
265
+ enabledFlagKeys.add(k);
266
+ }
267
+ }
268
+ }
269
+ for (const k of ALL_FLAG_KEYS) {
270
+ out = out.replace(`enabled: false, // __UNRAG_FLAG_${k}__`, `enabled: ${enabledFlagKeys.has(k) ? "true" : "false"},`);
271
+ }
159
272
  }
160
273
  const extractorLines = richMedia.enabled && selectedExtractors.length > 0 ? selectedExtractors.map((ex) => ` ${EXTRACTOR_FACTORY[ex]}(),`).join(`
161
274
  `) : "";
@@ -164,6 +277,7 @@ var renderUnragConfig = (content, selection) => {
164
277
  };
165
278
  var renderDocs = (content, selection) => {
166
279
  const notes = [];
280
+ const embeddingProvider = selection.embeddingProvider ?? "ai";
167
281
  if (selection.storeAdapter === "drizzle") {
168
282
  notes.push("## Store adapter: Drizzle", "", "You can import the generated Drizzle schema module into your app’s main Drizzle schema to avoid duplicating table definitions.", "", "Example pattern:", "```ts", `import * as rag from "./${selection.installDir}/store/drizzle/schema";`, "", "export const schema = {", " ...rag.schema,", " // ...your app tables", "};", "```", "", "Then run Drizzle migrations from your app as usual.");
169
283
  } else if (selection.storeAdapter === "prisma") {
@@ -171,7 +285,42 @@ var renderDocs = (content, selection) => {
171
285
  } else {
172
286
  notes.push("## Store adapter: Raw SQL", "", "This adapter uses a `pg` Pool and parameterized SQL queries against the tables described above.", "It’s the most portable option when you don’t want ORM coupling.");
173
287
  }
174
- const withNotes = content.replace("<!-- __UNRAG_ADAPTER_NOTES__ -->", notes.join(`
288
+ const envLines = [
289
+ "## Environment variables",
290
+ "",
291
+ "Add these to your environment:",
292
+ "- `DATABASE_URL` (Postgres connection string)"
293
+ ];
294
+ if (embeddingProvider === "ai") {
295
+ envLines.push("- `AI_GATEWAY_API_KEY` (required by the AI SDK when using Vercel AI Gateway)", "- Optional: `AI_GATEWAY_MODEL` (defaults to `openai/text-embedding-3-small`)");
296
+ } else if (embeddingProvider === "openai") {
297
+ envLines.push("- `OPENAI_API_KEY`", "- Optional: `OPENAI_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
298
+ } else if (embeddingProvider === "google") {
299
+ envLines.push("- `GOOGLE_GENERATIVE_AI_API_KEY`", "- Optional: `GOOGLE_GENERATIVE_AI_EMBEDDING_MODEL` (defaults to `gemini-embedding-001`)");
300
+ } else if (embeddingProvider === "openrouter") {
301
+ envLines.push("- `OPENROUTER_API_KEY`", "- Optional: `OPENROUTER_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
302
+ } else if (embeddingProvider === "cohere") {
303
+ envLines.push("- `COHERE_API_KEY`", "- Optional: `COHERE_EMBEDDING_MODEL` (defaults to `embed-english-v3.0`)");
304
+ } else if (embeddingProvider === "mistral") {
305
+ envLines.push("- `MISTRAL_API_KEY`", "- Optional: `MISTRAL_EMBEDDING_MODEL` (defaults to `mistral-embed`)");
306
+ } else if (embeddingProvider === "together") {
307
+ envLines.push("- `TOGETHER_AI_API_KEY`", "- Optional: `TOGETHER_AI_EMBEDDING_MODEL` (defaults to `togethercomputer/m2-bert-80M-2k-retrieval`)");
308
+ } else if (embeddingProvider === "voyage") {
309
+ envLines.push("- `VOYAGE_API_KEY`", "- Optional: `VOYAGE_MODEL` (defaults to `voyage-3.5-lite`)");
310
+ } else if (embeddingProvider === "ollama") {
311
+ envLines.push("- Optional: `OLLAMA_EMBEDDING_MODEL` (defaults to `nomic-embed-text`)");
312
+ } else if (embeddingProvider === "azure") {
313
+ envLines.push("- `AZURE_OPENAI_API_KEY`", "- `AZURE_RESOURCE_NAME`", "- Optional: `AZURE_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
314
+ } else if (embeddingProvider === "vertex") {
315
+ envLines.push("- `GOOGLE_APPLICATION_CREDENTIALS` (when running outside GCP)", "- Optional: `GOOGLE_VERTEX_EMBEDDING_MODEL` (defaults to `text-embedding-004`)");
316
+ } else if (embeddingProvider === "bedrock") {
317
+ envLines.push("- `AWS_REGION`", "- AWS credentials (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) when running outside AWS", "- Optional: `BEDROCK_EMBEDDING_MODEL` (defaults to `amazon.titan-embed-text-v2:0`)");
318
+ }
319
+ const withEnv = content.replace(/## Environment variables[\s\S]*?## Database requirements/, `${envLines.join(`
320
+ `)}
321
+
322
+ ## Database requirements`);
323
+ const withNotes = withEnv.replace("<!-- __UNRAG_ADAPTER_NOTES__ -->", notes.join(`
175
324
  `));
176
325
  return withNotes.replaceAll("@unrag/config", `${selection.aliasBase}/config`).replaceAll("`@unrag/*`", `\`${selection.aliasBase}/*\``);
177
326
  };
@@ -225,9 +374,57 @@ async function copyRegistryFiles(selection) {
225
374
  src: path2.join(selection.registryRoot, "core/retrieve.ts"),
226
375
  dest: path2.join(installBaseAbs, "core/retrieve.ts")
227
376
  },
377
+ {
378
+ src: path2.join(selection.registryRoot, "embedding/_shared.ts"),
379
+ dest: path2.join(installBaseAbs, "embedding/_shared.ts")
380
+ },
228
381
  {
229
382
  src: path2.join(selection.registryRoot, "embedding/ai.ts"),
230
383
  dest: path2.join(installBaseAbs, "embedding/ai.ts")
384
+ },
385
+ {
386
+ src: path2.join(selection.registryRoot, "embedding/openai.ts"),
387
+ dest: path2.join(installBaseAbs, "embedding/openai.ts")
388
+ },
389
+ {
390
+ src: path2.join(selection.registryRoot, "embedding/google.ts"),
391
+ dest: path2.join(installBaseAbs, "embedding/google.ts")
392
+ },
393
+ {
394
+ src: path2.join(selection.registryRoot, "embedding/openrouter.ts"),
395
+ dest: path2.join(installBaseAbs, "embedding/openrouter.ts")
396
+ },
397
+ {
398
+ src: path2.join(selection.registryRoot, "embedding/azure.ts"),
399
+ dest: path2.join(installBaseAbs, "embedding/azure.ts")
400
+ },
401
+ {
402
+ src: path2.join(selection.registryRoot, "embedding/vertex.ts"),
403
+ dest: path2.join(installBaseAbs, "embedding/vertex.ts")
404
+ },
405
+ {
406
+ src: path2.join(selection.registryRoot, "embedding/bedrock.ts"),
407
+ dest: path2.join(installBaseAbs, "embedding/bedrock.ts")
408
+ },
409
+ {
410
+ src: path2.join(selection.registryRoot, "embedding/cohere.ts"),
411
+ dest: path2.join(installBaseAbs, "embedding/cohere.ts")
412
+ },
413
+ {
414
+ src: path2.join(selection.registryRoot, "embedding/mistral.ts"),
415
+ dest: path2.join(installBaseAbs, "embedding/mistral.ts")
416
+ },
417
+ {
418
+ src: path2.join(selection.registryRoot, "embedding/together.ts"),
419
+ dest: path2.join(installBaseAbs, "embedding/together.ts")
420
+ },
421
+ {
422
+ src: path2.join(selection.registryRoot, "embedding/ollama.ts"),
423
+ dest: path2.join(installBaseAbs, "embedding/ollama.ts")
424
+ },
425
+ {
426
+ src: path2.join(selection.registryRoot, "embedding/voyage.ts"),
427
+ dest: path2.join(installBaseAbs, "embedding/voyage.ts")
231
428
  }
232
429
  ];
233
430
  if (selection.storeAdapter === "drizzle") {
@@ -258,21 +455,27 @@ async function copyRegistryFiles(selection) {
258
455
  dest: path2.join(installBaseAbs, "store/prisma/store.ts")
259
456
  });
260
457
  }
458
+ const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
459
+ const overwritePolicy = selection.overwrite ?? "skip";
261
460
  for (const mapping of fileMappings) {
262
461
  if (!await exists(mapping.src)) {
263
462
  throw new Error(`Registry file missing: ${mapping.src}`);
264
463
  }
265
464
  if (await exists(mapping.dest)) {
266
- const answer = await confirm({
267
- message: `Overwrite ${path2.relative(selection.projectRoot, mapping.dest)}?`,
268
- initialValue: false
269
- });
270
- if (isCancel(answer)) {
271
- cancel("Cancelled.");
272
- return;
273
- }
274
- if (!answer) {
465
+ if (overwritePolicy === "force") {} else if (nonInteractive) {
275
466
  continue;
467
+ } else {
468
+ const answer = await confirm({
469
+ message: `Overwrite ${path2.relative(selection.projectRoot, mapping.dest)}?`,
470
+ initialValue: false
471
+ });
472
+ if (isCancel(answer)) {
473
+ cancel("Cancelled.");
474
+ return;
475
+ }
476
+ if (!answer) {
477
+ continue;
478
+ }
276
479
  }
277
480
  }
278
481
  const raw = await readText(mapping.src);
@@ -290,6 +493,7 @@ async function copyConnectorFiles(selection) {
290
493
  const files = await listFilesRecursive(connectorRegistryAbs);
291
494
  const destRootAbs = path2.join(installBaseAbs, "connectors", selection.connector);
292
495
  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
496
+ const overwritePolicy = selection.overwrite ?? "skip";
293
497
  for (const src of files) {
294
498
  if (!await exists(src)) {
295
499
  throw new Error(`Registry file missing: ${src}`);
@@ -297,7 +501,7 @@ async function copyConnectorFiles(selection) {
297
501
  const rel = path2.relative(connectorRegistryAbs, src);
298
502
  const dest = path2.join(destRootAbs, rel);
299
503
  if (await exists(dest)) {
300
- if (nonInteractive) {
504
+ if (overwritePolicy === "force") {} else if (nonInteractive) {
301
505
  continue;
302
506
  }
303
507
  const answer = await confirm({
@@ -329,9 +533,12 @@ async function copyExtractorFiles(selection) {
329
533
  const destRootAbs = path2.join(installBaseAbs, "extractors", selection.extractor);
330
534
  const sharedDestRootAbs = path2.join(installBaseAbs, "extractors", "_shared");
331
535
  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
536
+ const overwritePolicy = selection.overwrite ?? "skip";
332
537
  const shouldWrite = async (src, dest) => {
333
538
  if (!await exists(dest))
334
539
  return true;
540
+ if (overwritePolicy === "force")
541
+ return true;
335
542
  if (nonInteractive)
336
543
  return false;
337
544
  try {
@@ -388,26 +595,103 @@ async function writeJsonFile(filePath, data) {
388
595
  `, "utf8");
389
596
  }
390
597
 
391
- // cli/lib/packageJson.ts
598
+ // cli/lib/manifest.ts
392
599
  import path3 from "node:path";
393
- import { readFile as readFile3, writeFile as writeFile3 } from "node:fs/promises";
600
+ import { readFile as readFile3 } from "node:fs/promises";
601
/**
 * Loads and validates `<registryRoot>/manifest.json`.
 *
 * @param {string} registryRoot - Directory that contains `manifest.json`.
 * @returns {Promise<object>} The parsed manifest; guaranteed to have
 *   `version === 1` and array-valued `extractors` and `connectors`.
 * @throws {Error} When the file cannot be read, is not valid JSON, has an
 *   unsupported version, or lacks the `extractors` / `connectors` arrays.
 */
async function readRegistryManifest(registryRoot) {
  const abs = path3.join(registryRoot, "manifest.json");
  const raw = await readFile3(abs, "utf8");
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    // A bare SyntaxError does not say which file is broken; attach the path
    // and keep the original error reachable through `cause`.
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(`Invalid JSON in registry manifest ${abs}: ${detail}`, { cause: err });
  }
  if (!parsed || parsed.version !== 1) {
    throw new Error(`Unsupported registry manifest version in ${abs}`);
  }
  if (!Array.isArray(parsed.extractors) || !Array.isArray(parsed.connectors)) {
    throw new Error(`Invalid registry manifest shape in ${abs}`);
  }
  return parsed;
}
613
+
614
+ // cli/lib/constants.ts
615
/**
 * Picks the site base URL from the environment.
 *
 * Each variable is trimmed before it is considered, so an empty or
 * whitespace-only `UNRAG_SITE_URL` no longer hides a usable
 * `UNRAG_DOCS_BASE_URL`.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {string} First non-blank of `UNRAG_SITE_URL`, `UNRAG_DOCS_BASE_URL`,
 *   otherwise "https://unrag.dev".
 */
var resolveUnragSiteUrl = (env = process.env) =>
  env.UNRAG_SITE_URL?.trim() || env.UNRAG_DOCS_BASE_URL?.trim() || "https://unrag.dev";
var UNRAG_SITE_URL = resolveUnragSiteUrl();
var UNRAG_GITHUB_REPO_URL = "https://github.com/BetterStacks/unrag";
/**
 * Builds an absolute URL on the configured site for a site-relative path.
 *
 * @param {string} siteRelativePath - Path with or without a leading "/".
 * @returns {string} Absolute URL string.
 * @throws {TypeError} When `UNRAG_SITE_URL` is not a parseable absolute URL.
 */
function docsUrl(siteRelativePath) {
  const p = siteRelativePath.startsWith("/") ? siteRelativePath : `/${siteRelativePath}`;
  const base = UNRAG_SITE_URL.endsWith("/") ? UNRAG_SITE_URL : `${UNRAG_SITE_URL}/`;
  // Collapse repeated leading slashes: "//host/x" would otherwise be parsed
  // as a protocol-relative URL and point at a different host. Because the
  // path is root-relative, it replaces any path component of the base.
  return new URL(p.replace(/^\/+/, "/"), base).toString();
}
622
+
623
+ // cli/lib/preset.ts
624
/**
 * Structural check for a version-1 preset payload.
 *
 * Accepts a value only when it is an object with `version === 1`, an
 * `install` object carrying string `installDir` / `aliasBase` and a
 * `storeAdapter` of "drizzle", "prisma" or "raw-sql", and a `modules` object
 * whose `extractors` and `connectors` are arrays. Array elements are not
 * inspected.
 *
 * @param {unknown} x - Candidate payload.
 * @returns {boolean} `true` when the shape matches, otherwise `false`.
 */
function isPresetPayloadV1(x) {
  const isObjectLike = (value) => Boolean(value) && typeof value === "object";
  if (!isObjectLike(x) || x.version !== 1) {
    return false;
  }
  const { install, modules } = x;
  if (!isObjectLike(install) || !isObjectLike(modules)) {
    return false;
  }
  const supportedAdapters = ["drizzle", "prisma", "raw-sql"];
  return (
    typeof install.installDir === "string" &&
    supportedAdapters.includes(String(install.storeAdapter)) &&
    typeof install.aliasBase === "string" &&
    Array.isArray(modules.extractors) &&
    Array.isArray(modules.connectors)
  );
}
644
/**
 * Turns a preset id or URL into the URL to fetch.
 *
 * @param {unknown} input - Preset id, or a full http(s) URL.
 * @returns {string} The trimmed input when it is already an http(s) URL;
 *   otherwise the docs-site URL `/api/presets/<url-encoded id>`.
 * @throws {Error} When the input is nullish or blank.
 */
function toPresetUrl(input) {
  const s = String(input ?? "").trim();
  if (!s) {
    throw new Error("Missing preset id/url");
  }
  // URL schemes are case-insensitive, so "HTTPS://…" must be passed through
  // as a URL rather than percent-encoded as if it were a preset id.
  if (/^https?:\/\//i.test(s)) {
    return s;
  }
  return docsUrl(`/api/presets/${encodeURIComponent(s)}`);
}
653
/**
 * Downloads a preset and validates its shape.
 *
 * @param {unknown} input - Preset id or URL; resolved with `toPresetUrl`.
 * @returns {Promise<object>} The parsed payload, accepted by `isPresetPayloadV1`.
 * @throws {Error} When global `fetch` is unavailable, the request fails, the
 *   response status is not OK, the body is not JSON, or the payload fails
 *   validation. Every message names the URL that was requested.
 */
async function fetchPreset(input) {
  const url = toPresetUrl(input);
  if (typeof fetch !== "function") {
    throw new Error(`Global fetch() is unavailable in this runtime; cannot fetch preset from ${url}. Set UNRAG_SITE_URL="${UNRAG_SITE_URL}" and use a newer Node runtime.`);
  }
  let res;
  try {
    res = await fetch(url, {
      headers: {
        "user-agent": "unrag-cli",
        accept: "application/json"
      }
    });
  } catch (err) {
    // Transport-level failures reject without mentioning the target; add the
    // URL and keep the original error as `cause`.
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to fetch preset from ${url}: ${detail}`, { cause: err });
  }
  if (!res.ok) {
    // The body is best-effort context only; a failure to read it must not
    // replace the status error.
    const text = await res.text().catch(() => "");
    throw new Error(`Failed to fetch preset (${res.status}) from ${url}${text ? `: ${text}` : ""}`);
  }
  let json;
  try {
    json = await res.json();
  } catch (err) {
    // A 200 response with a non-JSON body (e.g. an HTML page) would otherwise
    // surface as a bare SyntaxError with no hint of its origin.
    throw new Error(`Invalid preset payload returned from ${url} (response body is not valid JSON)`, { cause: err });
  }
  if (!isPresetPayloadV1(json)) {
    throw new Error(`Invalid preset payload returned from ${url}`);
  }
  return json;
}
674
+
675
+ // cli/lib/packageJson.ts
676
+ import path4 from "node:path";
677
+ import { readFile as readFile4, writeFile as writeFile3 } from "node:fs/promises";
394
678
  async function detectPackageManager(projectRoot) {
395
- if (await exists(path3.join(projectRoot, "bun.lock")))
679
+ if (await exists(path4.join(projectRoot, "bun.lock")))
396
680
  return "bun";
397
- if (await exists(path3.join(projectRoot, "pnpm-lock.yaml")))
681
+ if (await exists(path4.join(projectRoot, "pnpm-lock.yaml")))
398
682
  return "pnpm";
399
- if (await exists(path3.join(projectRoot, "yarn.lock")))
683
+ if (await exists(path4.join(projectRoot, "yarn.lock")))
400
684
  return "yarn";
401
- if (await exists(path3.join(projectRoot, "package-lock.json")))
685
+ if (await exists(path4.join(projectRoot, "package-lock.json")))
402
686
  return "npm";
403
687
  return "npm";
404
688
  }
405
689
  async function readPackageJson(projectRoot) {
406
- const raw = await readFile3(path3.join(projectRoot, "package.json"), "utf8");
690
+ const raw = await readFile4(path4.join(projectRoot, "package.json"), "utf8");
407
691
  return JSON.parse(raw);
408
692
  }
409
693
  async function writePackageJson(projectRoot, pkg) {
410
- await writeFile3(path3.join(projectRoot, "package.json"), JSON.stringify(pkg, null, 2) + `
694
+ await writeFile3(path4.join(projectRoot, "package.json"), JSON.stringify(pkg, null, 2) + `
411
695
  `, "utf8");
412
696
  }
413
697
  function mergeDeps(pkg, deps, devDeps) {
@@ -431,7 +715,7 @@ function mergeDeps(pkg, deps, devDeps) {
431
715
  }
432
716
  function depsForAdapter(adapter) {
433
717
  const deps = {
434
- ai: "^5.0.113"
718
+ ai: "^6.0.3"
435
719
  };
436
720
  const devDeps = {};
437
721
  if (adapter === "drizzle") {
@@ -465,20 +749,20 @@ function depsForExtractor(extractor) {
465
749
  const deps = {};
466
750
  const devDeps = {};
467
751
  if (extractor === "pdf-llm") {
468
- deps["ai"] = "^5.0.113";
752
+ deps["ai"] = "^6.0.3";
469
753
  }
470
754
  if (extractor === "pdf-text-layer") {
471
755
  deps["pdfjs-dist"] = "^5.4.149";
472
756
  }
473
757
  if (extractor === "pdf-ocr") {}
474
758
  if (extractor === "image-ocr" || extractor === "image-caption-llm") {
475
- deps["ai"] = "^5.0.113";
759
+ deps["ai"] = "^6.0.3";
476
760
  }
477
761
  if (extractor === "audio-transcribe" || extractor === "video-transcribe") {
478
- deps["ai"] = "^5.0.113";
762
+ deps["ai"] = "^6.0.3";
479
763
  }
480
764
  if (extractor === "video-frames") {
481
- deps["ai"] = "^5.0.113";
765
+ deps["ai"] = "^6.0.3";
482
766
  }
483
767
  if (extractor === "file-text") {}
484
768
  if (extractor === "file-docx") {
@@ -492,6 +776,33 @@ function depsForExtractor(extractor) {
492
776
  }
493
777
  return { deps, devDeps };
494
778
  }
779
/**
 * Lists the npm dependency required by an embedding provider.
 *
 * @param {string} provider - Embedding provider id.
 * @returns {{ deps: Record<string, string>, devDeps: Record<string, string> }}
 *   `deps` holds the single package/version-range pair for a recognised
 *   provider and is empty for any other value; `devDeps` is always empty.
 */
function depsForEmbeddingProvider(provider) {
  // provider id -> [package name, semver range]
  const packageByProvider = new Map([
    ["openai", ["@ai-sdk/openai", "^3.0.1"]],
    ["google", ["@ai-sdk/google", "^3.0.1"]],
    ["azure", ["@ai-sdk/azure", "^3.0.1"]],
    ["vertex", ["@ai-sdk/google-vertex", "^3.0.1"]],
    ["bedrock", ["@ai-sdk/amazon-bedrock", "^3.0.72"]],
    ["cohere", ["@ai-sdk/cohere", "^3.0.1"]],
    ["mistral", ["@ai-sdk/mistral", "^3.0.1"]],
    ["together", ["@ai-sdk/togetherai", "^3.0.1"]],
    ["openrouter", ["@openrouter/sdk", "^0.3.10"]],
    ["ollama", ["ollama-ai-provider-v2", "^2.0.0"]],
    ["voyage", ["voyage-ai-provider", "^3.0.0"]]
  ]);
  const deps = {};
  const entry = packageByProvider.get(provider);
  if (entry) {
    const [packageName, versionRange] = entry;
    deps[packageName] = versionRange;
  }
  return { deps, devDeps: {} };
}
495
806
  function installCmd(pm) {
496
807
  if (pm === "bun")
497
808
  return "bun install";
@@ -503,8 +814,8 @@ function installCmd(pm) {
503
814
  }
504
815
 
505
816
  // cli/lib/tsconfig.ts
506
- import path4 from "node:path";
507
- import { readFile as readFile4, writeFile as writeFile4 } from "node:fs/promises";
817
+ import path5 from "node:path";
818
+ import { readFile as readFile5, writeFile as writeFile4 } from "node:fs/promises";
508
819
  import { parse } from "jsonc-parser";
509
820
  var parseJsoncLoose = (raw) => {
510
821
  const errors = [];
@@ -515,14 +826,14 @@ var parseJsoncLoose = (raw) => {
515
826
  return result;
516
827
  };
517
828
  async function patchTsconfigPaths(params) {
518
- const configFile = await exists(path4.join(params.projectRoot, "tsconfig.json")) ? "tsconfig.json" : await exists(path4.join(params.projectRoot, "jsconfig.json")) ? "jsconfig.json" : null;
829
+ const configFile = await exists(path5.join(params.projectRoot, "tsconfig.json")) ? "tsconfig.json" : await exists(path5.join(params.projectRoot, "jsconfig.json")) ? "jsconfig.json" : null;
519
830
  const aliasBase = params.aliasBase;
520
831
  const aliasKey = `${aliasBase}/*`;
521
832
  const target = [`./${params.installDir.replace(/\\/g, "/")}/*`];
522
833
  const configAliasKey = `${aliasBase}/config`;
523
834
  const configTarget = ["./unrag.config.ts"];
524
835
  if (!configFile) {
525
- const abs2 = path4.join(params.projectRoot, "tsconfig.json");
836
+ const abs2 = path5.join(params.projectRoot, "tsconfig.json");
526
837
  const next2 = {
527
838
  compilerOptions: {
528
839
  baseUrl: ".",
@@ -536,8 +847,8 @@ async function patchTsconfigPaths(params) {
536
847
  `, "utf8");
537
848
  return { changed: true, file: "tsconfig.json" };
538
849
  }
539
- const abs = path4.join(params.projectRoot, configFile);
540
- const raw = await readFile4(abs, "utf8");
850
+ const abs = path5.join(params.projectRoot, configFile);
851
+ const raw = await readFile5(abs, "utf8");
541
852
  let parsed;
542
853
  try {
543
854
  parsed = parseJsoncLoose(raw);
@@ -572,7 +883,7 @@ async function patchTsconfigPaths(params) {
572
883
  var CONFIG_FILE = "unrag.json";
573
884
  var CONFIG_VERSION = 1;
574
885
  var __filename2 = fileURLToPath(import.meta.url);
575
- var __dirname2 = path5.dirname(__filename2);
886
+ var __dirname2 = path6.dirname(__filename2);
576
887
  var parseInitArgs = (args) => {
577
888
  const out = {};
578
889
  for (let i = 0;i < args.length; i++) {
@@ -621,77 +932,35 @@ var parseInitArgs = (args) => {
621
932
  }
622
933
  continue;
623
934
  }
935
+ if (a === "--provider") {
936
+ const v = args[i + 1];
937
+ if (v === "ai" || v === "openai" || v === "google" || v === "openrouter" || v === "azure" || v === "vertex" || v === "bedrock" || v === "cohere" || v === "mistral" || v === "together" || v === "ollama" || v === "voyage") {
938
+ out.provider = v;
939
+ i++;
940
+ }
941
+ continue;
942
+ }
943
+ if (a === "--preset") {
944
+ const v = args[i + 1];
945
+ if (v) {
946
+ out.preset = v;
947
+ i++;
948
+ }
949
+ continue;
950
+ }
951
+ if (a === "--overwrite") {
952
+ const v = args[i + 1];
953
+ if (v === "skip" || v === "force") {
954
+ out.overwrite = v;
955
+ i++;
956
+ }
957
+ continue;
958
+ }
624
959
  }
625
960
  return out;
626
961
  };
627
- var DEFAULT_RICH_MEDIA_EXTRACTORS = ["pdf-text-layer", "file-text"];
628
- var EXTRACTOR_OPTIONS = [
629
- {
630
- group: "PDF",
631
- value: "pdf-text-layer",
632
- label: `pdf-text-layer (Fast/cheap extraction via PDF text layer)`,
633
- hint: "recommended"
634
- },
635
- {
636
- group: "PDF",
637
- value: "pdf-llm",
638
- label: `pdf-llm (LLM-based PDF extraction; higher cost)`
639
- },
640
- {
641
- group: "PDF",
642
- value: "pdf-ocr",
643
- label: `pdf-ocr (OCR scanned PDFs; requires native binaries)`,
644
- hint: "worker-only"
645
- },
646
- {
647
- group: "Image",
648
- value: "image-ocr",
649
- label: `image-ocr (Extract text from images via vision LLM)`
650
- },
651
- {
652
- group: "Image",
653
- value: "image-caption-llm",
654
- label: `image-caption-llm (Generate captions for images via vision LLM)`
655
- },
656
- {
657
- group: "Audio",
658
- value: "audio-transcribe",
659
- label: `audio-transcribe (Speech-to-text transcription)`
660
- },
661
- {
662
- group: "Video",
663
- value: "video-transcribe",
664
- label: `video-transcribe (Transcribe video audio track)`
665
- },
666
- {
667
- group: "Video",
668
- value: "video-frames",
669
- label: `video-frames (Sample frames + analyze via vision LLM; requires ffmpeg)`,
670
- hint: "worker-only"
671
- },
672
- {
673
- group: "Files",
674
- value: "file-text",
675
- label: `file-text (Extract text/markdown/json/html from common text files)`,
676
- hint: "recommended"
677
- },
678
- {
679
- group: "Files",
680
- value: "file-docx",
681
- label: `file-docx (Extract text from .docx files)`
682
- },
683
- {
684
- group: "Files",
685
- value: "file-pptx",
686
- label: `file-pptx (Extract text from .pptx slides)`
687
- },
688
- {
689
- group: "Files",
690
- value: "file-xlsx",
691
- label: `file-xlsx (Extract tables from .xlsx spreadsheets)`
692
- }
693
- ];
694
- var AVAILABLE_EXTRACTORS = new Set(EXTRACTOR_OPTIONS.map((o) => o.value));
962
/**
 * Normalises a loosely-typed list of ids: a non-array input yields `[]`;
 * non-string entries are dropped; remaining entries are trimmed and blank
 * ones removed.
 *
 * Dropping non-strings matters because `String(null)` and `String({})` would
 * otherwise produce the bogus ids "null" and "[object Object]".
 *
 * @param {unknown} xs - Candidate list of ids.
 * @returns {string[]} Trimmed, non-empty string ids in their original order.
 */
var toTrimmedIdList = (xs) => (Array.isArray(xs) ? xs : [])
  .filter((s) => typeof s === "string")
  .map((s) => s.trim())
  .filter(Boolean);
/** Normalises a list of extractor ids; see `toTrimmedIdList`. */
var toExtractors = (xs) => toTrimmedIdList(xs);
/** Normalises a list of connector ids; see `toTrimmedIdList`. */
var toConnectors = (xs) => toTrimmedIdList(xs);
695
964
  async function initCommand(args) {
696
965
  const root = await tryFindProjectRoot(process.cwd());
697
966
  if (!root) {
@@ -701,15 +970,42 @@ async function initCommand(args) {
701
970
  if (!cliPackageRoot) {
702
971
  throw new Error("Could not locate CLI package root (package.json not found).");
703
972
  }
704
- const registryRoot = path5.join(cliPackageRoot, "registry");
705
- const existing = await readJsonFile(path5.join(root, CONFIG_FILE));
973
+ const registryRoot = path6.join(cliPackageRoot, "registry");
974
+ const manifest = await readRegistryManifest(registryRoot);
975
+ const extractorOptions = manifest.extractors.map((ex) => {
976
+ const value = ex.id;
977
+ const label = ex.description ? `${ex.label} (${ex.description})` : ex.label;
978
+ return {
979
+ group: ex.group,
980
+ value,
981
+ label,
982
+ hint: ex.hint,
983
+ defaultSelected: Boolean(ex.defaultSelected)
984
+ };
985
+ });
986
+ const availableExtractors = new Set(extractorOptions.map((o) => o.value));
987
+ const defaultRichMediaExtractors = extractorOptions.filter((o) => o.defaultSelected).map((o) => o.value).sort();
988
+ const existing = await readJsonFile(path6.join(root, CONFIG_FILE));
706
989
  const parsed = parseInitArgs(args);
990
+ const preset = parsed.preset ? await fetchPreset(parsed.preset) : null;
991
+ if (preset) {
992
+ const hasOtherChoices = Boolean(parsed.installDir) || Boolean(parsed.storeAdapter) || Boolean(parsed.aliasBase) || typeof parsed.richMedia === "boolean" || (parsed.extractors ?? []).length > 0;
993
+ if (hasOtherChoices) {
994
+ throw new Error('When using "--preset", do not pass other init preference flags (--store/--dir/--alias/--rich-media/--extractors).');
995
+ }
996
+ }
997
+ const presetEmbeddingProvider = (() => {
998
+ const v = preset?.config?.embedding?.provider;
999
+ return v === "ai" || v === "openai" || v === "google" || v === "openrouter" || v === "azure" || v === "vertex" || v === "bedrock" || v === "cohere" || v === "mistral" || v === "together" || v === "ollama" || v === "voyage" || v === "custom" ? v : undefined;
1000
+ })();
707
1001
  const defaults = {
708
- installDir: existing?.installDir ?? "lib/unrag",
709
- storeAdapter: existing?.storeAdapter ?? "drizzle",
710
- aliasBase: existing?.aliasBase ?? "@unrag"
1002
+ installDir: preset?.install?.installDir ?? existing?.installDir ?? "lib/unrag",
1003
+ storeAdapter: preset?.install?.storeAdapter ?? existing?.storeAdapter ?? "drizzle",
1004
+ aliasBase: preset?.install?.aliasBase ?? existing?.aliasBase ?? "@unrag",
1005
+ embeddingProvider: parsed.provider ?? presetEmbeddingProvider ?? existing?.embeddingProvider ?? "ai"
711
1006
  };
712
- const nonInteractive = parsed.yes || !process.stdin.isTTY;
1007
+ const nonInteractive = Boolean(parsed.yes) || Boolean(preset) || !process.stdin.isTTY;
1008
+ const overwritePolicy = parsed.overwrite ?? "skip";
713
1009
  const installDirAnswer = parsed.installDir ? parsed.installDir : nonInteractive ? defaults.installDir : await text({
714
1010
  message: "Install directory",
715
1011
  initialValue: defaults.installDir,
@@ -760,12 +1056,41 @@ async function initCommand(args) {
760
1056
  return;
761
1057
  }
762
1058
  const aliasBase = String(aliasAnswer).trim();
1059
+ const embeddingProviderAnswer = parsed.provider ? parsed.provider : nonInteractive ? defaults.embeddingProvider : await select({
1060
+ message: "Embedding provider",
1061
+ initialValue: defaults.embeddingProvider,
1062
+ options: [
1063
+ { value: "ai", label: "Vercel AI Gateway (AI SDK)", hint: "default" },
1064
+ { value: "openai", label: "OpenAI" },
1065
+ { value: "google", label: "Google AI (Gemini)" },
1066
+ { value: "openrouter", label: "OpenRouter" },
1067
+ { value: "azure", label: "Azure OpenAI" },
1068
+ { value: "vertex", label: "Google Vertex AI" },
1069
+ { value: "bedrock", label: "AWS Bedrock" },
1070
+ { value: "cohere", label: "Cohere" },
1071
+ { value: "mistral", label: "Mistral" },
1072
+ { value: "together", label: "Together.ai" },
1073
+ { value: "ollama", label: "Ollama (local)" },
1074
+ { value: "voyage", label: "Voyage AI" }
1075
+ ]
1076
+ });
1077
+ if (isCancel2(embeddingProviderAnswer)) {
1078
+ cancel2("Cancelled.");
1079
+ return;
1080
+ }
1081
+ const embeddingProvider = embeddingProviderAnswer;
763
1082
  if (parsed.richMedia === false && (parsed.extractors ?? []).length > 0) {
764
1083
  throw new Error('Cannot use "--no-rich-media" together with "--extractors".');
765
1084
  }
766
- const extractorsFromArgs = (parsed.extractors ?? []).filter((x) => AVAILABLE_EXTRACTORS.has(x)).sort();
1085
+ const extractorsFromArgs = (preset ? toExtractors(preset.modules?.extractors) : parsed.extractors ?? []).filter((x) => availableExtractors.has(x)).sort();
1086
+ if (preset) {
1087
+ const unknown = toExtractors(preset.modules?.extractors).filter((x) => !availableExtractors.has(x));
1088
+ if (unknown.length > 0) {
1089
+ throw new Error(`Preset contains unknown extractors: ${unknown.join(", ")}`);
1090
+ }
1091
+ }
767
1092
  const richMediaAnswer = extractorsFromArgs.length > 0 ? true : typeof parsed.richMedia === "boolean" ? parsed.richMedia : nonInteractive ? false : await confirm2({
768
- message: "Enable rich media ingestion (PDF/images/audio/video/files)? This also enables multimodal image embeddings (you can change this later).",
1093
+ message: "Enable rich media ingestion (PDF/images/audio/video/files)? This enables extractor modules and assetProcessing (you can change this later).",
769
1094
  initialValue: false
770
1095
  });
771
1096
  if (isCancel2(richMediaAnswer)) {
@@ -773,9 +1098,9 @@ async function initCommand(args) {
773
1098
  return;
774
1099
  }
775
1100
  const richMediaEnabled = Boolean(richMediaAnswer);
776
- const selectedExtractorsAnswer = richMediaEnabled || extractorsFromArgs.length > 0 ? nonInteractive ? extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS : await groupMultiselect({
1101
+ const selectedExtractorsAnswer = richMediaEnabled || extractorsFromArgs.length > 0 ? nonInteractive ? extractorsFromArgs.length > 0 ? extractorsFromArgs : defaultRichMediaExtractors.length > 0 ? defaultRichMediaExtractors : ["pdf-text-layer", "file-text"] : await groupMultiselect({
777
1102
  message: "Select extractors to enable (space to toggle, enter to confirm)",
778
- options: EXTRACTOR_OPTIONS.reduce((acc, opt) => {
1103
+ options: extractorOptions.reduce((acc, opt) => {
779
1104
  acc[opt.group] ??= [];
780
1105
  acc[opt.group].push({
781
1106
  value: opt.value,
@@ -784,7 +1109,7 @@ async function initCommand(args) {
784
1109
  });
785
1110
  return acc;
786
1111
  }, {}),
787
- initialValues: extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS,
1112
+ initialValues: extractorsFromArgs.length > 0 ? extractorsFromArgs : defaultRichMediaExtractors.length > 0 ? defaultRichMediaExtractors : ["pdf-text-layer", "file-text"],
788
1113
  required: false
789
1114
  }) : [];
790
1115
  if (isCancel2(selectedExtractorsAnswer)) {
@@ -798,6 +1123,10 @@ async function initCommand(args) {
798
1123
  projectRoot: root,
799
1124
  registryRoot,
800
1125
  aliasBase,
1126
+ embeddingProvider,
1127
+ yes: nonInteractive,
1128
+ overwrite: overwritePolicy,
1129
+ presetConfig: preset?.config ?? undefined,
801
1130
  richMedia: richMediaEnabled ? {
802
1131
  enabled: true,
803
1132
  extractors: selectedExtractors
@@ -811,12 +1140,14 @@ async function initCommand(args) {
811
1140
  registryRoot,
812
1141
  installDir,
813
1142
  extractor,
814
- yes: nonInteractive
1143
+ yes: nonInteractive,
1144
+ overwrite: overwritePolicy
815
1145
  });
816
1146
  }
817
1147
  }
818
1148
  const pkg = await readPackageJson(root);
819
1149
  const { deps, devDeps } = depsForAdapter(storeAdapterAnswer);
1150
+ const embeddingDeps = depsForEmbeddingProvider(embeddingProvider);
820
1151
  const extractorDeps = {};
821
1152
  const extractorDevDeps = {};
822
1153
  for (const ex of selectedExtractors) {
@@ -824,7 +1155,34 @@ async function initCommand(args) {
824
1155
  Object.assign(extractorDeps, r.deps);
825
1156
  Object.assign(extractorDevDeps, r.devDeps);
826
1157
  }
827
- const merged = mergeDeps(pkg, { ...deps, ...extractorDeps }, { ...devDeps, ...extractorDevDeps });
1158
+ const connectorsFromPreset = preset ? toConnectors(preset.modules?.connectors) : [];
1159
+ const availableConnectorIds = new Set((manifest.connectors ?? []).filter((c) => c.status === "available").map((c) => String(c.id)));
1160
+ if (preset) {
1161
+ const unknown = connectorsFromPreset.filter((c) => !availableConnectorIds.has(c));
1162
+ if (unknown.length > 0) {
1163
+ throw new Error(`Preset contains unknown/unavailable connectors: ${unknown.join(", ")}`);
1164
+ }
1165
+ }
1166
+ if (connectorsFromPreset.length > 0) {
1167
+ for (const connector of connectorsFromPreset) {
1168
+ await copyConnectorFiles({
1169
+ projectRoot: root,
1170
+ registryRoot,
1171
+ installDir,
1172
+ connector,
1173
+ yes: nonInteractive,
1174
+ overwrite: overwritePolicy
1175
+ });
1176
+ }
1177
+ }
1178
+ const connectorDeps = {};
1179
+ const connectorDevDeps = {};
1180
+ for (const c of connectorsFromPreset) {
1181
+ const r = depsForConnector(c);
1182
+ Object.assign(connectorDeps, r.deps);
1183
+ Object.assign(connectorDevDeps, r.devDeps);
1184
+ }
1185
+ const merged = mergeDeps(pkg, { ...deps, ...embeddingDeps.deps, ...extractorDeps, ...connectorDeps }, { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, ...connectorDevDeps });
828
1186
  if (merged.changes.length > 0) {
829
1187
  await writePackageJson(root, merged.pkg);
830
1188
  }
@@ -832,28 +1190,126 @@ async function initCommand(args) {
832
1190
  installDir,
833
1191
  storeAdapter: storeAdapterAnswer,
834
1192
  aliasBase,
1193
+ embeddingProvider,
835
1194
  version: CONFIG_VERSION,
836
- connectors: existing?.connectors ?? [],
1195
+ connectors: Array.from(new Set([...existing?.connectors ?? [], ...connectorsFromPreset])).sort(),
837
1196
  extractors: Array.from(new Set([
838
1197
  ...existing?.extractors ?? [],
839
1198
  ...richMediaEnabled ? selectedExtractors : []
840
1199
  ])).sort()
841
1200
  };
842
- await writeJsonFile(path5.join(root, CONFIG_FILE), config);
1201
+ await writeJsonFile(path6.join(root, CONFIG_FILE), config);
843
1202
  const pm = await detectPackageManager(root);
844
1203
  const installLine = merged.changes.length > 0 ? `Next: run \`${installCmd(pm)}\`` : "Dependencies already satisfied.";
845
1204
  const isNext = Boolean((merged.pkg.dependencies ?? {})["next"]) || Boolean((merged.pkg.devDependencies ?? {})["next"]);
846
1205
  const tsconfigResult = isNext ? await patchTsconfigPaths({ projectRoot: root, installDir, aliasBase }) : { changed: false };
1206
+ const envHint = (() => {
1207
+ if (embeddingProvider === "ai") {
1208
+ return [
1209
+ "Env:",
1210
+ "- DATABASE_URL=...",
1211
+ "- AI_GATEWAY_API_KEY=...",
1212
+ "- (optional) AI_GATEWAY_MODEL=openai/text-embedding-3-small"
1213
+ ];
1214
+ }
1215
+ if (embeddingProvider === "openai") {
1216
+ return [
1217
+ "Env:",
1218
+ "- DATABASE_URL=...",
1219
+ "- OPENAI_API_KEY=...",
1220
+ "- (optional) OPENAI_EMBEDDING_MODEL=text-embedding-3-small"
1221
+ ];
1222
+ }
1223
+ if (embeddingProvider === "google") {
1224
+ return [
1225
+ "Env:",
1226
+ "- DATABASE_URL=...",
1227
+ "- GOOGLE_GENERATIVE_AI_API_KEY=...",
1228
+ "- (optional) GOOGLE_GENERATIVE_AI_EMBEDDING_MODEL=gemini-embedding-001"
1229
+ ];
1230
+ }
1231
+ if (embeddingProvider === "openrouter") {
1232
+ return [
1233
+ "Env:",
1234
+ "- DATABASE_URL=...",
1235
+ "- OPENROUTER_API_KEY=...",
1236
+ "- (optional) OPENROUTER_EMBEDDING_MODEL=text-embedding-3-small"
1237
+ ];
1238
+ }
1239
+ if (embeddingProvider === "cohere") {
1240
+ return [
1241
+ "Env:",
1242
+ "- DATABASE_URL=...",
1243
+ "- COHERE_API_KEY=...",
1244
+ "- (optional) COHERE_EMBEDDING_MODEL=embed-english-v3.0"
1245
+ ];
1246
+ }
1247
+ if (embeddingProvider === "mistral") {
1248
+ return [
1249
+ "Env:",
1250
+ "- DATABASE_URL=...",
1251
+ "- MISTRAL_API_KEY=...",
1252
+ "- (optional) MISTRAL_EMBEDDING_MODEL=mistral-embed"
1253
+ ];
1254
+ }
1255
+ if (embeddingProvider === "together") {
1256
+ return [
1257
+ "Env:",
1258
+ "- DATABASE_URL=...",
1259
+ "- TOGETHER_AI_API_KEY=...",
1260
+ "- (optional) TOGETHER_AI_EMBEDDING_MODEL=togethercomputer/m2-bert-80M-2k-retrieval"
1261
+ ];
1262
+ }
1263
+ if (embeddingProvider === "voyage") {
1264
+ return [
1265
+ "Env:",
1266
+ "- DATABASE_URL=...",
1267
+ "- VOYAGE_API_KEY=...",
1268
+ "- (optional) VOYAGE_MODEL=voyage-3.5-lite"
1269
+ ];
1270
+ }
1271
+ if (embeddingProvider === "ollama") {
1272
+ return [
1273
+ "Env:",
1274
+ "- DATABASE_URL=...",
1275
+ "- (optional) OLLAMA_EMBEDDING_MODEL=nomic-embed-text"
1276
+ ];
1277
+ }
1278
+ if (embeddingProvider === "azure") {
1279
+ return [
1280
+ "Env:",
1281
+ "- DATABASE_URL=...",
1282
+ "- AZURE_OPENAI_API_KEY=...",
1283
+ "- AZURE_RESOURCE_NAME=...",
1284
+ "- (optional) AZURE_EMBEDDING_MODEL=text-embedding-3-small"
1285
+ ];
1286
+ }
1287
+ if (embeddingProvider === "vertex") {
1288
+ return [
1289
+ "Env:",
1290
+ "- DATABASE_URL=...",
1291
+ "- GOOGLE_APPLICATION_CREDENTIALS=... (when outside GCP)",
1292
+ "- (optional) GOOGLE_VERTEX_EMBEDDING_MODEL=text-embedding-004"
1293
+ ];
1294
+ }
1295
+ return [
1296
+ "Env:",
1297
+ "- DATABASE_URL=...",
1298
+ "- AWS_REGION=... (Bedrock)",
1299
+ "- AWS credentials (when outside AWS)",
1300
+ "- (optional) BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0"
1301
+ ];
1302
+ })();
847
1303
  outro([
848
1304
  "Installed Unrag.",
849
1305
  "",
850
- `- Code: ${path5.join(installDir)}`,
851
- `- Docs: ${path5.join(installDir, "unrag.md")}`,
1306
+ `- Code: ${path6.join(installDir)}`,
1307
+ `- Docs: ${path6.join(installDir, "unrag.md")}`,
852
1308
  `- Config: unrag.config.ts`,
853
1309
  `- Imports: ${aliasBase}/* and ${aliasBase}/config`,
854
1310
  "",
855
1311
  `- Rich media: ${richMediaEnabled ? "enabled" : "disabled"}`,
856
- richMediaEnabled ? `- Embeddings: multimodal enabled (images can be embedded directly)` : `- Embeddings: text-only (no direct image embedding)`,
1312
+ `- Embedding provider: ${embeddingProvider}`,
857
1313
  richMediaEnabled ? `- Extractors: ${selectedExtractors.length > 0 ? selectedExtractors.join(", ") : "none"}` : "",
858
1314
  richMediaEnabled ? ` Tip: you can tweak extractors + assetProcessing flags in unrag.config.ts later.` : ` Tip: re-run \`unrag init --rich-media\` (or edit unrag.config.ts) to enable rich media later.`,
859
1315
  isNext ? tsconfigResult.changed ? `- Next.js: updated ${tsconfigResult.file} (added aliases)` : `- Next.js: no tsconfig changes needed` : `- Next.js: not detected`,
@@ -861,6 +1317,8 @@ async function initCommand(args) {
861
1317
  merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
862
1318
  installLine,
863
1319
  "",
1320
+ ...envHint,
1321
+ "",
864
1322
  `Saved ${CONFIG_FILE}.`
865
1323
  ].join(`
866
1324
  `));
@@ -868,37 +1326,11 @@ async function initCommand(args) {
868
1326
 
869
1327
  // cli/commands/add.ts
870
1328
  import { outro as outro2 } from "@clack/prompts";
871
- import path6 from "node:path";
1329
+ import path7 from "node:path";
872
1330
  import { fileURLToPath as fileURLToPath2 } from "node:url";
873
-
874
- // cli/lib/constants.ts
875
- var UNRAG_SITE_URL = (process.env.UNRAG_SITE_URL ?? process.env.UNRAG_DOCS_BASE_URL)?.trim() || "https://unrag.dev";
876
- var UNRAG_GITHUB_REPO_URL = "https://github.com/BetterStacks/unrag";
877
- function docsUrl(siteRelativePath) {
878
- const p = siteRelativePath.startsWith("/") ? siteRelativePath : `/${siteRelativePath}`;
879
- const base = UNRAG_SITE_URL.endsWith("/") ? UNRAG_SITE_URL : `${UNRAG_SITE_URL}/`;
880
- return new URL(p.replace(/^\/+/, "/"), base).toString();
881
- }
882
-
883
- // cli/commands/add.ts
884
1331
  var CONFIG_FILE2 = "unrag.json";
885
1332
  var __filename3 = fileURLToPath2(import.meta.url);
886
- var __dirname3 = path6.dirname(__filename3);
887
- var AVAILABLE_EXTRACTORS2 = [
888
- "pdf-llm",
889
- "pdf-text-layer",
890
- "pdf-ocr",
891
- "image-ocr",
892
- "image-caption-llm",
893
- "audio-transcribe",
894
- "video-transcribe",
895
- "video-frames",
896
- "file-text",
897
- "file-docx",
898
- "file-pptx",
899
- "file-xlsx"
900
- ];
901
- var AVAILABLE_CONNECTORS = ["notion", "google-drive"];
1333
+ var __dirname3 = path7.dirname(__filename3);
902
1334
  var parseAddArgs = (args) => {
903
1335
  const out = {};
904
1336
  for (let i = 0;i < args.length; i++) {
@@ -931,36 +1363,39 @@ async function addCommand(args) {
931
1363
  const parsed = parseAddArgs(args);
932
1364
  const kind = parsed.kind ?? "connector";
933
1365
  const name = parsed.name;
1366
+ const configPath = path7.join(root, CONFIG_FILE2);
1367
+ const config = await readJsonFile(configPath);
1368
+ if (!config?.installDir) {
1369
+ throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag@latest init\` first.`);
1370
+ }
1371
+ const cliPackageRoot = await findUp(__dirname3, "package.json");
1372
+ if (!cliPackageRoot) {
1373
+ throw new Error("Could not locate CLI package root (package.json not found).");
1374
+ }
1375
+ const registryRoot = path7.join(cliPackageRoot, "registry");
1376
+ const manifest = await readRegistryManifest(registryRoot);
1377
+ const availableExtractors = new Set(manifest.extractors.map((e) => e.id));
1378
+ const availableConnectors = new Set(manifest.connectors.filter((c) => c.status === "available").map((c) => c.id));
934
1379
  if (!name) {
935
1380
  outro2([
936
1381
  "Usage:",
937
1382
  " unrag add <connector>",
938
1383
  " unrag add extractor <name>",
939
1384
  "",
940
- `Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`,
941
- `Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`
1385
+ `Available connectors: ${Array.from(availableConnectors).join(", ")}`,
1386
+ `Available extractors: ${Array.from(availableExtractors).join(", ")}`
942
1387
  ].join(`
943
1388
  `));
944
1389
  return;
945
1390
  }
946
- const configPath = path6.join(root, CONFIG_FILE2);
947
- const config = await readJsonFile(configPath);
948
- if (!config?.installDir) {
949
- throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag@latest init\` first.`);
950
- }
951
- const cliPackageRoot = await findUp(__dirname3, "package.json");
952
- if (!cliPackageRoot) {
953
- throw new Error("Could not locate CLI package root (package.json not found).");
954
- }
955
- const registryRoot = path6.join(cliPackageRoot, "registry");
956
1391
  const nonInteractive = parsed.yes || !process.stdin.isTTY;
957
1392
  const pkg = await readPackageJson(root);
958
1393
  if (kind === "connector") {
959
1394
  const connector = name;
960
- if (!connector || !AVAILABLE_CONNECTORS.includes(connector)) {
1395
+ if (!connector || !availableConnectors.has(connector)) {
961
1396
  outro2(`Unknown connector: ${name}
962
1397
 
963
- Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`);
1398
+ Available connectors: ${Array.from(availableConnectors).join(", ")}`);
964
1399
  return;
965
1400
  }
966
1401
  await copyConnectorFiles({
@@ -980,7 +1415,7 @@ Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`);
980
1415
  outro2([
981
1416
  `Installed connector: ${connector}.`,
982
1417
  "",
983
- `- Code: ${path6.join(config.installDir, "connectors", connector)}`,
1418
+ `- Code: ${path7.join(config.installDir, "connectors", connector)}`,
984
1419
  `- Docs: ${docsUrl(`/docs/connectors/${connector}`)}`,
985
1420
  "",
986
1421
  merged2.changes.length > 0 ? `Added deps: ${merged2.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
@@ -990,10 +1425,10 @@ Available connectors: ${AVAILABLE_CONNECTORS.join(", ")}`);
990
1425
  return;
991
1426
  }
992
1427
  const extractor = name;
993
- if (!extractor || !AVAILABLE_EXTRACTORS2.includes(extractor)) {
1428
+ if (!extractor || !availableExtractors.has(extractor)) {
994
1429
  outro2(`Unknown extractor: ${name}
995
1430
 
996
- Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`);
1431
+ Available extractors: ${Array.from(availableExtractors).join(", ")}`);
997
1432
  return;
998
1433
  }
999
1434
  await copyExtractorFiles({
@@ -1013,7 +1448,7 @@ Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`);
1013
1448
  outro2([
1014
1449
  `Installed extractor: ${extractor}.`,
1015
1450
  "",
1016
- `- Code: ${path6.join(config.installDir, "extractors", extractor)}`,
1451
+ `- Code: ${path7.join(config.installDir, "extractors", extractor)}`,
1017
1452
  "",
1018
1453
  merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
1019
1454
  "",
@@ -1033,7 +1468,7 @@ function renderHelp() {
1033
1468
  "",
1034
1469
  "Commands:",
1035
1470
  " init Install core files (config + store adapter templates)",
1036
- " add <connector> Install a connector (currently: notion)",
1471
+ " add <connector> Install a connector (notion, google-drive)",
1037
1472
  " help Show this help",
1038
1473
  "",
1039
1474
  "Global options:",
@@ -1044,6 +1479,8 @@ function renderHelp() {
1044
1479
  " --store <adapter> drizzle | prisma | raw-sql",
1045
1480
  " --dir <path> Install directory (alias: --install-dir)",
1046
1481
  " --alias <@name> Import alias base (e.g. @unrag)",
1482
+ " --preset <id|url> Install from a web-generated preset (non-interactive)",
1483
+ " --overwrite <mode> skip | force (when files already exist)",
1047
1484
  " --rich-media Enable rich media setup (also enables multimodal embeddings)",
1048
1485
  " --no-rich-media Disable rich media setup",
1049
1486
  " --extractors <list> Comma-separated extractors (implies --rich-media)",