unrag 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -13,7 +13,7 @@ import {
13
13
  select,
14
14
  text
15
15
  } from "@clack/prompts";
16
- import path5 from "node:path";
16
+ import path6 from "node:path";
17
17
  import { fileURLToPath } from "node:url";
18
18
 
19
19
  // cli/lib/registry.ts
@@ -108,10 +108,31 @@ var EXTRACTOR_FLAG_KEYS = {
108
108
  "file-xlsx": ["file_xlsx"]
109
109
  };
110
110
  var ALL_FLAG_KEYS = Array.from(new Set(Object.values(EXTRACTOR_FLAG_KEYS).flat())).sort();
111
/**
 * Indents every non-blank line of `text` by `spaces` space characters.
 *
 * Lines are split on "\n". Blank lines are left untouched so the result never
 * gains trailing whitespace; a line that consists only of "\r" (the blank line
 * of a CRLF-terminated text) is treated as blank as well.
 *
 * @param {string} text - Text to indent; may span multiple lines.
 * @param {number} spaces - Number of spaces prepended to each non-blank line.
 * @returns {string} The indented text with its original line breaks preserved.
 */
var indentBlock = (text, spaces) => {
  const pad = " ".repeat(spaces);
  const isBlank = (line) => line === "" || line === "\r";
  return text
    .split("\n")
    .map((line) => (isBlank(line) ? line : pad + line))
    .join("\n");
};
117
/**
 * Replaces the whole-line region delimited by two markers with `replacement`.
 *
 * The replaced region starts at the beginning of the line containing
 * `startMarker` and ends just after the line break that terminates the line
 * containing `endMarker` (or at the end of `content` when that line is the
 * last one). `replacement` is inserted verbatim.
 *
 * The end marker is searched for only after the start marker, so a stray
 * earlier occurrence of `endMarker` does not prevent the replacement.
 *
 * @param {string} content - Text to search.
 * @param {string} startMarker - Substring identifying the first line to replace.
 * @param {string} endMarker - Substring identifying the last line to replace.
 * @param {string} replacement - Text inserted in place of the marked lines.
 * @returns {string} The updated text, or `content` unchanged when the start
 *   marker is missing or no end marker follows it.
 */
var replaceBetweenMarkers = (content, startMarker, endMarker, replacement) => {
  const startIdx = content.indexOf(startMarker);
  if (startIdx < 0) {
    return content;
  }
  const endIdx = content.indexOf(endMarker, startIdx + startMarker.length);
  if (endIdx < 0) {
    return content;
  }
  // Widen the span to whole lines so the marker comments themselves disappear.
  const startLineBreak = content.lastIndexOf("\n", startIdx);
  const start = startLineBreak < 0 ? 0 : startLineBreak + 1;
  const endLineBreak = content.indexOf("\n", endIdx);
  const end = endLineBreak < 0 ? content.length : endLineBreak + 1;
  return content.slice(0, start) + replacement + content.slice(end);
};
111
130
  var renderUnragConfig = (content, selection) => {
112
131
  const installImportBase = `./${selection.installDir.replace(/\\/g, "/")}`;
113
132
  const richMedia = selection.richMedia ?? { enabled: false, extractors: [] };
114
133
  const selectedExtractors = Array.from(new Set(richMedia.extractors ?? [])).sort();
134
+ const preset = selection.presetConfig;
135
+ const embeddingProvider = selection.embeddingProvider ?? (typeof preset?.embedding?.provider === "string" ? preset.embedding.provider : undefined) ?? "ai";
115
136
  const baseImports = [
116
137
  `import { defineUnragConfig } from "${installImportBase}/core";`
117
138
  ];
@@ -145,17 +166,109 @@ var renderUnragConfig = (content, selection) => {
145
166
  ].join(`
146
167
  `);
147
168
  let out = content.replace("// __UNRAG_IMPORTS__", importsBlock).replace("// __UNRAG_CREATE_ENGINE__", createEngineBlock);
148
- out = out.replace('type: "text", // __UNRAG_EMBEDDING_TYPE__', richMedia.enabled ? 'type: "multimodal",' : 'type: "text",').replace('model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__', richMedia.enabled ? 'model: "cohere/embed-v4.0",' : 'model: "openai/text-embedding-3-small",');
149
- const enabledFlagKeys = new Set;
150
- if (richMedia.enabled) {
151
- for (const ex of selectedExtractors) {
152
- for (const k of EXTRACTOR_FLAG_KEYS[ex] ?? []) {
153
- enabledFlagKeys.add(k);
154
- }
169
+ const presetChunkSize = preset?.defaults?.chunking?.chunkSize;
170
+ const presetChunkOverlap = preset?.defaults?.chunking?.chunkOverlap;
171
+ const presetTopK = preset?.defaults?.retrieval?.topK;
172
+ if (typeof presetChunkSize === "number") {
173
+ out = out.replace("chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__", `chunkSize: ${presetChunkSize},`);
174
+ } else {
175
+ out = out.replace("chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__", "chunkSize: 200,");
176
+ }
177
+ if (typeof presetChunkOverlap === "number") {
178
+ out = out.replace("chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__", `chunkOverlap: ${presetChunkOverlap},`);
179
+ } else {
180
+ out = out.replace("chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__", "chunkOverlap: 40,");
181
+ }
182
+ if (typeof presetTopK === "number") {
183
+ out = out.replace("topK: 8, // __UNRAG_DEFAULT_topK__", `topK: ${presetTopK},`);
184
+ } else {
185
+ out = out.replace("topK: 8, // __UNRAG_DEFAULT_topK__", "topK: 8,");
186
+ }
187
+ const presetEmbeddingType = preset?.embedding?.config?.type;
188
+ const presetEmbeddingModel = preset?.embedding?.config?.model;
189
+ const presetEmbeddingTimeoutMs = preset?.embedding?.config?.timeoutMs;
190
+ const providerLine = ` provider: "${embeddingProvider}",`;
191
+ out = out.replace(/^\s*provider:\s*".*?",\s*$/m, providerLine);
192
+ const defaultModelByProvider = {
193
+ ai: "openai/text-embedding-3-small",
194
+ openai: "text-embedding-3-small",
195
+ google: "gemini-embedding-001",
196
+ openrouter: "text-embedding-3-small",
197
+ azure: "text-embedding-3-small",
198
+ vertex: "text-embedding-004",
199
+ bedrock: "amazon.titan-embed-text-v2:0",
200
+ cohere: "embed-english-v3.0",
201
+ mistral: "mistral-embed",
202
+ together: "togethercomputer/m2-bert-80M-2k-retrieval",
203
+ ollama: "nomic-embed-text",
204
+ voyage: "voyage-3.5-lite",
205
+ custom: "openai/text-embedding-3-small"
206
+ };
207
+ const resolvedEmbeddingModel = (() => {
208
+ if (typeof presetEmbeddingModel === "string" && presetEmbeddingModel.trim().length > 0) {
209
+ return presetEmbeddingModel.trim();
210
+ }
211
+ if (embeddingProvider === "ai" && presetEmbeddingType === "multimodal") {
212
+ return "cohere/embed-v4.0";
213
+ }
214
+ return defaultModelByProvider[embeddingProvider] ?? "openai/text-embedding-3-small";
215
+ })();
216
+ const normalizeModelForProvider = (model) => {
217
+ if (embeddingProvider === "ai")
218
+ return model;
219
+ const prefix = `${embeddingProvider}/`;
220
+ return model.startsWith(prefix) ? model.slice(prefix.length) : model;
221
+ };
222
+ const nextModel = normalizeModelForProvider(resolvedEmbeddingModel);
223
+ out = out.replace('model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__', `model: ${JSON.stringify(nextModel)},`);
224
+ if (presetEmbeddingType === "multimodal") {
225
+ if (!out.includes('type: "multimodal"') && !out.includes('type: "text"')) {
226
+ out = out.replace(`config: {
227
+ model:`, `config: {
228
+ type: "multimodal",
229
+ model:`);
230
+ } else {
231
+ out = out.replace(/^\s*type:\s*".*?",\s*$/m, ` type: "multimodal",`);
155
232
  }
156
233
  }
157
- for (const k of ALL_FLAG_KEYS) {
158
- out = out.replace(`enabled: false, // __UNRAG_FLAG_${k}__`, `enabled: ${enabledFlagKeys.has(k) ? "true" : "false"},`);
234
+ if (typeof presetEmbeddingTimeoutMs === "number") {
235
+ out = out.replace("timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__", `timeoutMs: ${presetEmbeddingTimeoutMs},`);
236
+ } else {
237
+ out = out.replace("timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__", "timeoutMs: 15_000,");
238
+ }
239
+ const presetStoreChunkContent = preset?.engine?.storage?.storeChunkContent;
240
+ const presetStoreDocumentContent = preset?.engine?.storage?.storeDocumentContent;
241
+ if (typeof presetStoreChunkContent === "boolean") {
242
+ out = out.replace("storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__", `storeChunkContent: ${presetStoreChunkContent},`);
243
+ } else {
244
+ out = out.replace("storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__", "storeChunkContent: true,");
245
+ }
246
+ if (typeof presetStoreDocumentContent === "boolean") {
247
+ out = out.replace("storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__", `storeDocumentContent: ${presetStoreDocumentContent},`);
248
+ } else {
249
+ out = out.replace("storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__", "storeDocumentContent: true,");
250
+ }
251
+ const assetProcessingOverride = preset?.engine?.assetProcessing;
252
+ if (assetProcessingOverride && typeof assetProcessingOverride === "object") {
253
+ const json = JSON.stringify(assetProcessingOverride, null, 2);
254
+ const block = ` assetProcessing: ${indentBlock(json, 2).trimStart()},
255
+ `;
256
+ out = replaceBetweenMarkers(out, "__UNRAG_ASSET_PROCESSING_BLOCK_START__", "__UNRAG_ASSET_PROCESSING_BLOCK_END__", block);
257
+ } else {
258
+ out = out.replace("// __UNRAG_ASSET_PROCESSING_BLOCK_START__", "").replace("// __UNRAG_ASSET_PROCESSING_BLOCK_END__", "");
259
+ }
260
+ if (!(assetProcessingOverride && typeof assetProcessingOverride === "object")) {
261
+ const enabledFlagKeys = new Set;
262
+ if (richMedia.enabled) {
263
+ for (const ex of selectedExtractors) {
264
+ for (const k of EXTRACTOR_FLAG_KEYS[ex] ?? []) {
265
+ enabledFlagKeys.add(k);
266
+ }
267
+ }
268
+ }
269
+ for (const k of ALL_FLAG_KEYS) {
270
+ out = out.replace(`enabled: false, // __UNRAG_FLAG_${k}__`, `enabled: ${enabledFlagKeys.has(k) ? "true" : "false"},`);
271
+ }
159
272
  }
160
273
  const extractorLines = richMedia.enabled && selectedExtractors.length > 0 ? selectedExtractors.map((ex) => ` ${EXTRACTOR_FACTORY[ex]}(),`).join(`
161
274
  `) : "";
@@ -164,6 +277,7 @@ var renderUnragConfig = (content, selection) => {
164
277
  };
165
278
  var renderDocs = (content, selection) => {
166
279
  const notes = [];
280
+ const embeddingProvider = selection.embeddingProvider ?? "ai";
167
281
  if (selection.storeAdapter === "drizzle") {
168
282
  notes.push("## Store adapter: Drizzle", "", "You can import the generated Drizzle schema module into your app’s main Drizzle schema to avoid duplicating table definitions.", "", "Example pattern:", "```ts", `import * as rag from "./${selection.installDir}/store/drizzle/schema";`, "", "export const schema = {", " ...rag.schema,", " // ...your app tables", "};", "```", "", "Then run Drizzle migrations from your app as usual.");
169
283
  } else if (selection.storeAdapter === "prisma") {
@@ -171,7 +285,42 @@ var renderDocs = (content, selection) => {
171
285
  } else {
172
286
  notes.push("## Store adapter: Raw SQL", "", "This adapter uses a `pg` Pool and parameterized SQL queries against the tables described above.", "It’s the most portable option when you don’t want ORM coupling.");
173
287
  }
174
- const withNotes = content.replace("<!-- __UNRAG_ADAPTER_NOTES__ -->", notes.join(`
288
+ const envLines = [
289
+ "## Environment variables",
290
+ "",
291
+ "Add these to your environment:",
292
+ "- `DATABASE_URL` (Postgres connection string)"
293
+ ];
294
+ if (embeddingProvider === "ai") {
295
+ envLines.push("- `AI_GATEWAY_API_KEY` (required by the AI SDK when using Vercel AI Gateway)", "- Optional: `AI_GATEWAY_MODEL` (defaults to `openai/text-embedding-3-small`)");
296
+ } else if (embeddingProvider === "openai") {
297
+ envLines.push("- `OPENAI_API_KEY`", "- Optional: `OPENAI_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
298
+ } else if (embeddingProvider === "google") {
299
+ envLines.push("- `GOOGLE_GENERATIVE_AI_API_KEY`", "- Optional: `GOOGLE_GENERATIVE_AI_EMBEDDING_MODEL` (defaults to `gemini-embedding-001`)");
300
+ } else if (embeddingProvider === "openrouter") {
301
+ envLines.push("- `OPENROUTER_API_KEY`", "- Optional: `OPENROUTER_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
302
+ } else if (embeddingProvider === "cohere") {
303
+ envLines.push("- `COHERE_API_KEY`", "- Optional: `COHERE_EMBEDDING_MODEL` (defaults to `embed-english-v3.0`)");
304
+ } else if (embeddingProvider === "mistral") {
305
+ envLines.push("- `MISTRAL_API_KEY`", "- Optional: `MISTRAL_EMBEDDING_MODEL` (defaults to `mistral-embed`)");
306
+ } else if (embeddingProvider === "together") {
307
+ envLines.push("- `TOGETHER_AI_API_KEY`", "- Optional: `TOGETHER_AI_EMBEDDING_MODEL` (defaults to `togethercomputer/m2-bert-80M-2k-retrieval`)");
308
+ } else if (embeddingProvider === "voyage") {
309
+ envLines.push("- `VOYAGE_API_KEY`", "- Optional: `VOYAGE_MODEL` (defaults to `voyage-3.5-lite`)");
310
+ } else if (embeddingProvider === "ollama") {
311
+ envLines.push("- Optional: `OLLAMA_EMBEDDING_MODEL` (defaults to `nomic-embed-text`)");
312
+ } else if (embeddingProvider === "azure") {
313
+ envLines.push("- `AZURE_OPENAI_API_KEY`", "- `AZURE_RESOURCE_NAME`", "- Optional: `AZURE_EMBEDDING_MODEL` (defaults to `text-embedding-3-small`)");
314
+ } else if (embeddingProvider === "vertex") {
315
+ envLines.push("- `GOOGLE_APPLICATION_CREDENTIALS` (when running outside GCP)", "- Optional: `GOOGLE_VERTEX_EMBEDDING_MODEL` (defaults to `text-embedding-004`)");
316
+ } else if (embeddingProvider === "bedrock") {
317
+ envLines.push("- `AWS_REGION`", "- AWS credentials (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`) when running outside AWS", "- Optional: `BEDROCK_EMBEDDING_MODEL` (defaults to `amazon.titan-embed-text-v2:0`)");
318
+ }
319
+ const withEnv = content.replace(/## Environment variables[\s\S]*?## Database requirements/, `${envLines.join(`
320
+ `)}
321
+
322
+ ## Database requirements`);
323
+ const withNotes = withEnv.replace("<!-- __UNRAG_ADAPTER_NOTES__ -->", notes.join(`
175
324
  `));
176
325
  return withNotes.replaceAll("@unrag/config", `${selection.aliasBase}/config`).replaceAll("`@unrag/*`", `\`${selection.aliasBase}/*\``);
177
326
  };
@@ -225,9 +374,57 @@ async function copyRegistryFiles(selection) {
225
374
  src: path2.join(selection.registryRoot, "core/retrieve.ts"),
226
375
  dest: path2.join(installBaseAbs, "core/retrieve.ts")
227
376
  },
377
+ {
378
+ src: path2.join(selection.registryRoot, "embedding/_shared.ts"),
379
+ dest: path2.join(installBaseAbs, "embedding/_shared.ts")
380
+ },
228
381
  {
229
382
  src: path2.join(selection.registryRoot, "embedding/ai.ts"),
230
383
  dest: path2.join(installBaseAbs, "embedding/ai.ts")
384
+ },
385
+ {
386
+ src: path2.join(selection.registryRoot, "embedding/openai.ts"),
387
+ dest: path2.join(installBaseAbs, "embedding/openai.ts")
388
+ },
389
+ {
390
+ src: path2.join(selection.registryRoot, "embedding/google.ts"),
391
+ dest: path2.join(installBaseAbs, "embedding/google.ts")
392
+ },
393
+ {
394
+ src: path2.join(selection.registryRoot, "embedding/openrouter.ts"),
395
+ dest: path2.join(installBaseAbs, "embedding/openrouter.ts")
396
+ },
397
+ {
398
+ src: path2.join(selection.registryRoot, "embedding/azure.ts"),
399
+ dest: path2.join(installBaseAbs, "embedding/azure.ts")
400
+ },
401
+ {
402
+ src: path2.join(selection.registryRoot, "embedding/vertex.ts"),
403
+ dest: path2.join(installBaseAbs, "embedding/vertex.ts")
404
+ },
405
+ {
406
+ src: path2.join(selection.registryRoot, "embedding/bedrock.ts"),
407
+ dest: path2.join(installBaseAbs, "embedding/bedrock.ts")
408
+ },
409
+ {
410
+ src: path2.join(selection.registryRoot, "embedding/cohere.ts"),
411
+ dest: path2.join(installBaseAbs, "embedding/cohere.ts")
412
+ },
413
+ {
414
+ src: path2.join(selection.registryRoot, "embedding/mistral.ts"),
415
+ dest: path2.join(installBaseAbs, "embedding/mistral.ts")
416
+ },
417
+ {
418
+ src: path2.join(selection.registryRoot, "embedding/together.ts"),
419
+ dest: path2.join(installBaseAbs, "embedding/together.ts")
420
+ },
421
+ {
422
+ src: path2.join(selection.registryRoot, "embedding/ollama.ts"),
423
+ dest: path2.join(installBaseAbs, "embedding/ollama.ts")
424
+ },
425
+ {
426
+ src: path2.join(selection.registryRoot, "embedding/voyage.ts"),
427
+ dest: path2.join(installBaseAbs, "embedding/voyage.ts")
231
428
  }
232
429
  ];
233
430
  if (selection.storeAdapter === "drizzle") {
@@ -258,21 +455,27 @@ async function copyRegistryFiles(selection) {
258
455
  dest: path2.join(installBaseAbs, "store/prisma/store.ts")
259
456
  });
260
457
  }
458
+ const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
459
+ const overwritePolicy = selection.overwrite ?? "skip";
261
460
  for (const mapping of fileMappings) {
262
461
  if (!await exists(mapping.src)) {
263
462
  throw new Error(`Registry file missing: ${mapping.src}`);
264
463
  }
265
464
  if (await exists(mapping.dest)) {
266
- const answer = await confirm({
267
- message: `Overwrite ${path2.relative(selection.projectRoot, mapping.dest)}?`,
268
- initialValue: false
269
- });
270
- if (isCancel(answer)) {
271
- cancel("Cancelled.");
272
- return;
273
- }
274
- if (!answer) {
465
+ if (overwritePolicy === "force") {} else if (nonInteractive) {
275
466
  continue;
467
+ } else {
468
+ const answer = await confirm({
469
+ message: `Overwrite ${path2.relative(selection.projectRoot, mapping.dest)}?`,
470
+ initialValue: false
471
+ });
472
+ if (isCancel(answer)) {
473
+ cancel("Cancelled.");
474
+ return;
475
+ }
476
+ if (!answer) {
477
+ continue;
478
+ }
276
479
  }
277
480
  }
278
481
  const raw = await readText(mapping.src);
@@ -290,6 +493,7 @@ async function copyConnectorFiles(selection) {
290
493
  const files = await listFilesRecursive(connectorRegistryAbs);
291
494
  const destRootAbs = path2.join(installBaseAbs, "connectors", selection.connector);
292
495
  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
496
+ const overwritePolicy = selection.overwrite ?? "skip";
293
497
  for (const src of files) {
294
498
  if (!await exists(src)) {
295
499
  throw new Error(`Registry file missing: ${src}`);
@@ -297,7 +501,7 @@ async function copyConnectorFiles(selection) {
297
501
  const rel = path2.relative(connectorRegistryAbs, src);
298
502
  const dest = path2.join(destRootAbs, rel);
299
503
  if (await exists(dest)) {
300
- if (nonInteractive) {
504
+ if (overwritePolicy === "force") {} else if (nonInteractive) {
301
505
  continue;
302
506
  }
303
507
  const answer = await confirm({
@@ -329,9 +533,12 @@ async function copyExtractorFiles(selection) {
329
533
  const destRootAbs = path2.join(installBaseAbs, "extractors", selection.extractor);
330
534
  const sharedDestRootAbs = path2.join(installBaseAbs, "extractors", "_shared");
331
535
  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
536
+ const overwritePolicy = selection.overwrite ?? "skip";
332
537
  const shouldWrite = async (src, dest) => {
333
538
  if (!await exists(dest))
334
539
  return true;
540
+ if (overwritePolicy === "force")
541
+ return true;
335
542
  if (nonInteractive)
336
543
  return false;
337
544
  try {
@@ -388,26 +595,103 @@ async function writeJsonFile(filePath, data) {
388
595
  `, "utf8");
389
596
  }
390
597
 
391
- // cli/lib/packageJson.ts
598
+ // cli/lib/manifest.ts
392
599
  import path3 from "node:path";
393
- import { readFile as readFile3, writeFile as writeFile3 } from "node:fs/promises";
600
+ import { readFile as readFile3 } from "node:fs/promises";
601
/**
 * Reads and validates `manifest.json` from the registry directory.
 *
 * @param {string} registryRoot - Directory that contains `manifest.json`.
 * @returns {Promise<object>} The parsed manifest; `version` is 1 and both
 *   `extractors` and `connectors` are arrays.
 * @throws {Error} When the file cannot be read, is not valid JSON, has a
 *   `version` other than 1, or lacks the `extractors`/`connectors` arrays.
 */
async function readRegistryManifest(registryRoot) {
  const abs = path3.join(registryRoot, "manifest.json");
  const raw = await readFile3(abs, "utf8");
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    // Name the offending file; a bare SyntaxError gives the user no location.
    throw new Error(`Invalid JSON in registry manifest ${abs}`, { cause: err });
  }
  if (!parsed || parsed.version !== 1) {
    throw new Error(`Unsupported registry manifest version in ${abs}`);
  }
  if (!Array.isArray(parsed.extractors) || !Array.isArray(parsed.connectors)) {
    throw new Error(`Invalid registry manifest shape in ${abs}`);
  }
  return parsed;
}
613
+
614
+ // cli/lib/constants.ts
615
/**
 * Picks the site base URL from environment variables.
 *
 * `UNRAG_SITE_URL` wins over `UNRAG_DOCS_BASE_URL`; a variable that is unset,
 * empty, or whitespace-only is skipped so it cannot mask the other one.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {string} The trimmed base URL, or "https://unrag.dev" when neither
 *   variable holds a non-blank value.
 */
function resolveUnragSiteUrl(env = process.env) {
  for (const candidate of [env.UNRAG_SITE_URL, env.UNRAG_DOCS_BASE_URL]) {
    const trimmed = candidate?.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return "https://unrag.dev";
}
var UNRAG_SITE_URL = resolveUnragSiteUrl();
var UNRAG_GITHUB_REPO_URL = "https://github.com/BetterStacks/unrag";
/**
 * Builds an absolute URL on the configured site.
 *
 * The path is normalized to start with exactly one "/" (so input such as
 * "//host/x" cannot be read as a protocol-relative URL) and is then resolved
 * as a root-absolute path: any path component in `UNRAG_SITE_URL` is replaced.
 *
 * @param {string} siteRelativePath - Path on the site, with or without a leading "/".
 * @returns {string} The absolute URL as a string.
 * @throws {TypeError} When `UNRAG_SITE_URL` is not a valid absolute URL.
 */
function docsUrl(siteRelativePath) {
  const p = siteRelativePath.startsWith("/") ? siteRelativePath : `/${siteRelativePath}`;
  const base = UNRAG_SITE_URL.endsWith("/") ? UNRAG_SITE_URL : `${UNRAG_SITE_URL}/`;
  return new URL(p.replace(/^\/+/, "/"), base).toString();
}
622
+
623
+ // cli/lib/preset.ts
624
/**
 * Structural check for a version-1 preset payload.
 *
 * Accepts only objects with `version === 1`, an `install` object carrying
 * string `installDir` and `aliasBase` plus a `storeAdapter` that is exactly
 * one of "drizzle", "prisma" or "raw-sql", and a `modules` object whose
 * `extractors` and `connectors` are arrays.
 *
 * @param {unknown} x - Value to validate (typically parsed JSON).
 * @returns {boolean} True when `x` has the shape described above.
 */
function isPresetPayloadV1(x) {
  if (!x || typeof x !== "object")
    return false;
  const o = x;
  if (o.version !== 1)
    return false;
  if (!o.install || typeof o.install !== "object")
    return false;
  if (!o.modules || typeof o.modules !== "object")
    return false;
  if (typeof o.install.installDir !== "string")
    return false;
  // Require a real string: coercing with String() would let values such as
  // ["drizzle"] through, because they stringify to an allowed adapter name.
  if (typeof o.install.storeAdapter !== "string" || !["drizzle", "prisma", "raw-sql"].includes(o.install.storeAdapter))
    return false;
  if (typeof o.install.aliasBase !== "string")
    return false;
  if (!Array.isArray(o.modules.extractors) || !Array.isArray(o.modules.connectors))
    return false;
  return true;
}
644
/**
 * Turns a preset id or URL into the URL to fetch.
 *
 * Input that already starts with an http(s) scheme is returned as-is (after
 * trimming); the scheme is matched case-insensitively, so "HTTPS://…" is not
 * mistaken for a preset id. Anything else is URL-encoded and mapped to
 * `/api/presets/<id>` via `docsUrl`.
 *
 * @param {unknown} input - Preset id or absolute http(s) URL.
 * @returns {string} The URL to request.
 * @throws {Error} When the input is missing or blank.
 */
function toPresetUrl(input) {
  const s = String(input ?? "").trim();
  if (!s) {
    throw new Error("Missing preset id/url");
  }
  if (/^https?:\/\//i.test(s))
    return s;
  return docsUrl(`/api/presets/${encodeURIComponent(s)}`);
}
653
/**
 * Downloads a preset and validates it as a version-1 payload.
 *
 * @param {unknown} input - Preset id or absolute http(s) URL (see `toPresetUrl`).
 * @returns {Promise<object>} The parsed preset payload.
 * @throws {Error} When global `fetch` is unavailable, the response status is
 *   not OK, the body is not valid JSON, or the payload fails
 *   `isPresetPayloadV1`.
 */
async function fetchPreset(input) {
  const url = toPresetUrl(input);
  if (typeof fetch !== "function") {
    throw new Error(`Global fetch() is unavailable in this runtime; cannot fetch preset from ${url}. Set UNRAG_SITE_URL="${UNRAG_SITE_URL}" and use a newer Node runtime.`);
  }
  const res = await fetch(url, {
    headers: {
      "user-agent": "unrag-cli",
      accept: "application/json"
    }
  });
  if (!res.ok) {
    // Best effort: the body only enriches the message, so a read failure is ignored.
    const text = await res.text().catch(() => "");
    throw new Error(`Failed to fetch preset (${res.status}) from ${url}${text ? `: ${text}` : ""}`);
  }
  let json;
  try {
    json = await res.json();
  } catch (err) {
    // A 2xx response can still carry a non-JSON body; report which URL sent it.
    throw new Error(`Invalid JSON in preset response from ${url}`, { cause: err });
  }
  if (!isPresetPayloadV1(json)) {
    throw new Error(`Invalid preset payload returned from ${url}`);
  }
  return json;
}
674
+
675
+ // cli/lib/packageJson.ts
676
+ import path4 from "node:path";
677
+ import { readFile as readFile4, writeFile as writeFile3 } from "node:fs/promises";
394
678
  async function detectPackageManager(projectRoot) {
395
- if (await exists(path3.join(projectRoot, "bun.lock")))
679
+ if (await exists(path4.join(projectRoot, "bun.lock")))
396
680
  return "bun";
397
- if (await exists(path3.join(projectRoot, "pnpm-lock.yaml")))
681
+ if (await exists(path4.join(projectRoot, "pnpm-lock.yaml")))
398
682
  return "pnpm";
399
- if (await exists(path3.join(projectRoot, "yarn.lock")))
683
+ if (await exists(path4.join(projectRoot, "yarn.lock")))
400
684
  return "yarn";
401
- if (await exists(path3.join(projectRoot, "package-lock.json")))
685
+ if (await exists(path4.join(projectRoot, "package-lock.json")))
402
686
  return "npm";
403
687
  return "npm";
404
688
  }
405
689
  async function readPackageJson(projectRoot) {
406
- const raw = await readFile3(path3.join(projectRoot, "package.json"), "utf8");
690
+ const raw = await readFile4(path4.join(projectRoot, "package.json"), "utf8");
407
691
  return JSON.parse(raw);
408
692
  }
409
693
  async function writePackageJson(projectRoot, pkg) {
410
- await writeFile3(path3.join(projectRoot, "package.json"), JSON.stringify(pkg, null, 2) + `
694
+ await writeFile3(path4.join(projectRoot, "package.json"), JSON.stringify(pkg, null, 2) + `
411
695
  `, "utf8");
412
696
  }
413
697
  function mergeDeps(pkg, deps, devDeps) {
@@ -431,7 +715,7 @@ function mergeDeps(pkg, deps, devDeps) {
431
715
  }
432
716
  function depsForAdapter(adapter) {
433
717
  const deps = {
434
- ai: "^5.0.113"
718
+ ai: "^6.0.3"
435
719
  };
436
720
  const devDeps = {};
437
721
  if (adapter === "drizzle") {
@@ -455,26 +739,30 @@ function depsForConnector(connector) {
455
739
  if (connector === "notion") {
456
740
  deps["@notionhq/client"] = "^2.2.16";
457
741
  }
742
+ if (connector === "google-drive") {
743
+ deps["googleapis"] = "^148.0.0";
744
+ deps["google-auth-library"] = "^10.0.0";
745
+ }
458
746
  return { deps, devDeps };
459
747
  }
460
748
  function depsForExtractor(extractor) {
461
749
  const deps = {};
462
750
  const devDeps = {};
463
751
  if (extractor === "pdf-llm") {
464
- deps["ai"] = "^5.0.113";
752
+ deps["ai"] = "^6.0.3";
465
753
  }
466
754
  if (extractor === "pdf-text-layer") {
467
755
  deps["pdfjs-dist"] = "^5.4.149";
468
756
  }
469
757
  if (extractor === "pdf-ocr") {}
470
758
  if (extractor === "image-ocr" || extractor === "image-caption-llm") {
471
- deps["ai"] = "^5.0.113";
759
+ deps["ai"] = "^6.0.3";
472
760
  }
473
761
  if (extractor === "audio-transcribe" || extractor === "video-transcribe") {
474
- deps["ai"] = "^5.0.113";
762
+ deps["ai"] = "^6.0.3";
475
763
  }
476
764
  if (extractor === "video-frames") {
477
- deps["ai"] = "^5.0.113";
765
+ deps["ai"] = "^6.0.3";
478
766
  }
479
767
  if (extractor === "file-text") {}
480
768
  if (extractor === "file-docx") {
@@ -488,6 +776,33 @@ function depsForExtractor(extractor) {
488
776
  }
489
777
  return { deps, devDeps };
490
778
  }
779
// Provider id -> [npm package, version range] installed for that provider.
// A Map is used so only these exact keys can match.
var EMBEDDING_PROVIDER_PACKAGES = new Map([
  ["openai", ["@ai-sdk/openai", "^3.0.1"]],
  ["google", ["@ai-sdk/google", "^3.0.1"]],
  ["azure", ["@ai-sdk/azure", "^3.0.1"]],
  ["vertex", ["@ai-sdk/google-vertex", "^3.0.1"]],
  ["bedrock", ["@ai-sdk/amazon-bedrock", "^3.0.72"]],
  ["cohere", ["@ai-sdk/cohere", "^3.0.1"]],
  ["mistral", ["@ai-sdk/mistral", "^3.0.1"]],
  ["together", ["@ai-sdk/togetherai", "^3.0.1"]],
  ["openrouter", ["@openrouter/sdk", "^0.3.10"]],
  ["ollama", ["ollama-ai-provider-v2", "^2.0.0"]],
  ["voyage", ["voyage-ai-provider", "^3.0.0"]]
]);
/**
 * Lists the extra packages required by an embedding provider.
 *
 * @param {string} provider - Embedding provider id (e.g. "openai", "voyage").
 * @returns {{ deps: Record<string, string>, devDeps: Record<string, string> }}
 *   `deps` holds at most one package; both maps are empty for providers
 *   without an entry in the table (such as "ai").
 */
function depsForEmbeddingProvider(provider) {
  const deps = {};
  const devDeps = {};
  const entry = EMBEDDING_PROVIDER_PACKAGES.get(provider);
  if (entry) {
    const [packageName, versionRange] = entry;
    deps[packageName] = versionRange;
  }
  return { deps, devDeps };
}
491
806
  function installCmd(pm) {
492
807
  if (pm === "bun")
493
808
  return "bun install";
@@ -499,8 +814,8 @@ function installCmd(pm) {
499
814
  }
500
815
 
501
816
  // cli/lib/tsconfig.ts
502
- import path4 from "node:path";
503
- import { readFile as readFile4, writeFile as writeFile4 } from "node:fs/promises";
817
+ import path5 from "node:path";
818
+ import { readFile as readFile5, writeFile as writeFile4 } from "node:fs/promises";
504
819
  import { parse } from "jsonc-parser";
505
820
  var parseJsoncLoose = (raw) => {
506
821
  const errors = [];
@@ -511,14 +826,14 @@ var parseJsoncLoose = (raw) => {
511
826
  return result;
512
827
  };
513
828
  async function patchTsconfigPaths(params) {
514
- const configFile = await exists(path4.join(params.projectRoot, "tsconfig.json")) ? "tsconfig.json" : await exists(path4.join(params.projectRoot, "jsconfig.json")) ? "jsconfig.json" : null;
829
+ const configFile = await exists(path5.join(params.projectRoot, "tsconfig.json")) ? "tsconfig.json" : await exists(path5.join(params.projectRoot, "jsconfig.json")) ? "jsconfig.json" : null;
515
830
  const aliasBase = params.aliasBase;
516
831
  const aliasKey = `${aliasBase}/*`;
517
832
  const target = [`./${params.installDir.replace(/\\/g, "/")}/*`];
518
833
  const configAliasKey = `${aliasBase}/config`;
519
834
  const configTarget = ["./unrag.config.ts"];
520
835
  if (!configFile) {
521
- const abs2 = path4.join(params.projectRoot, "tsconfig.json");
836
+ const abs2 = path5.join(params.projectRoot, "tsconfig.json");
522
837
  const next2 = {
523
838
  compilerOptions: {
524
839
  baseUrl: ".",
@@ -532,8 +847,8 @@ async function patchTsconfigPaths(params) {
532
847
  `, "utf8");
533
848
  return { changed: true, file: "tsconfig.json" };
534
849
  }
535
- const abs = path4.join(params.projectRoot, configFile);
536
- const raw = await readFile4(abs, "utf8");
850
+ const abs = path5.join(params.projectRoot, configFile);
851
+ const raw = await readFile5(abs, "utf8");
537
852
  let parsed;
538
853
  try {
539
854
  parsed = parseJsoncLoose(raw);
@@ -568,7 +883,7 @@ async function patchTsconfigPaths(params) {
568
883
  var CONFIG_FILE = "unrag.json";
569
884
  var CONFIG_VERSION = 1;
570
885
  var __filename2 = fileURLToPath(import.meta.url);
571
- var __dirname2 = path5.dirname(__filename2);
886
+ var __dirname2 = path6.dirname(__filename2);
572
887
  var parseInitArgs = (args) => {
573
888
  const out = {};
574
889
  for (let i = 0;i < args.length; i++) {
@@ -617,77 +932,35 @@ var parseInitArgs = (args) => {
617
932
  }
618
933
  continue;
619
934
  }
935
+ if (a === "--provider") {
936
+ const v = args[i + 1];
937
+ if (v === "ai" || v === "openai" || v === "google" || v === "openrouter" || v === "azure" || v === "vertex" || v === "bedrock" || v === "cohere" || v === "mistral" || v === "together" || v === "ollama" || v === "voyage") {
938
+ out.provider = v;
939
+ i++;
940
+ }
941
+ continue;
942
+ }
943
+ if (a === "--preset") {
944
+ const v = args[i + 1];
945
+ if (v) {
946
+ out.preset = v;
947
+ i++;
948
+ }
949
+ continue;
950
+ }
951
+ if (a === "--overwrite") {
952
+ const v = args[i + 1];
953
+ if (v === "skip" || v === "force") {
954
+ out.overwrite = v;
955
+ i++;
956
+ }
957
+ continue;
958
+ }
620
959
  }
621
960
  return out;
622
961
  };
623
- var DEFAULT_RICH_MEDIA_EXTRACTORS = ["pdf-text-layer", "file-text"];
624
- var EXTRACTOR_OPTIONS = [
625
- {
626
- group: "PDF",
627
- value: "pdf-text-layer",
628
- label: `pdf-text-layer (Fast/cheap extraction via PDF text layer)`,
629
- hint: "recommended"
630
- },
631
- {
632
- group: "PDF",
633
- value: "pdf-llm",
634
- label: `pdf-llm (LLM-based PDF extraction; higher cost)`
635
- },
636
- {
637
- group: "PDF",
638
- value: "pdf-ocr",
639
- label: `pdf-ocr (OCR scanned PDFs; requires native binaries)`,
640
- hint: "worker-only"
641
- },
642
- {
643
- group: "Image",
644
- value: "image-ocr",
645
- label: `image-ocr (Extract text from images via vision LLM)`
646
- },
647
- {
648
- group: "Image",
649
- value: "image-caption-llm",
650
- label: `image-caption-llm (Generate captions for images via vision LLM)`
651
- },
652
- {
653
- group: "Audio",
654
- value: "audio-transcribe",
655
- label: `audio-transcribe (Speech-to-text transcription)`
656
- },
657
- {
658
- group: "Video",
659
- value: "video-transcribe",
660
- label: `video-transcribe (Transcribe video audio track)`
661
- },
662
- {
663
- group: "Video",
664
- value: "video-frames",
665
- label: `video-frames (Sample frames + analyze via vision LLM; requires ffmpeg)`,
666
- hint: "worker-only"
667
- },
668
- {
669
- group: "Files",
670
- value: "file-text",
671
- label: `file-text (Extract text/markdown/json/html from common text files)`,
672
- hint: "recommended"
673
- },
674
- {
675
- group: "Files",
676
- value: "file-docx",
677
- label: `file-docx (Extract text from .docx files)`
678
- },
679
- {
680
- group: "Files",
681
- value: "file-pptx",
682
- label: `file-pptx (Extract text from .pptx slides)`
683
- },
684
- {
685
- group: "Files",
686
- value: "file-xlsx",
687
- label: `file-xlsx (Extract tables from .xlsx spreadsheets)`
688
- }
689
- ];
690
- var AVAILABLE_EXTRACTORS = new Set(EXTRACTOR_OPTIONS.map((o) => o.value));
962
// Shared normalizer: non-arrays become []; each present element is
// stringified and trimmed, and entries that end up empty are dropped.
var trimmedNonEmptyStrings = (xs) => {
  if (!Array.isArray(xs)) {
    return [];
  }
  return xs.flatMap((item) => {
    const trimmed = String(item).trim();
    return trimmed ? [trimmed] : [];
  });
};
/** Normalizes a raw extractor list to trimmed, non-empty strings. */
var toExtractors = (xs) => trimmedNonEmptyStrings(xs);
/** Normalizes a raw connector list to trimmed, non-empty strings. */
var toConnectors = (xs) => trimmedNonEmptyStrings(xs);
691
964
  async function initCommand(args) {
692
965
  const root = await tryFindProjectRoot(process.cwd());
693
966
  if (!root) {
@@ -697,15 +970,42 @@ async function initCommand(args) {
697
970
  if (!cliPackageRoot) {
698
971
  throw new Error("Could not locate CLI package root (package.json not found).");
699
972
  }
700
- const registryRoot = path5.join(cliPackageRoot, "registry");
701
- const existing = await readJsonFile(path5.join(root, CONFIG_FILE));
973
+ const registryRoot = path6.join(cliPackageRoot, "registry");
974
+ const manifest = await readRegistryManifest(registryRoot);
975
+ const extractorOptions = manifest.extractors.map((ex) => {
976
+ const value = ex.id;
977
+ const label = ex.description ? `${ex.label} (${ex.description})` : ex.label;
978
+ return {
979
+ group: ex.group,
980
+ value,
981
+ label,
982
+ hint: ex.hint,
983
+ defaultSelected: Boolean(ex.defaultSelected)
984
+ };
985
+ });
986
+ const availableExtractors = new Set(extractorOptions.map((o) => o.value));
987
+ const defaultRichMediaExtractors = extractorOptions.filter((o) => o.defaultSelected).map((o) => o.value).sort();
988
+ const existing = await readJsonFile(path6.join(root, CONFIG_FILE));
702
989
  const parsed = parseInitArgs(args);
990
+ const preset = parsed.preset ? await fetchPreset(parsed.preset) : null;
991
+ if (preset) {
992
+ const hasOtherChoices = Boolean(parsed.installDir) || Boolean(parsed.storeAdapter) || Boolean(parsed.aliasBase) || typeof parsed.richMedia === "boolean" || (parsed.extractors ?? []).length > 0;
993
+ if (hasOtherChoices) {
994
+ throw new Error('When using "--preset", do not pass other init preference flags (--store/--dir/--alias/--rich-media/--extractors).');
995
+ }
996
+ }
997
+ const presetEmbeddingProvider = (() => {
998
+ const v = preset?.config?.embedding?.provider;
999
+ return v === "ai" || v === "openai" || v === "google" || v === "openrouter" || v === "azure" || v === "vertex" || v === "bedrock" || v === "cohere" || v === "mistral" || v === "together" || v === "ollama" || v === "voyage" || v === "custom" ? v : undefined;
1000
+ })();
703
1001
  const defaults = {
704
- installDir: existing?.installDir ?? "lib/unrag",
705
- storeAdapter: existing?.storeAdapter ?? "drizzle",
706
- aliasBase: existing?.aliasBase ?? "@unrag"
1002
+ installDir: preset?.install?.installDir ?? existing?.installDir ?? "lib/unrag",
1003
+ storeAdapter: preset?.install?.storeAdapter ?? existing?.storeAdapter ?? "drizzle",
1004
+ aliasBase: preset?.install?.aliasBase ?? existing?.aliasBase ?? "@unrag",
1005
+ embeddingProvider: parsed.provider ?? presetEmbeddingProvider ?? existing?.embeddingProvider ?? "ai"
707
1006
  };
708
- const nonInteractive = parsed.yes || !process.stdin.isTTY;
1007
+ const nonInteractive = Boolean(parsed.yes) || Boolean(preset) || !process.stdin.isTTY;
1008
+ const overwritePolicy = parsed.overwrite ?? "skip";
709
1009
  const installDirAnswer = parsed.installDir ? parsed.installDir : nonInteractive ? defaults.installDir : await text({
710
1010
  message: "Install directory",
711
1011
  initialValue: defaults.installDir,
@@ -756,12 +1056,41 @@ async function initCommand(args) {
756
1056
  return;
757
1057
  }
758
1058
  const aliasBase = String(aliasAnswer).trim();
1059
+ const embeddingProviderAnswer = parsed.provider ? parsed.provider : nonInteractive ? defaults.embeddingProvider : await select({
1060
+ message: "Embedding provider",
1061
+ initialValue: defaults.embeddingProvider,
1062
+ options: [
1063
+ { value: "ai", label: "Vercel AI Gateway (AI SDK)", hint: "default" },
1064
+ { value: "openai", label: "OpenAI" },
1065
+ { value: "google", label: "Google AI (Gemini)" },
1066
+ { value: "openrouter", label: "OpenRouter" },
1067
+ { value: "azure", label: "Azure OpenAI" },
1068
+ { value: "vertex", label: "Google Vertex AI" },
1069
+ { value: "bedrock", label: "AWS Bedrock" },
1070
+ { value: "cohere", label: "Cohere" },
1071
+ { value: "mistral", label: "Mistral" },
1072
+ { value: "together", label: "Together.ai" },
1073
+ { value: "ollama", label: "Ollama (local)" },
1074
+ { value: "voyage", label: "Voyage AI" }
1075
+ ]
1076
+ });
1077
+ if (isCancel2(embeddingProviderAnswer)) {
1078
+ cancel2("Cancelled.");
1079
+ return;
1080
+ }
1081
+ const embeddingProvider = embeddingProviderAnswer;
759
1082
  if (parsed.richMedia === false && (parsed.extractors ?? []).length > 0) {
760
1083
  throw new Error('Cannot use "--no-rich-media" together with "--extractors".');
761
1084
  }
762
- const extractorsFromArgs = (parsed.extractors ?? []).filter((x) => AVAILABLE_EXTRACTORS.has(x)).sort();
1085
+ const extractorsFromArgs = (preset ? toExtractors(preset.modules?.extractors) : parsed.extractors ?? []).filter((x) => availableExtractors.has(x)).sort();
1086
+ if (preset) {
1087
+ const unknown = toExtractors(preset.modules?.extractors).filter((x) => !availableExtractors.has(x));
1088
+ if (unknown.length > 0) {
1089
+ throw new Error(`Preset contains unknown extractors: ${unknown.join(", ")}`);
1090
+ }
1091
+ }
763
1092
  const richMediaAnswer = extractorsFromArgs.length > 0 ? true : typeof parsed.richMedia === "boolean" ? parsed.richMedia : nonInteractive ? false : await confirm2({
764
- message: "Enable rich media ingestion (PDF/images/audio/video/files)? This also enables multimodal image embeddings (you can change this later).",
1093
+ message: "Enable rich media ingestion (PDF/images/audio/video/files)? This enables extractor modules and assetProcessing (you can change this later).",
765
1094
  initialValue: false
766
1095
  });
767
1096
  if (isCancel2(richMediaAnswer)) {
@@ -769,9 +1098,9 @@ async function initCommand(args) {
769
1098
  return;
770
1099
  }
771
1100
  const richMediaEnabled = Boolean(richMediaAnswer);
772
- const selectedExtractorsAnswer = richMediaEnabled || extractorsFromArgs.length > 0 ? nonInteractive ? extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS : await groupMultiselect({
1101
+ const selectedExtractorsAnswer = richMediaEnabled || extractorsFromArgs.length > 0 ? nonInteractive ? extractorsFromArgs.length > 0 ? extractorsFromArgs : defaultRichMediaExtractors.length > 0 ? defaultRichMediaExtractors : ["pdf-text-layer", "file-text"] : await groupMultiselect({
773
1102
  message: "Select extractors to enable (space to toggle, enter to confirm)",
774
- options: EXTRACTOR_OPTIONS.reduce((acc, opt) => {
1103
+ options: extractorOptions.reduce((acc, opt) => {
775
1104
  acc[opt.group] ??= [];
776
1105
  acc[opt.group].push({
777
1106
  value: opt.value,
@@ -780,7 +1109,7 @@ async function initCommand(args) {
780
1109
  });
781
1110
  return acc;
782
1111
  }, {}),
783
- initialValues: extractorsFromArgs.length > 0 ? extractorsFromArgs : DEFAULT_RICH_MEDIA_EXTRACTORS,
1112
+ initialValues: extractorsFromArgs.length > 0 ? extractorsFromArgs : defaultRichMediaExtractors.length > 0 ? defaultRichMediaExtractors : ["pdf-text-layer", "file-text"],
784
1113
  required: false
785
1114
  }) : [];
786
1115
  if (isCancel2(selectedExtractorsAnswer)) {
@@ -794,6 +1123,10 @@ async function initCommand(args) {
794
1123
  projectRoot: root,
795
1124
  registryRoot,
796
1125
  aliasBase,
1126
+ embeddingProvider,
1127
+ yes: nonInteractive,
1128
+ overwrite: overwritePolicy,
1129
+ presetConfig: preset?.config ?? undefined,
797
1130
  richMedia: richMediaEnabled ? {
798
1131
  enabled: true,
799
1132
  extractors: selectedExtractors
@@ -807,12 +1140,14 @@ async function initCommand(args) {
807
1140
  registryRoot,
808
1141
  installDir,
809
1142
  extractor,
810
- yes: nonInteractive
1143
+ yes: nonInteractive,
1144
+ overwrite: overwritePolicy
811
1145
  });
812
1146
  }
813
1147
  }
814
1148
  const pkg = await readPackageJson(root);
815
1149
  const { deps, devDeps } = depsForAdapter(storeAdapterAnswer);
1150
+ const embeddingDeps = depsForEmbeddingProvider(embeddingProvider);
816
1151
  const extractorDeps = {};
817
1152
  const extractorDevDeps = {};
818
1153
  for (const ex of selectedExtractors) {
@@ -820,7 +1155,34 @@ async function initCommand(args) {
820
1155
  Object.assign(extractorDeps, r.deps);
821
1156
  Object.assign(extractorDevDeps, r.devDeps);
822
1157
  }
823
- const merged = mergeDeps(pkg, { ...deps, ...extractorDeps }, { ...devDeps, ...extractorDevDeps });
1158
+ const connectorsFromPreset = preset ? toConnectors(preset.modules?.connectors) : [];
1159
+ const availableConnectorIds = new Set((manifest.connectors ?? []).filter((c) => c.status === "available").map((c) => String(c.id)));
1160
+ if (preset) {
1161
+ const unknown = connectorsFromPreset.filter((c) => !availableConnectorIds.has(c));
1162
+ if (unknown.length > 0) {
1163
+ throw new Error(`Preset contains unknown/unavailable connectors: ${unknown.join(", ")}`);
1164
+ }
1165
+ }
1166
+ if (connectorsFromPreset.length > 0) {
1167
+ for (const connector of connectorsFromPreset) {
1168
+ await copyConnectorFiles({
1169
+ projectRoot: root,
1170
+ registryRoot,
1171
+ installDir,
1172
+ connector,
1173
+ yes: nonInteractive,
1174
+ overwrite: overwritePolicy
1175
+ });
1176
+ }
1177
+ }
1178
+ const connectorDeps = {};
1179
+ const connectorDevDeps = {};
1180
+ for (const c of connectorsFromPreset) {
1181
+ const r = depsForConnector(c);
1182
+ Object.assign(connectorDeps, r.deps);
1183
+ Object.assign(connectorDevDeps, r.devDeps);
1184
+ }
1185
+ const merged = mergeDeps(pkg, { ...deps, ...embeddingDeps.deps, ...extractorDeps, ...connectorDeps }, { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, ...connectorDevDeps });
824
1186
  if (merged.changes.length > 0) {
825
1187
  await writePackageJson(root, merged.pkg);
826
1188
  }
@@ -828,28 +1190,126 @@ async function initCommand(args) {
828
1190
  installDir,
829
1191
  storeAdapter: storeAdapterAnswer,
830
1192
  aliasBase,
1193
+ embeddingProvider,
831
1194
  version: CONFIG_VERSION,
832
- connectors: existing?.connectors ?? [],
1195
+ connectors: Array.from(new Set([...existing?.connectors ?? [], ...connectorsFromPreset])).sort(),
833
1196
  extractors: Array.from(new Set([
834
1197
  ...existing?.extractors ?? [],
835
1198
  ...richMediaEnabled ? selectedExtractors : []
836
1199
  ])).sort()
837
1200
  };
838
- await writeJsonFile(path5.join(root, CONFIG_FILE), config);
1201
+ await writeJsonFile(path6.join(root, CONFIG_FILE), config);
839
1202
  const pm = await detectPackageManager(root);
840
1203
  const installLine = merged.changes.length > 0 ? `Next: run \`${installCmd(pm)}\`` : "Dependencies already satisfied.";
841
1204
  const isNext = Boolean((merged.pkg.dependencies ?? {})["next"]) || Boolean((merged.pkg.devDependencies ?? {})["next"]);
842
1205
  const tsconfigResult = isNext ? await patchTsconfigPaths({ projectRoot: root, installDir, aliasBase }) : { changed: false };
1206
+ const envHint = (() => {
1207
+ if (embeddingProvider === "ai") {
1208
+ return [
1209
+ "Env:",
1210
+ "- DATABASE_URL=...",
1211
+ "- AI_GATEWAY_API_KEY=...",
1212
+ "- (optional) AI_GATEWAY_MODEL=openai/text-embedding-3-small"
1213
+ ];
1214
+ }
1215
+ if (embeddingProvider === "openai") {
1216
+ return [
1217
+ "Env:",
1218
+ "- DATABASE_URL=...",
1219
+ "- OPENAI_API_KEY=...",
1220
+ "- (optional) OPENAI_EMBEDDING_MODEL=text-embedding-3-small"
1221
+ ];
1222
+ }
1223
+ if (embeddingProvider === "google") {
1224
+ return [
1225
+ "Env:",
1226
+ "- DATABASE_URL=...",
1227
+ "- GOOGLE_GENERATIVE_AI_API_KEY=...",
1228
+ "- (optional) GOOGLE_GENERATIVE_AI_EMBEDDING_MODEL=gemini-embedding-001"
1229
+ ];
1230
+ }
1231
+ if (embeddingProvider === "openrouter") {
1232
+ return [
1233
+ "Env:",
1234
+ "- DATABASE_URL=...",
1235
+ "- OPENROUTER_API_KEY=...",
1236
+ "- (optional) OPENROUTER_EMBEDDING_MODEL=text-embedding-3-small"
1237
+ ];
1238
+ }
1239
+ if (embeddingProvider === "cohere") {
1240
+ return [
1241
+ "Env:",
1242
+ "- DATABASE_URL=...",
1243
+ "- COHERE_API_KEY=...",
1244
+ "- (optional) COHERE_EMBEDDING_MODEL=embed-english-v3.0"
1245
+ ];
1246
+ }
1247
+ if (embeddingProvider === "mistral") {
1248
+ return [
1249
+ "Env:",
1250
+ "- DATABASE_URL=...",
1251
+ "- MISTRAL_API_KEY=...",
1252
+ "- (optional) MISTRAL_EMBEDDING_MODEL=mistral-embed"
1253
+ ];
1254
+ }
1255
+ if (embeddingProvider === "together") {
1256
+ return [
1257
+ "Env:",
1258
+ "- DATABASE_URL=...",
1259
+ "- TOGETHER_AI_API_KEY=...",
1260
+ "- (optional) TOGETHER_AI_EMBEDDING_MODEL=togethercomputer/m2-bert-80M-2k-retrieval"
1261
+ ];
1262
+ }
1263
+ if (embeddingProvider === "voyage") {
1264
+ return [
1265
+ "Env:",
1266
+ "- DATABASE_URL=...",
1267
+ "- VOYAGE_API_KEY=...",
1268
+ "- (optional) VOYAGE_MODEL=voyage-3.5-lite"
1269
+ ];
1270
+ }
1271
+ if (embeddingProvider === "ollama") {
1272
+ return [
1273
+ "Env:",
1274
+ "- DATABASE_URL=...",
1275
+ "- (optional) OLLAMA_EMBEDDING_MODEL=nomic-embed-text"
1276
+ ];
1277
+ }
1278
+ if (embeddingProvider === "azure") {
1279
+ return [
1280
+ "Env:",
1281
+ "- DATABASE_URL=...",
1282
+ "- AZURE_OPENAI_API_KEY=...",
1283
+ "- AZURE_RESOURCE_NAME=...",
1284
+ "- (optional) AZURE_EMBEDDING_MODEL=text-embedding-3-small"
1285
+ ];
1286
+ }
1287
+ if (embeddingProvider === "vertex") {
1288
+ return [
1289
+ "Env:",
1290
+ "- DATABASE_URL=...",
1291
+ "- GOOGLE_APPLICATION_CREDENTIALS=... (when outside GCP)",
1292
+ "- (optional) GOOGLE_VERTEX_EMBEDDING_MODEL=text-embedding-004"
1293
+ ];
1294
+ }
1295
+ return [
1296
+ "Env:",
1297
+ "- DATABASE_URL=...",
1298
+ "- AWS_REGION=... (Bedrock)",
1299
+ "- AWS credentials (when outside AWS)",
1300
+ "- (optional) BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0"
1301
+ ];
1302
+ })();
843
1303
  outro([
844
1304
  "Installed Unrag.",
845
1305
  "",
846
- `- Code: ${path5.join(installDir)}`,
847
- `- Docs: ${path5.join(installDir, "unrag.md")}`,
1306
+ `- Code: ${path6.join(installDir)}`,
1307
+ `- Docs: ${path6.join(installDir, "unrag.md")}`,
848
1308
  `- Config: unrag.config.ts`,
849
1309
  `- Imports: ${aliasBase}/* and ${aliasBase}/config`,
850
1310
  "",
851
1311
  `- Rich media: ${richMediaEnabled ? "enabled" : "disabled"}`,
852
- richMediaEnabled ? `- Embeddings: multimodal enabled (images can be embedded directly)` : `- Embeddings: text-only (no direct image embedding)`,
1312
+ `- Embedding provider: ${embeddingProvider}`,
853
1313
  richMediaEnabled ? `- Extractors: ${selectedExtractors.length > 0 ? selectedExtractors.join(", ") : "none"}` : "",
854
1314
  richMediaEnabled ? ` Tip: you can tweak extractors + assetProcessing flags in unrag.config.ts later.` : ` Tip: re-run \`unrag init --rich-media\` (or edit unrag.config.ts) to enable rich media later.`,
855
1315
  isNext ? tsconfigResult.changed ? `- Next.js: updated ${tsconfigResult.file} (added aliases)` : `- Next.js: no tsconfig changes needed` : `- Next.js: not detected`,
@@ -857,6 +1317,8 @@ async function initCommand(args) {
857
1317
  merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
858
1318
  installLine,
859
1319
  "",
1320
+ ...envHint,
1321
+ "",
860
1322
  `Saved ${CONFIG_FILE}.`
861
1323
  ].join(`
862
1324
  `));
@@ -864,36 +1326,11 @@ async function initCommand(args) {
864
1326
 
865
1327
  // cli/commands/add.ts
866
1328
  import { outro as outro2 } from "@clack/prompts";
867
- import path6 from "node:path";
1329
+ import path7 from "node:path";
868
1330
  import { fileURLToPath as fileURLToPath2 } from "node:url";
869
-
870
- // cli/lib/constants.ts
871
- var UNRAG_SITE_URL = (process.env.UNRAG_SITE_URL ?? process.env.UNRAG_DOCS_BASE_URL)?.trim() || "https://unrag.dev";
872
- var UNRAG_GITHUB_REPO_URL = "https://github.com/BetterStacks/unrag";
873
- function docsUrl(siteRelativePath) {
874
- const p = siteRelativePath.startsWith("/") ? siteRelativePath : `/${siteRelativePath}`;
875
- const base = UNRAG_SITE_URL.endsWith("/") ? UNRAG_SITE_URL : `${UNRAG_SITE_URL}/`;
876
- return new URL(p.replace(/^\/+/, "/"), base).toString();
877
- }
878
-
879
- // cli/commands/add.ts
880
1331
  var CONFIG_FILE2 = "unrag.json";
881
1332
  var __filename3 = fileURLToPath2(import.meta.url);
882
- var __dirname3 = path6.dirname(__filename3);
883
- var AVAILABLE_EXTRACTORS2 = [
884
- "pdf-llm",
885
- "pdf-text-layer",
886
- "pdf-ocr",
887
- "image-ocr",
888
- "image-caption-llm",
889
- "audio-transcribe",
890
- "video-transcribe",
891
- "video-frames",
892
- "file-text",
893
- "file-docx",
894
- "file-pptx",
895
- "file-xlsx"
896
- ];
1333
+ var __dirname3 = path7.dirname(__filename3);
897
1334
  var parseAddArgs = (args) => {
898
1335
  const out = {};
899
1336
  for (let i = 0;i < args.length; i++) {
@@ -926,36 +1363,39 @@ async function addCommand(args) {
926
1363
  const parsed = parseAddArgs(args);
927
1364
  const kind = parsed.kind ?? "connector";
928
1365
  const name = parsed.name;
1366
+ const configPath = path7.join(root, CONFIG_FILE2);
1367
+ const config = await readJsonFile(configPath);
1368
+ if (!config?.installDir) {
1369
+ throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag@latest init\` first.`);
1370
+ }
1371
+ const cliPackageRoot = await findUp(__dirname3, "package.json");
1372
+ if (!cliPackageRoot) {
1373
+ throw new Error("Could not locate CLI package root (package.json not found).");
1374
+ }
1375
+ const registryRoot = path7.join(cliPackageRoot, "registry");
1376
+ const manifest = await readRegistryManifest(registryRoot);
1377
+ const availableExtractors = new Set(manifest.extractors.map((e) => e.id));
1378
+ const availableConnectors = new Set(manifest.connectors.filter((c) => c.status === "available").map((c) => c.id));
929
1379
  if (!name) {
930
1380
  outro2([
931
1381
  "Usage:",
932
1382
  " unrag add <connector>",
933
1383
  " unrag add extractor <name>",
934
1384
  "",
935
- "Available connectors: notion",
936
- `Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`
1385
+ `Available connectors: ${Array.from(availableConnectors).join(", ")}`,
1386
+ `Available extractors: ${Array.from(availableExtractors).join(", ")}`
937
1387
  ].join(`
938
1388
  `));
939
1389
  return;
940
1390
  }
941
- const configPath = path6.join(root, CONFIG_FILE2);
942
- const config = await readJsonFile(configPath);
943
- if (!config?.installDir) {
944
- throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag@latest init\` first.`);
945
- }
946
- const cliPackageRoot = await findUp(__dirname3, "package.json");
947
- if (!cliPackageRoot) {
948
- throw new Error("Could not locate CLI package root (package.json not found).");
949
- }
950
- const registryRoot = path6.join(cliPackageRoot, "registry");
951
1391
  const nonInteractive = parsed.yes || !process.stdin.isTTY;
952
1392
  const pkg = await readPackageJson(root);
953
1393
  if (kind === "connector") {
954
1394
  const connector = name;
955
- if (connector !== "notion") {
1395
+ if (!connector || !availableConnectors.has(connector)) {
956
1396
  outro2(`Unknown connector: ${name}
957
1397
 
958
- Available connectors: notion`);
1398
+ Available connectors: ${Array.from(availableConnectors).join(", ")}`);
959
1399
  return;
960
1400
  }
961
1401
  await copyConnectorFiles({
@@ -975,20 +1415,20 @@ Available connectors: notion`);
975
1415
  outro2([
976
1416
  `Installed connector: ${connector}.`,
977
1417
  "",
978
- `- Code: ${path6.join(config.installDir, "connectors", connector)}`,
1418
+ `- Code: ${path7.join(config.installDir, "connectors", connector)}`,
979
1419
  `- Docs: ${docsUrl(`/docs/connectors/${connector}`)}`,
980
1420
  "",
981
1421
  merged2.changes.length > 0 ? `Added deps: ${merged2.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
982
- nonInteractive ? "" : "Tip: keep NOTION_TOKEN server-side only (env var)."
1422
+ nonInteractive ? "" : connector === "notion" ? "Tip: keep NOTION_TOKEN server-side only (env var)." : connector === "google-drive" ? "Tip: keep Google OAuth refresh tokens and service account keys server-side only." : ""
983
1423
  ].filter(Boolean).join(`
984
1424
  `));
985
1425
  return;
986
1426
  }
987
1427
  const extractor = name;
988
- if (!extractor || !AVAILABLE_EXTRACTORS2.includes(extractor)) {
1428
+ if (!extractor || !availableExtractors.has(extractor)) {
989
1429
  outro2(`Unknown extractor: ${name}
990
1430
 
991
- Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`);
1431
+ Available extractors: ${Array.from(availableExtractors).join(", ")}`);
992
1432
  return;
993
1433
  }
994
1434
  await copyExtractorFiles({
@@ -1008,7 +1448,7 @@ Available extractors: ${AVAILABLE_EXTRACTORS2.join(", ")}`);
1008
1448
  outro2([
1009
1449
  `Installed extractor: ${extractor}.`,
1010
1450
  "",
1011
- `- Code: ${path6.join(config.installDir, "extractors", extractor)}`,
1451
+ `- Code: ${path7.join(config.installDir, "extractors", extractor)}`,
1012
1452
  "",
1013
1453
  merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
1014
1454
  "",
@@ -1028,7 +1468,7 @@ function renderHelp() {
1028
1468
  "",
1029
1469
  "Commands:",
1030
1470
  " init Install core files (config + store adapter templates)",
1031
- " add <connector> Install a connector (currently: notion)",
1471
+ " add <connector> Install a connector (notion, google-drive)",
1032
1472
  " help Show this help",
1033
1473
  "",
1034
1474
  "Global options:",
@@ -1039,6 +1479,8 @@ function renderHelp() {
1039
1479
  " --store <adapter> drizzle | prisma | raw-sql",
1040
1480
  " --dir <path> Install directory (alias: --install-dir)",
1041
1481
  " --alias <@name> Import alias base (e.g. @unrag)",
1482
+ " --preset <id|url> Install from a web-generated preset (non-interactive)",
1483
+ " --overwrite <mode> skip | force (when files already exist)",
1042
1484
  " --rich-media Enable rich media setup (also enables multimodal embeddings)",
1043
1485
  " --no-rich-media Disable rich media setup",
1044
1486
  " --extractors <list> Comma-separated extractors (implies --rich-media)",