@msbayindir/context-rag 1.0.0-beta.10 → 1.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -324,6 +324,12 @@ npx @msbayindir/context-rag init --force
324
324
 
325
325
  # Check setup status (Prisma models, pgvector, env variables)
326
326
  npx @msbayindir/context-rag status
327
+
328
+ # Show how to check for embedding model mismatches (prints a programmatic usage example; the CLI itself does not query the database)
329
+ npx @msbayindir/context-rag check-embeddings
330
+
331
+ # Show how to re-index documents after changing embedding models (echoes the options and prints a MigrationService example; the CLI itself does not re-index)
332
+ npx @msbayindir/context-rag reindex --concurrency 5
327
333
  ```
328
334
 
329
335
  ---
@@ -593,35 +599,153 @@ await rag.ingest({
593
599
 
594
600
  ## ⚙️ Configuration
595
601
 
602
+
603
+ ## ⚙️ Configuration Reference
604
+
605
+ Context-RAG is highly configurable. Below is the complete list of all available options.
606
+
596
607
  ```typescript
597
608
  const rag = new ContextRAG({
598
- // Required
609
+ // ============================================
610
+ // CORE CONFIGURATION (Required)
611
+ // ============================================
612
+
613
+ /** Your initialized Prisma client instance */
599
614
  prisma: prismaClient,
600
- geminiApiKey: 'your-api-key',
601
615
 
602
- // Model selection
603
- model: 'gemini-3-flash-preview',
604
- embeddingModel: 'gemini-embedding-exp-03-07',
616
+ /** Gemini API Key (Required for generation and default embeddings) */
617
+ geminiApiKey: process.env.GEMINI_API_KEY!,
618
+
619
+ // ============================================
620
+ // MODEL SELECTION
621
+ // ============================================
605
622
 
606
- // Generation
623
+ /**
624
+ * Main LLM model for generation, orchestration, and RAG enhancement.
625
+ * Default: 'gemini-1.5-pro'
626
+ */
627
+ model: 'gemini-1.5-pro', // Options: 'gemini-1.5-flash', 'gemini-2.0-flash-exp', etc.
628
+
629
+ /**
630
+ * Configuration for the LLM generation (temperature, tokens, etc.)
631
+ */
607
632
  generationConfig: {
608
- temperature: 0.2,
609
- maxOutputTokens: 16384,
633
+ temperature: 0.3, // Creativity (0.0 - 1.0). Lower is more deterministic.
634
+ maxOutputTokens: 8192, // Maximum length of the generated response.
635
+ },
636
+
637
+ // ============================================
638
+ // EMBEDDING PROVIDER (Optional)
639
+ // ============================================
640
+
641
+ /**
642
+ * Choose your embedding provider.
643
+ * Default: Uses Gemini 'text-embedding-004'
644
+ */
645
+ embeddingProvider: {
646
+ // Provider: 'gemini' | 'openai' | 'cohere'
647
+ provider: 'openai',
648
+
649
+ // Model name (specific to the provider)
650
+ model: 'text-embedding-3-small',
651
+
652
+ // API Key (if different from geminiApiKey)
653
+ apiKey: process.env.OPENAI_API_KEY,
610
654
  },
611
655
 
612
- // Batch processing
656
+ // ============================================
657
+ // SYSTEM CONFIGURATION
658
+ // ============================================
659
+
660
+ /**
661
+ * Batch processing settings for ingestion.
662
+ * Adjust these based on your API rate limits.
663
+ */
613
664
  batchConfig: {
614
- pagesPerBatch: 15,
615
- maxConcurrency: 3,
616
- maxRetries: 3,
665
+ pagesPerBatch: 15, // How many pages to process in one go (Default: 15)
666
+ maxConcurrency: 3, // How many batches to run in parallel (Default: 3)
667
+ maxRetries: 3, // Retry failed batches (Default: 3)
668
+ retryDelayMs: 1000, // Initial delay before retry (Default: 1000ms)
669
+ backoffMultiplier: 2, // Exponential backoff factor (Default: 2)
670
+ },
671
+
672
+ /**
673
+ * Settings for splitting text into vector chunks.
674
+ */
675
+ chunkConfig: {
676
+ maxTokens: 500, // Maximum size of a single chunk (Default: 500)
677
+ overlapTokens: 50, // Overlap between chunks to preserve continuity (Default: 50)
678
+ },
679
+
680
+ /**
681
+ * API Rate Limiting protection.
682
+ */
683
+ rateLimitConfig: {
684
+ requestsPerMinute: 60, // Max RPM allowed (Default: 60)
685
+ adaptive: true, // Automatically slow down if 429 errors occur (Default: true)
686
+ },
687
+
688
+ /**
689
+ * System logging configuration.
690
+ */
691
+ logging: {
692
+ level: 'info', // 'debug' | 'info' | 'warn' | 'error'
693
+ structured: true, // Use JSON format for logs (Best for production tools like Datadog/CloudWatch)
617
694
  },
618
695
 
619
- // RAG Enhancement
696
+ // ============================================
697
+ // ADVANCED FEATURES
698
+ // ============================================
699
+
700
+ /**
701
+ * Reranking improves search relevance by re-scoring results.
702
+ */
703
+ rerankingConfig: {
704
+ enabled: true, // Enable automatic reranking (Default: false)
705
+ provider: 'cohere', // 'gemini' or 'cohere' (Cohere is recommended for best results)
706
+ cohereApiKey: process.env.COHERE_API_KEY, // Required if provider is 'cohere'
707
+ defaultCandidates: 50, // Retrieve top 50 from Vector DB...
708
+ defaultTopK: 10, // ...and return top 10 after reranking.
709
+ },
710
+
711
+ /**
712
+ * RAG Enhancement (Contextual Retrieval).
713
+ * Adds context to chunks before embedding them.
714
+ */
620
715
  ragEnhancement: {
716
+ // Approach: 'anthropic_contextual' (Recommended) or 'none'
621
717
  approach: 'anthropic_contextual',
622
- strategy: 'simple',
623
- skipChunkTypes: ['HEADING'],
718
+
719
+ // Strategy: 'llm' (Best Quality) or 'simple' (Template based)
720
+ strategy: 'llm',
721
+
722
+ // Model to use for generating context (Optional, defaults to main model)
723
+ // Tip: Use a cheaper model here (e.g., 'gemini-1.5-flash') to save costs.
724
+ model: 'gemini-1.5-flash',
725
+
726
+ // Prompt used to generate context (Optional, has good default)
727
+ contextPrompt: 'Situate this chunk within the document...',
728
+
729
+ // Don't waste tokens generating context for these types
730
+ skipChunkTypes: ['HEADING', 'IMAGE_REF', 'CODE'],
624
731
  },
732
+
733
+ /**
734
+ * Enable Structured Output (JSON Schema) for reliable parsing.
735
+ * Disable only if you are using a model that doesn't support it well.
736
+ * Default: true
737
+ */
738
+ useStructuredOutput: true,
739
+
740
+ /**
741
+ * Custom Chunk Type Mapping.
742
+ * Map your custom extraction types to system types for proper handling.
743
+ */
744
+ chunkTypeMapping: {
745
+ 'RECIPE': 'TEXT', // Treat 'RECIPE' as normal text
746
+ 'INGREDIENT_LIST': 'LIST', // Treat 'INGREDIENT_LIST' as a list
747
+ 'NUTRITIONAL_INFO': 'TABLE' // Treat 'NUTRITIONAL_INFO' as a table
748
+ }
625
749
  });
626
750
  ```
627
751
 
package/dist/bin/cli.cjs CHANGED
@@ -26,8 +26,111 @@ function _interopNamespace(e) {
26
26
  var fs__namespace = /*#__PURE__*/_interopNamespace(fs);
27
27
  var path__namespace = /*#__PURE__*/_interopNamespace(path);
28
28
 
29
+ var __defProp = Object.defineProperty;
30
+ var __getOwnPropNames = Object.getOwnPropertyNames;
31
+ var __esm = (fn, res) => function __init() {
32
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
33
+ };
34
+ var __export = (target, all) => {
35
+ for (var name in all)
36
+ __defProp(target, name, { get: all[name], enumerable: true });
37
+ };
38
+
39
+ // src/utils/embedding-utils.ts
40
+ var embedding_utils_exports = {};
41
+ __export(embedding_utils_exports, {
42
+ buildEmbeddingModelId: () => buildEmbeddingModelId,
43
+ checkEmbeddingMismatch: () => checkEmbeddingMismatch,
44
+ detectEmbeddingMismatch: () => detectEmbeddingMismatch,
45
+ getEmbeddingModelStats: () => getEmbeddingModelStats
46
+ });
47
+ async function detectEmbeddingMismatch(prisma, currentProvider) {
48
+ const result = await checkEmbeddingMismatch(prisma, currentProvider);
49
+ if (!result.hasMismatch) {
50
+ return null;
51
+ }
52
+ const severity = determineSeverity(result);
53
+ const message = buildMismatchMessage(result, severity);
54
+ return {
55
+ severity,
56
+ message,
57
+ details: result,
58
+ action: severity === "critical" ? "reindex-required" : "reindex"
59
+ };
60
+ }
61
+ async function checkEmbeddingMismatch(prisma, currentProvider) {
62
+ const stats = await getEmbeddingModelStats(prisma);
63
+ const totalChunks = await prisma.contextRagChunk.count();
64
+ let chunksToMigrate = 0;
65
+ for (const stat of stats) {
66
+ if (stat.model !== currentProvider.id) {
67
+ chunksToMigrate += stat.count;
68
+ }
69
+ }
70
+ const hasMismatch = chunksToMigrate > 0;
71
+ return {
72
+ hasMismatch,
73
+ currentModel: currentProvider.id,
74
+ currentProvider: extractProviderType(currentProvider.id),
75
+ currentDimension: currentProvider.dimension,
76
+ existingModels: stats,
77
+ chunksToMigrate,
78
+ totalChunks
79
+ };
80
+ }
81
+ async function getEmbeddingModelStats(prisma) {
82
+ const results = await prisma.$queryRaw`
83
+ SELECT
84
+ embedding_model,
85
+ embedding_dimension,
86
+ COUNT(*) as count
87
+ FROM context_rag_chunks
88
+ GROUP BY embedding_model, embedding_dimension
89
+ ORDER BY count DESC
90
+ `;
91
+ return results.map((r) => ({
92
+ model: r.embedding_model,
93
+ dimension: r.embedding_dimension,
94
+ count: Number(r.count)
95
+ }));
96
+ }
97
+ function determineSeverity(result) {
98
+ if (!result.hasMismatch) {
99
+ return "none";
100
+ }
101
+ for (const stat of result.existingModels) {
102
+ if (stat.dimension !== null && stat.dimension !== result.currentDimension) {
103
+ return "critical";
104
+ }
105
+ }
106
+ const mismatchPercentage = result.chunksToMigrate / result.totalChunks * 100;
107
+ if (mismatchPercentage > 50) {
108
+ return "critical";
109
+ }
110
+ return "warning";
111
+ }
112
+ function buildMismatchMessage(result, severity) {
113
+ const percentage = Math.round(result.chunksToMigrate / result.totalChunks * 100);
114
+ if (severity === "critical") {
115
+ return `\u26A0\uFE0F CRITICAL: ${result.chunksToMigrate} chunks (${percentage}%) were created with different embedding models. Current: ${result.currentModel} (${result.currentDimension}d). Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;
116
+ }
117
+ return `\u26A1 Warning: ${result.chunksToMigrate} chunks (${percentage}%) may have outdated embeddings. Current model: ${result.currentModel}. Consider running 'npx context-rag reindex' for optimal results.`;
118
+ }
119
+ function extractProviderType(modelId) {
120
+ if (modelId.startsWith("gemini")) return "gemini";
121
+ if (modelId.startsWith("openai") || modelId.includes("text-embedding")) return "openai";
122
+ if (modelId.startsWith("cohere") || modelId.includes("embed-")) return "cohere";
123
+ return "gemini";
124
+ }
125
+ function buildEmbeddingModelId(provider, model) {
126
+ return `${provider}-${model}`;
127
+ }
128
+ var init_embedding_utils = __esm({
129
+ "src/utils/embedding-utils.ts"() {
130
+ }
131
+ });
29
132
  var program = new commander.Command();
30
- program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.1");
133
+ program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.11");
31
134
  program.command("init").description("Initialize Context-RAG in your project").option("-f, --force", "Overwrite existing files").action(async (options) => {
32
135
  console.log("\u{1F680} Initializing Context-RAG...\n");
33
136
  try {
@@ -237,6 +340,42 @@ program.command("status").description("Check Context-RAG setup status").action(a
237
340
  console.log(` ${process.env["COHERE_API_KEY"] ? "\u2705" : "\u26AA"} COHERE_API_KEY (optional)`);
238
341
  console.log();
239
342
  });
343
+ program.command("check-embeddings").description("Check for embedding model mismatch between config and database").action(async () => {
344
+ console.log("\u{1F50D} Checking embedding model status...\n");
345
+ try {
346
+ void await Promise.resolve().then(() => (init_embedding_utils(), embedding_utils_exports));
347
+ console.log("\u26A0\uFE0F Full mismatch detection requires database connection.");
348
+ console.log(" Use this command programmatically with your Prisma client.\n");
349
+ console.log("Example:");
350
+ console.log(' import { detectEmbeddingMismatch } from "@msbayindir/context-rag";');
351
+ console.log(" const mismatch = await detectEmbeddingMismatch(prisma, provider);");
352
+ console.log();
353
+ } catch (error) {
354
+ console.error("\u274C Error:", error.message);
355
+ process.exit(1);
356
+ }
357
+ });
358
+ program.command("reindex").description("Re-index all chunks with current embedding model").option("-c, --concurrency <number>", "Number of concurrent embedding calls", "5").option("-b, --batch-size <number>", "Batch size for processing", "50").option("-d, --document-id <id>", "Re-index specific document only").action(async (options) => {
359
+ console.log("\u{1F504} Starting re-indexing operation...\n");
360
+ console.log("Options:");
361
+ console.log(` Concurrency: ${options.concurrency}`);
362
+ console.log(` Batch size: ${options.batchSize}`);
363
+ if (options.documentId) {
364
+ console.log(` Document ID: ${options.documentId}`);
365
+ }
366
+ console.log();
367
+ console.log("\u26A0\uFE0F Re-indexing requires database connection and embedding provider.");
368
+ console.log(" Use this command programmatically:\n");
369
+ console.log("Example:");
370
+ console.log(' import { MigrationService } from "@msbayindir/context-rag";');
371
+ console.log(" const migrationService = new MigrationService(prisma, provider, config, logger);");
372
+ console.log(" const result = await migrationService.reindex({");
373
+ console.log(` concurrency: ${options.concurrency},`);
374
+ console.log(` batchSize: ${options.batchSize},`);
375
+ console.log(" onProgress: (p) => console.log(`${p.processed}/${p.total}`)");
376
+ console.log(" });");
377
+ console.log();
378
+ });
240
379
  program.parse();
241
380
  //# sourceMappingURL=cli.cjs.map
242
381
  //# sourceMappingURL=cli.cjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/bin/cli.ts"],"names":["Command","path","fs"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAMA,IAAM,OAAA,GAAU,IAAIA,iBAAA,EAAQ;AAE5B,OAAA,CACK,KAAK,aAAa,CAAA,CAClB,YAAY,8CAA8C,CAAA,CAC1D,QAAQ,cAAc,CAAA;AAE3B,OAAA,CACK,OAAA,CAAQ,MAAM,CAAA,CACd,WAAA,CAAY,wCAAwC,CAAA,CACpD,MAAA,CAAO,aAAA,EAAe,0BAA0B,CAAA,CAChD,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,yCAAkC,CAAA;AAE9C,EAAA,IAAI;AAEA,IAAA,MAAM,SAAA,GAAiBC,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,IAAO,QAAQ,CAAA;AACnD,IAAA,MAAM,UAAA,GAAkBA,eAAA,CAAA,IAAA,CAAK,SAAA,EAAW,eAAe,CAAA;AAEvD,IAAA,IAAI,YAAA,GAAe,KAAA;AACnB,IAAA,IAAI;AACA,MAAA,MAASC,qBAAO,UAAU,CAAA;AAC1B,MAAA,YAAA,GAAe,IAAA;AAAA,IACnB,CAAA,CAAA,MAAQ;AACJ,MAAA,YAAA,GAAe,KAAA;AAAA,IACnB;AAEA,IAAA,IAAI,CAAC,YAAA,EAAc;AACf,MAAA,OAAA,CAAQ,IAAI,wDAAmD,CAAA;AAC/D,MAAA,OAAA,CAAQ,IAAI,0CAA0C,CAAA;AACtD,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,MAAM,cAAA,GAAiB,MAASA,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAG5D,IAAA,IAAI,eAAe,QAAA,CAAS,iBAAiB,CAAA,IAAK,CAAC,QAAQ,KAAA,EAAO;AAC9D,MAAA,OAAA,CAAQ,IAAI,2DAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,IAAI,CAAC,cAAA,CAAe,QAAA,CAAS,sBAAsB,CAAA,EAAG;AAClD,MAAA,OAAA,CAAQ,IAAI,wDAA8C,CAAA;AAC1D,MAAA,OAAA,CAAQ,IAAI,+CAA+C,CAAA;AAC3D,MAAA,OAAA,CAAQ,IAAI,uBAAuB,CAAA;AACnC,MAAA,OAAA,CAAQ,IAAI,oCAAoC,CAAA;AAChD,MAAA,OAAA,CAAQ,IAAI,iDAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AACpB,MAAA,OAAA,CAAQ,IAAI,oBAAoB,CAAA;AAChC,MAAA,OAAA,CAAQ,IAAI,8BAA8B,CAAA;AAC1C,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,IAAI,4BAA4B,CAAA;AACxC,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AAAA,IACxB;AAGA,IAAA,MAAM,gBAAA,GAAmB;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AA
AA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,CAAA;AAwIzB,IAAA,IAAI,SAAA,GAAY,cAAA;AAChB,IAAA,IAAI,OAAA,CAAQ,KAAA,IAAS,cAAA,CAAe,QAAA,CAAS,uBAAuB,CAAA,EAAG;AACnE,MAAA,MAAM,WAAA,GAAc,wEAAA;AACpB,MAAA,MAAM,UAAA,GAAa,SAAA,CAAU,OAAA,CAAQ,WAAW,CAAA;AAChD,MAAA,IAAI,eAAe,CAAA,CAAA,EAAI;AACnB,QAAA,SAAA,GAAY,SAAA,CAAU,SAAA,CAAU,CAAA,EAAG,UAAU,EAAE,IAAA,EAAK;AAAA,MACxD;AAAA,IACJ;AAGA,IAAA,SAAA,GAAY,SAAA,CAAU,IAAA,EAAK,GAAI,IAAA,GAAO,gBAAA;AAGtC,IAAA,MAASA,aAAA,CAAA,SAAA,CAAU,YAAY,SAAS,CAAA;AAExC,IAAA,OAAA,CAAQ,IAAI,2DAAsD,CAAA;AAClE,IAAA,OAAA,CAAQ,IAAI,aAAa,CAAA;AACzB,IAAA,OAAA,CAAQ,IAAI,yDAAyD,CAAA;AACrE,IAAA,OAAA,CAAQ,IAAI,4EAA4E,CAAA;AACxF,IAAA,OAAA,CAAQ,IAAI,iCAAiC,CAAA;AAAA,EAEjD,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,QAAQ,CAAA,CAChB,YAAY,gCAAgC,CAAA,CAC5C,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,4CAAqC,CAAA;AAGjD,EAAA,MAAM,aAAkBD,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,EAAI,EAAG,UAAU,eAAe,CAAA;AACrE,EAAA,IAAI;AACA,IAAA,MAAM,MAAA,GAAS,MAASC,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAEpD,IAAA,OAAA,CAAQ,IAAI,gBAAgB,CAAA;AAC5B,IAAA,OAAA,CAAQ,IAAI,CAAA,4BAAA,CAAyB,CAAA;AACrC,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,iBAAiB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACpF,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,sBAAsB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACzF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,CAAA,CAAA,MAAQ;AACJ,IAAA,OAAA,CAAQ,IAAI,yCAAoC,CAAA;AAAA,EACpD;AAIA,EAAA,OAAA,CAAQ,IAAI,cAAc,CAAA;AAC1B,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,cAAc,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,aAAA,CAAe,CAAA;AACvE,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,eAAA,CAAiB,CAAA;AAC3E,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,
QAAA,GAAM,QAAG,CAAA,0BAAA,CAA4B,CAAA;AACtF,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CAAQ,KAAA,EAAM","file":"cli.cjs","sourcesContent":["#!/usr/bin/env node\r\n\r\nimport { Command } from 'commander';\r\nimport * as fs from 'fs/promises';\r\nimport * as path from 'path';\r\n\r\nconst program = new Command();\r\n\r\nprogram\r\n .name('context-rag')\r\n .description('Context-RAG CLI - Setup and management tools')\r\n .version('1.0.0-beta.1');\r\n\r\nprogram\r\n .command('init')\r\n .description('Initialize Context-RAG in your project')\r\n .option('-f, --force', 'Overwrite existing files')\r\n .action(async (options) => {\r\n console.log('🚀 Initializing Context-RAG...\\n');\r\n\r\n try {\r\n // Check if prisma directory exists\r\n const prismaDir = path.join(process.cwd(), 'prisma');\r\n const schemaPath = path.join(prismaDir, 'schema.prisma');\r\n\r\n let schemaExists = false;\r\n try {\r\n await fs.access(schemaPath);\r\n schemaExists = true;\r\n } catch {\r\n schemaExists = false;\r\n }\r\n\r\n if (!schemaExists) {\r\n console.log('❌ Prisma schema not found at prisma/schema.prisma');\r\n console.log(' Please run `npx prisma init` first.\\n');\r\n process.exit(1);\r\n }\r\n\r\n // Read existing schema\r\n const existingSchema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n // Check if Context-RAG models already exist\r\n if (existingSchema.includes('ContextRagChunk') && !options.force) {\r\n console.log('⚠️ Context-RAG models already exist in schema.');\r\n console.log(' Use --force to overwrite.\\n');\r\n process.exit(0);\r\n }\r\n\r\n // Check for pgvector extension\r\n if (!existingSchema.includes('postgresqlExtensions')) {\r\n console.log('⚠️ Warning: pgvector extension not enabled.');\r\n console.log(' Add the following to your schema.prisma:\\n');\r\n console.log(' generator client {');\r\n console.log(' provider = \"prisma-client-js\"');\r\n console.log(' previewFeatures = [\"postgresqlExtensions\"]');\r\n console.log(' }\\n');\r\n console.log(' 
datasource db {');\r\n console.log(' provider = \"postgresql\"');\r\n console.log(' url = env(\"DATABASE_URL\")');\r\n console.log(' extensions = [vector]');\r\n console.log(' }\\n');\r\n }\r\n\r\n // Context-RAG models to append\r\n const contextRagModels = `\r\n// ============================================\r\n// Context-RAG Models\r\n// ============================================\r\n\r\n/// Stores prompt configurations for different document types\r\nmodel ContextRagPromptConfig {\r\n id String @id @default(uuid())\r\n documentType String @map(\"document_type\")\r\n name String\r\n systemPrompt String @map(\"system_prompt\") @db.Text\r\n chunkStrategy Json @map(\"chunk_strategy\")\r\n version Int @default(1)\r\n isActive Boolean @default(true) @map(\"is_active\")\r\n isDefault Boolean @default(false) @map(\"is_default\")\r\n createdBy String? @map(\"created_by\")\r\n changeLog String? @map(\"change_log\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n chunks ContextRagChunk[]\r\n\r\n @@unique([documentType, version])\r\n @@index([documentType, isActive])\r\n @@map(\"context_rag_prompt_configs\")\r\n}\r\n\r\n/// Stores vector chunks for semantic search\r\nmodel ContextRagChunk {\r\n id String @id @default(uuid())\r\n promptConfigId String @map(\"prompt_config_id\")\r\n promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)\r\n documentId String @map(\"document_id\")\r\n chunkIndex Int @map(\"chunk_index\")\r\n chunkType String @map(\"chunk_type\")\r\n\r\n /// Plain text content optimized for vector search\r\n searchContent String @map(\"search_content\") @db.Text\r\n\r\n /// Enriched content: context + searchContent (for RAG enhancement)\r\n enrichedContent String? @map(\"enriched_content\") @db.Text\r\n\r\n /// AI-generated context text only (for debugging)\r\n contextText String? 
@map(\"context_text\") @db.Text\r\n\r\n /// Vector embedding (768 dimensions for Gemini)\r\n searchVector Unsupported(\"vector(768)\") @map(\"search_vector\")\r\n\r\n /// Rich Markdown content for display\r\n displayContent String @map(\"display_content\") @db.Text\r\n\r\n sourcePageStart Int @map(\"source_page_start\")\r\n sourcePageEnd Int @map(\"source_page_end\")\r\n confidenceScore Float @default(0.5) @map(\"confidence_score\")\r\n metadata Json\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n\r\n @@index([promptConfigId])\r\n @@index([documentId])\r\n @@index([chunkType])\r\n @@index([confidenceScore])\r\n @@map(\"context_rag_chunks\")\r\n}\r\n\r\n/// Tracks document processing state\r\nmodel ContextRagDocument {\r\n id String @id @default(uuid())\r\n filename String\r\n fileHash String @map(\"file_hash\")\r\n fileSize Int @map(\"file_size\")\r\n pageCount Int @map(\"page_count\")\r\n documentType String? @map(\"document_type\")\r\n\r\n /// Experiment identifier for A/B testing models\r\n experimentId String? @map(\"experiment_id\")\r\n\r\n /// AI model used for processing\r\n modelName String? @map(\"model_name\")\r\n\r\n /// Model configuration as JSON\r\n modelConfig Json? @map(\"model_config\")\r\n\r\n status String @default(\"PENDING\")\r\n\r\n promptConfigId String? @map(\"prompt_config_id\")\r\n totalBatches Int @default(0) @map(\"total_batches\")\r\n completedBatches Int @default(0) @map(\"completed_batches\")\r\n failedBatches Int @default(0) @map(\"failed_batches\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n errorMessage String? @map(\"error_message\")\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n completedAt DateTime? 
@map(\"completed_at\")\r\n\r\n batches ContextRagBatch[]\r\n\r\n @@unique([fileHash, experimentId])\r\n @@index([status])\r\n @@index([fileHash])\r\n @@index([documentType])\r\n @@index([experimentId])\r\n @@map(\"context_rag_documents\")\r\n}\r\n\r\n/// Tracks individual batch processing jobs\r\nmodel ContextRagBatch {\r\n id String @id @default(uuid())\r\n documentId String @map(\"document_id\")\r\n document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)\r\n\r\n batchIndex Int @map(\"batch_index\")\r\n pageStart Int @map(\"page_start\")\r\n pageEnd Int @map(\"page_end\")\r\n status String @default(\"PENDING\")\r\n retryCount Int @default(0) @map(\"retry_count\")\r\n lastError String? @map(\"last_error\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n\r\n startedAt DateTime? @map(\"started_at\")\r\n completedAt DateTime? @map(\"completed_at\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n @@unique([documentId, batchIndex])\r\n @@index([documentId, status])\r\n @@index([status])\r\n @@map(\"context_rag_batches\")\r\n}\r\n`;\r\n\r\n // Remove existing Context-RAG models if force\r\n let newSchema = existingSchema;\r\n if (options.force && existingSchema.includes('// Context-RAG Models')) {\r\n const startMarker = '// ============================================\\n// Context-RAG Models';\r\n const startIndex = newSchema.indexOf(startMarker);\r\n if (startIndex !== -1) {\r\n newSchema = newSchema.substring(0, startIndex).trim();\r\n }\r\n }\r\n\r\n // Append new models\r\n newSchema = newSchema.trim() + '\\n' + contextRagModels;\r\n\r\n // Write updated schema\r\n await fs.writeFile(schemaPath, newSchema);\r\n\r\n console.log('✅ Context-RAG models added to prisma/schema.prisma\\n');\r\n console.log('Next steps:');\r\n console.log(' 1. 
Run: npx prisma migrate dev --name add_context_rag');\r\n console.log(' 2. Enable pgvector in PostgreSQL: CREATE EXTENSION IF NOT EXISTS vector;');\r\n console.log(' 3. Start using Context-RAG!\\n');\r\n\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('status')\r\n .description('Check Context-RAG setup status')\r\n .action(async () => {\r\n console.log('🔍 Checking Context-RAG status...\\n');\r\n\r\n // Check schema\r\n const schemaPath = path.join(process.cwd(), 'prisma', 'schema.prisma');\r\n try {\r\n const schema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n console.log('Prisma Schema:');\r\n console.log(` ✅ schema.prisma found`);\r\n console.log(` ${schema.includes('ContextRagChunk') ? '✅' : '❌'} Context-RAG models`);\r\n console.log(` ${schema.includes('postgresqlExtensions') ? '✅' : '❌'} pgvector extension`);\r\n console.log();\r\n } catch {\r\n console.log('❌ prisma/schema.prisma not found\\n');\r\n }\r\n\r\n // Check env - note: these are checked at runtime, not via centralized env\r\n // since CLI may run before env is fully configured\r\n console.log('Environment:');\r\n console.log(` ${process.env['DATABASE_URL'] ? '✅' : '❌'} DATABASE_URL`);\r\n console.log(` ${process.env['GEMINI_API_KEY'] ? '✅' : '❌'} GEMINI_API_KEY`);\r\n console.log(` ${process.env['COHERE_API_KEY'] ? '✅' : '⚪'} COHERE_API_KEY (optional)`);\r\n console.log();\r\n });\r\n\r\nprogram.parse();\r\n"]}
1
+ {"version":3,"sources":["../../src/utils/embedding-utils.ts","../../src/bin/cli.ts"],"names":["Command","path","fs"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,IAAA,uBAAA,GAAA,EAAA;AAAA,QAAA,CAAA,uBAAA,EAAA;AAAA,EAAA,qBAAA,EAAA,MAAA,qBAAA;AAAA,EAAA,sBAAA,EAAA,MAAA,sBAAA;AAAA,EAAA,uBAAA,EAAA,MAAA,uBAAA;AAAA,EAAA,sBAAA,EAAA,MAAA;AAAA,CAAA,CAAA;AAsBA,eAAsB,uBAAA,CAClB,QACA,eAAA,EAC4B;AAC5B,EAAA,MAAM,MAAA,GAAS,MAAM,sBAAA,CAAuB,MAAA,EAAQ,eAAe,CAAA;AAEnE,EAAA,IAAI,CAAC,OAAO,WAAA,EAAa;AACrB,IAAA,OAAO,IAAA;AAAA,EACX;AAEA,EAAA,MAAM,QAAA,GAAW,kBAAkB,MAAM,CAAA;AACzC,EAAA,MAAM,OAAA,GAAU,oBAAA,CAAqB,MAAA,EAAQ,QAAQ,CAAA;AAErD,EAAA,OAAO;AAAA,IACH,QAAA;AAAA,IACA,OAAA;AAAA,IACA,OAAA,EAAS,MAAA;AAAA,IACT,MAAA,EAAQ,QAAA,KAAa,UAAA,GAAa,kBAAA,GAAqB;AAAA,GAC3D;AACJ;AAKA,eAAsB,sBAAA,CAClB,QACA,eAAA,EACuB;AAEvB,EAAA,MAAM,KAAA,GAAQ,MAAM,sBAAA,CAAuB,MAAM,CAAA;AAGjD,EAAA,MAAM,WAAA,GAAc,MAAM,MAAA,CAAO,eAAA,CAAgB,KAAA,EAAM;AAGvD,EAAA,IAAI,eAAA,GAAkB,CAAA;AACtB,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AAEtB,IAAA,IAAI,IAAA,CAAK,KAAA,KAAU,eAAA,CAAgB,EAAA,EAAI;AACnC,MAAA,eAAA,IAAmB,IAAA,CAAK,KAAA;AAAA,IAC5B;AAAA,EACJ;AAEA,EAAA,MAAM,cAAc,eAAA,GAAkB,CAAA;AAEtC,EAAA,OAAO;AAAA,IACH,WAAA;AAAA,IACA,cAAc,eAAA,CAAgB,EAAA;AAAA,IAC9B,eAAA,EAAiB,mBAAA,CAAoB,eAAA,CAAgB,EAAE,CAAA;AAAA,IACvD,kBAAkB,eAAA,CAAgB,SAAA;AAAA,IAClC,cAAA,EAAgB,KAAA;AAAA,IAChB,eAAA;AAAA,IACA;AAAA,GACJ;AACJ;AAKA,eAAsB,uBAClB,MAAA,EAC8B;AAE9B,EAAA,MAAM,OAAA,GAAU,MAAM,MAAA,CAAO,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAAA,CAAA;AAc7B,EAAA,OAAO,OAAA,CAAQ,IAAI,CAAA,CAAA,MAAM;AAAA,IACrB,OAAO,CAAA,CAAE,eAAA;AAAA,IACT,WAAW,CAAA,CAAE,mBAAA;AAAA,IACb,KAAA,EAAO,MAAA,CAAO,CAAA,CAAE,KAAK;AAAA,GACzB,CAAE,CAAA;AACN;AAKA,SAAS,kBAAkB,MAAA,EAA0C;AACjE,EAAA,IAAI,CAAC,OAAO,WAAA,EAAa;AACrB,IAAA,OAAO,MAAA;AAAA,EACX;AAGA,EAAA,KAAA,MAAW,IAAA,IAAQ,OAAO,cAAA,EAAgB;AACtC,IAAA,IAAI,KAAK,SAAA,KAAc,IAAA,IAAQ,IAAA,CAAK,SAAA,KAAc,OAAO,gBAAA,EAAkB;AACvE,MAAA,OAAO,UAAA;AAAA,IACX;AAAA,EACJ;AAGA,EAAA,MAAM,kBAAA,GAAsB,MAAA,CAAO,eAAA,GAAkB,MAAA,CAAO,WAAA,GAAe,GAAA;AAE
3E,EAAA,IAAI,qBAAqB,EAAA,EAAI;AACzB,IAAA,OAAO,UAAA;AAAA,EACX;AAEA,EAAA,OAAO,SAAA;AACX;AAKA,SAAS,oBAAA,CAAqB,QAAwB,QAAA,EAAoC;AACtF,EAAA,MAAM,aAAa,IAAA,CAAK,KAAA,CAAO,OAAO,eAAA,GAAkB,MAAA,CAAO,cAAe,GAAG,CAAA;AAEjF,EAAA,IAAI,aAAa,UAAA,EAAY;AACzB,IAAA,OAAO,CAAA,uBAAA,EAAgB,MAAA,CAAO,eAAe,CAAA,SAAA,EAAY,UAAU,6DACnD,MAAA,CAAO,YAAY,CAAA,EAAA,EAAK,MAAA,CAAO,gBAAgB,CAAA,2EAAA,CAAA;AAAA,EAEnE;AAEA,EAAA,OAAO,mBAAc,MAAA,CAAO,eAAe,YAAY,UAAU,CAAA,gDAAA,EAC3C,OAAO,YAAY,CAAA,iEAAA,CAAA;AAE7C;AAKA,SAAS,oBAAoB,OAAA,EAAiD;AAC1E,EAAA,IAAI,OAAA,CAAQ,UAAA,CAAW,QAAQ,CAAA,EAAG,OAAO,QAAA;AACzC,EAAA,IAAI,OAAA,CAAQ,WAAW,QAAQ,CAAA,IAAK,QAAQ,QAAA,CAAS,gBAAgB,GAAG,OAAO,QAAA;AAC/E,EAAA,IAAI,OAAA,CAAQ,WAAW,QAAQ,CAAA,IAAK,QAAQ,QAAA,CAAS,QAAQ,GAAG,OAAO,QAAA;AACvE,EAAA,OAAO,QAAA;AACX;AAKO,SAAS,qBAAA,CAAsB,UAAkB,KAAA,EAAuB;AAC3E,EAAA,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI,KAAK,CAAA,CAAA;AAC/B;AAnKA,IAAA,oBAAA,GAAA,KAAA,CAAA;AAAA,EAAA,8BAAA,GAAA;AAAA,EAAA;AAAA,CAAA,CAAA;ACMA,IAAM,OAAA,GAAU,IAAIA,iBAAA,EAAQ;AAE5B,OAAA,CACK,KAAK,aAAa,CAAA,CAClB,YAAY,8CAA8C,CAAA,CAC1D,QAAQ,eAAe,CAAA;AAE5B,OAAA,CACK,OAAA,CAAQ,MAAM,CAAA,CACd,WAAA,CAAY,wCAAwC,CAAA,CACpD,MAAA,CAAO,aAAA,EAAe,0BAA0B,CAAA,CAChD,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,yCAAkC,CAAA;AAE9C,EAAA,IAAI;AAEA,IAAA,MAAM,SAAA,GAAiBC,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,IAAO,QAAQ,CAAA;AACnD,IAAA,MAAM,UAAA,GAAkBA,eAAA,CAAA,IAAA,CAAK,SAAA,EAAW,eAAe,CAAA;AAEvD,IAAA,IAAI,YAAA,GAAe,KAAA;AACnB,IAAA,IAAI;AACA,MAAA,MAASC,qBAAO,UAAU,CAAA;AAC1B,MAAA,YAAA,GAAe,IAAA;AAAA,IACnB,CAAA,CAAA,MAAQ;AACJ,MAAA,YAAA,GAAe,KAAA;AAAA,IACnB;AAEA,IAAA,IAAI,CAAC,YAAA,EAAc;AACf,MAAA,OAAA,CAAQ,IAAI,wDAAmD,CAAA;AAC/D,MAAA,OAAA,CAAQ,IAAI,0CAA0C,CAAA;AACtD,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,MAAM,cAAA,GAAiB,MAASA,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAG5D,IAAA,IAAI,eAAe,QAAA,CAAS,iBAAiB,CAAA,IAAK,CAAC,QAAQ,KAAA,EAAO;AAC9D,MAAA,OAAA,CAAQ,IAAI,2DAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,IAAI,CAAC,cAAA,CAAe,
QAAA,CAAS,sBAAsB,CAAA,EAAG;AAClD,MAAA,OAAA,CAAQ,IAAI,wDAA8C,CAAA;AAC1D,MAAA,OAAA,CAAQ,IAAI,+CAA+C,CAAA;AAC3D,MAAA,OAAA,CAAQ,IAAI,uBAAuB,CAAA;AACnC,MAAA,OAAA,CAAQ,IAAI,oCAAoC,CAAA;AAChD,MAAA,OAAA,CAAQ,IAAI,iDAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AACpB,MAAA,OAAA,CAAQ,IAAI,oBAAoB,CAAA;AAChC,MAAA,OAAA,CAAQ,IAAI,8BAA8B,CAAA;AAC1C,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,IAAI,4BAA4B,CAAA;AACxC,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AAAA,IACxB;AAGA,IAAA,MAAM,gBAAA,GAAmB;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,CAAA;AAwIzB,IAAA,IAAI,SAAA,GAAY,cAAA;AAChB,IAAA,IAAI,OAAA,CAAQ,KAAA,IAAS,cAAA,CAAe,QAAA,CAAS,uBAAuB,CAAA,EAAG;AACnE,MAAA,MAAM,WAAA,GAAc,wEAAA;AACpB,MAAA,MAAM,UAAA,GAAa,SAAA,CAAU,OAAA,CAAQ,WAAW,CAAA;AAChD,MAAA,IAAI,eAAe,CAAA,CAAA,EAAI;AACnB,QAAA,SAAA,GAAY,SAAA,CAAU,SAAA,CAAU,CAAA,EAAG,UAAU,EAAE,IAAA,EAAK;AAAA,MACxD;AAAA,IACJ;AAGA,IAAA,SAAA,GAAY,SAAA,CAAU,IAAA,EAAK,GAAI,IAAA,GAAO,gBAAA;AAGtC,IAAA,MAASA,aAAA,CAAA,SAAA,CAAU,YAAY,SAAS,CAAA;AAExC,IAAA,OAAA,CAAQ,IAAI,2DAAsD,CAAA;AAClE,IAAA,OAAA,CAAQ,IAAI,aAAa,CAAA;AACzB,IAAA,OAAA,CAAQ,IAAI,yDAAyD,CAAA;AACrE,IAAA,OAAA,CAAQ,IAAI,4EAA4E,CAAA;AACxF,IAAA,OAAA,CAAQ,IAAI,iCAAiC,CAAA;AAAA,EAEjD,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,QAAQ,CAAA,CAChB,YAAY,gCAAgC,CAAA,CAC5C,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,4CAAqC,CAAA;AAGjD,EAAA,MAAM,aAAkBD,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,EAAI,EAAG,UAAU,eAAe,CAA
A;AACrE,EAAA,IAAI;AACA,IAAA,MAAM,MAAA,GAAS,MAASC,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAEpD,IAAA,OAAA,CAAQ,IAAI,gBAAgB,CAAA;AAC5B,IAAA,OAAA,CAAQ,IAAI,CAAA,4BAAA,CAAyB,CAAA;AACrC,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,iBAAiB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACpF,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,sBAAsB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACzF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,CAAA,CAAA,MAAQ;AACJ,IAAA,OAAA,CAAQ,IAAI,yCAAoC,CAAA;AAAA,EACpD;AAIA,EAAA,OAAA,CAAQ,IAAI,cAAc,CAAA;AAC1B,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,cAAc,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,aAAA,CAAe,CAAA;AACvE,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,eAAA,CAAiB,CAAA;AAC3E,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,0BAAA,CAA4B,CAAA;AACtF,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,kBAAkB,CAAA,CAC1B,YAAY,gEAAgE,CAAA,CAC5E,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,gDAAyC,CAAA;AAErD,EAAA,IAAI;AAEA,IAAA,KAAM,MAAM,OAAA,CAAA,OAAA,EAAA,CAAA,IAAA,CAAA,OAAA,oBAAA,EAAA,EAAA,uBAAA,CAAA,CAAA;AAGZ,IAAA,OAAA,CAAQ,IAAI,qEAA2D,CAAA;AACvE,IAAA,OAAA,CAAQ,IAAI,iEAAiE,CAAA;AAC7E,IAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,IAAA,OAAA,CAAQ,IAAI,sEAAsE,CAAA;AAClF,IAAA,OAAA,CAAQ,IAAI,qEAAqE,CAAA;AACjF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,OAAA,CAAQ,SAAS,CAAA,CACjB,WAAA,CAAY,kDAAkD,CAAA,CAC9D,MAAA,CAAO,4BAAA,EAA8B,sCAAA,EAAwC,GAAG,CAAA,CAChF,OAAO,2BAAA,EAA6B,2BAAA,EAA6B,IAAI,CAAA,CACrE,MAAA,CAAO,0BAA0B,iCAAiC,CAAA,CAClE,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,+CAAwC,CAAA;AAEpD,EAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,eAAA,EAAkB,OAAA,CAAQ,WAAW,CAAA,CAAE,CAAA;AACnD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,cAAA,EAAiB,OAAA,CAAQ,SAAS,CAAA,CAAE,CAAA;AAChD,EAAA,IAAI,QAAQ,UAAA,EAAY;AACpB,IAAA,OAAA,CAAQ,GAAA,
CAAI,CAAA,eAAA,EAAkB,OAAA,CAAQ,UAAU,CAAA,CAAE,CAAA;AAAA,EACtD;AACA,EAAA,OAAA,CAAQ,GAAA,EAAI;AAEZ,EAAA,OAAA,CAAQ,IAAI,gFAAsE,CAAA;AAClF,EAAA,OAAA,CAAQ,IAAI,yCAAyC,CAAA;AACrD,EAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,EAAA,OAAA,CAAQ,IAAI,+DAA+D,CAAA;AAC3E,EAAA,OAAA,CAAQ,IAAI,oFAAoF,CAAA;AAChG,EAAA,OAAA,CAAQ,IAAI,mDAAmD,CAAA;AAC/D,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,iBAAA,EAAoB,OAAA,CAAQ,WAAW,CAAA,CAAA,CAAG,CAAA;AACtD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,eAAA,EAAkB,OAAA,CAAQ,SAAS,CAAA,CAAA,CAAG,CAAA;AAClD,EAAA,OAAA,CAAQ,IAAI,iEAAiE,CAAA;AAC7E,EAAA,OAAA,CAAQ,IAAI,OAAO,CAAA;AACnB,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CAAQ,KAAA,EAAM","file":"cli.cjs","sourcesContent":["/**\r\n * Embedding Utilities\r\n * \r\n * Utilities for embedding model detection, mismatch checking, and metadata management.\r\n */\r\n\r\nimport type { PrismaClientLike } from '../types/config.types.js';\r\nimport type { EmbeddingProvider } from '../types/embedding-provider.types.js';\r\nimport type {\r\n MismatchResult,\r\n MismatchInfo,\r\n MismatchSeverity,\r\n EmbeddingModelStats,\r\n} from '../types/migration.types.js';\r\n\r\n/**\r\n * Detect embedding model mismatch between config and database\r\n * \r\n * @param prisma - Prisma client instance\r\n * @param currentProvider - Currently configured embedding provider\r\n * @returns Mismatch info if there's a problem, null if everything matches\r\n */\r\nexport async function detectEmbeddingMismatch(\r\n prisma: PrismaClientLike,\r\n currentProvider: EmbeddingProvider\r\n): Promise<MismatchInfo | null> {\r\n const result = await checkEmbeddingMismatch(prisma, currentProvider);\r\n\r\n if (!result.hasMismatch) {\r\n return null;\r\n }\r\n\r\n const severity = determineSeverity(result);\r\n const message = buildMismatchMessage(result, severity);\r\n\r\n return {\r\n severity,\r\n message,\r\n details: result,\r\n action: severity === 'critical' ? 
'reindex-required' : 'reindex',\r\n };\r\n}\r\n\r\n/**\r\n * Check for embedding model mismatch\r\n */\r\nexport async function checkEmbeddingMismatch(\r\n prisma: PrismaClientLike,\r\n currentProvider: EmbeddingProvider\r\n): Promise<MismatchResult> {\r\n // Get model statistics from database\r\n const stats = await getEmbeddingModelStats(prisma);\r\n\r\n // Get total chunk count\r\n const totalChunks = await prisma.contextRagChunk.count();\r\n\r\n // Calculate chunks that need migration\r\n let chunksToMigrate = 0;\r\n for (const stat of stats) {\r\n // Chunks with different model or null model need migration\r\n if (stat.model !== currentProvider.id) {\r\n chunksToMigrate += stat.count;\r\n }\r\n }\r\n\r\n const hasMismatch = chunksToMigrate > 0;\r\n\r\n return {\r\n hasMismatch,\r\n currentModel: currentProvider.id,\r\n currentProvider: extractProviderType(currentProvider.id),\r\n currentDimension: currentProvider.dimension,\r\n existingModels: stats,\r\n chunksToMigrate,\r\n totalChunks,\r\n };\r\n}\r\n\r\n/**\r\n * Get statistics about embedding models in the database\r\n */\r\nexport async function getEmbeddingModelStats(\r\n prisma: PrismaClientLike\r\n): Promise<EmbeddingModelStats[]> {\r\n // Use raw query for GROUP BY with null handling\r\n const results = await prisma.$queryRaw<Array<{\r\n embedding_model: string | null;\r\n embedding_dimension: number | null;\r\n count: bigint;\r\n }>>`\r\n SELECT \r\n embedding_model,\r\n embedding_dimension,\r\n COUNT(*) as count\r\n FROM context_rag_chunks\r\n GROUP BY embedding_model, embedding_dimension\r\n ORDER BY count DESC\r\n `;\r\n\r\n return results.map(r => ({\r\n model: r.embedding_model,\r\n dimension: r.embedding_dimension,\r\n count: Number(r.count),\r\n }));\r\n}\r\n\r\n/**\r\n * Determine severity of mismatch\r\n */\r\nfunction determineSeverity(result: MismatchResult): MismatchSeverity {\r\n if (!result.hasMismatch) {\r\n return 'none';\r\n }\r\n\r\n // Check if there are dimension mismatches 
(critical)\r\n for (const stat of result.existingModels) {\r\n if (stat.dimension !== null && stat.dimension !== result.currentDimension) {\r\n return 'critical';\r\n }\r\n }\r\n\r\n // Check percentage of mismatched chunks\r\n const mismatchPercentage = (result.chunksToMigrate / result.totalChunks) * 100;\r\n\r\n if (mismatchPercentage > 50) {\r\n return 'critical';\r\n }\r\n\r\n return 'warning';\r\n}\r\n\r\n/**\r\n * Build user-friendly mismatch message\r\n */\r\nfunction buildMismatchMessage(result: MismatchResult, severity: MismatchSeverity): string {\r\n const percentage = Math.round((result.chunksToMigrate / result.totalChunks) * 100);\r\n\r\n if (severity === 'critical') {\r\n return `⚠️ CRITICAL: ${result.chunksToMigrate} chunks (${percentage}%) were created with different embedding models. ` +\r\n `Current: ${result.currentModel} (${result.currentDimension}d). ` +\r\n `Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;\r\n }\r\n\r\n return `⚡ Warning: ${result.chunksToMigrate} chunks (${percentage}%) may have outdated embeddings. ` +\r\n `Current model: ${result.currentModel}. 
` +\r\n `Consider running 'npx context-rag reindex' for optimal results.`;\r\n}\r\n\r\n/**\r\n * Extract provider type from model ID\r\n */\r\nfunction extractProviderType(modelId: string): 'gemini' | 'openai' | 'cohere' {\r\n if (modelId.startsWith('gemini')) return 'gemini';\r\n if (modelId.startsWith('openai') || modelId.includes('text-embedding')) return 'openai';\r\n if (modelId.startsWith('cohere') || modelId.includes('embed-')) return 'cohere';\r\n return 'gemini'; // default\r\n}\r\n\r\n/**\r\n * Build embedding model identifier from provider and model name\r\n */\r\nexport function buildEmbeddingModelId(provider: string, model: string): string {\r\n return `${provider}-${model}`;\r\n}\r\n","#!/usr/bin/env node\r\n\r\nimport { Command } from 'commander';\r\nimport * as fs from 'fs/promises';\r\nimport * as path from 'path';\r\n\r\nconst program = new Command();\r\n\r\nprogram\r\n .name('context-rag')\r\n .description('Context-RAG CLI - Setup and management tools')\r\n .version('1.0.0-beta.11');\r\n\r\nprogram\r\n .command('init')\r\n .description('Initialize Context-RAG in your project')\r\n .option('-f, --force', 'Overwrite existing files')\r\n .action(async (options) => {\r\n console.log('🚀 Initializing Context-RAG...\\n');\r\n\r\n try {\r\n // Check if prisma directory exists\r\n const prismaDir = path.join(process.cwd(), 'prisma');\r\n const schemaPath = path.join(prismaDir, 'schema.prisma');\r\n\r\n let schemaExists = false;\r\n try {\r\n await fs.access(schemaPath);\r\n schemaExists = true;\r\n } catch {\r\n schemaExists = false;\r\n }\r\n\r\n if (!schemaExists) {\r\n console.log('❌ Prisma schema not found at prisma/schema.prisma');\r\n console.log(' Please run `npx prisma init` first.\\n');\r\n process.exit(1);\r\n }\r\n\r\n // Read existing schema\r\n const existingSchema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n // Check if Context-RAG models already exist\r\n if (existingSchema.includes('ContextRagChunk') && !options.force) {\r\n 
console.log('⚠️ Context-RAG models already exist in schema.');\r\n console.log(' Use --force to overwrite.\\n');\r\n process.exit(0);\r\n }\r\n\r\n // Check for pgvector extension\r\n if (!existingSchema.includes('postgresqlExtensions')) {\r\n console.log('⚠️ Warning: pgvector extension not enabled.');\r\n console.log(' Add the following to your schema.prisma:\\n');\r\n console.log(' generator client {');\r\n console.log(' provider = \"prisma-client-js\"');\r\n console.log(' previewFeatures = [\"postgresqlExtensions\"]');\r\n console.log(' }\\n');\r\n console.log(' datasource db {');\r\n console.log(' provider = \"postgresql\"');\r\n console.log(' url = env(\"DATABASE_URL\")');\r\n console.log(' extensions = [vector]');\r\n console.log(' }\\n');\r\n }\r\n\r\n // Context-RAG models to append\r\n const contextRagModels = `\r\n// ============================================\r\n// Context-RAG Models\r\n// ============================================\r\n\r\n/// Stores prompt configurations for different document types\r\nmodel ContextRagPromptConfig {\r\n id String @id @default(uuid())\r\n documentType String @map(\"document_type\")\r\n name String\r\n systemPrompt String @map(\"system_prompt\") @db.Text\r\n chunkStrategy Json @map(\"chunk_strategy\")\r\n version Int @default(1)\r\n isActive Boolean @default(true) @map(\"is_active\")\r\n isDefault Boolean @default(false) @map(\"is_default\")\r\n createdBy String? @map(\"created_by\")\r\n changeLog String? 
@map(\"change_log\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n chunks ContextRagChunk[]\r\n\r\n @@unique([documentType, version])\r\n @@index([documentType, isActive])\r\n @@map(\"context_rag_prompt_configs\")\r\n}\r\n\r\n/// Stores vector chunks for semantic search\r\nmodel ContextRagChunk {\r\n id String @id @default(uuid())\r\n promptConfigId String @map(\"prompt_config_id\")\r\n promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)\r\n documentId String @map(\"document_id\")\r\n chunkIndex Int @map(\"chunk_index\")\r\n chunkType String @map(\"chunk_type\")\r\n\r\n /// Plain text content optimized for vector search\r\n searchContent String @map(\"search_content\") @db.Text\r\n\r\n /// Enriched content: context + searchContent (for RAG enhancement)\r\n enrichedContent String? @map(\"enriched_content\") @db.Text\r\n\r\n /// AI-generated context text only (for debugging)\r\n contextText String? @map(\"context_text\") @db.Text\r\n\r\n /// Vector embedding (768 dimensions for Gemini)\r\n searchVector Unsupported(\"vector(768)\") @map(\"search_vector\")\r\n\r\n /// Rich Markdown content for display\r\n displayContent String @map(\"display_content\") @db.Text\r\n\r\n sourcePageStart Int @map(\"source_page_start\")\r\n sourcePageEnd Int @map(\"source_page_end\")\r\n confidenceScore Float @default(0.5) @map(\"confidence_score\")\r\n metadata Json\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n\r\n @@index([promptConfigId])\r\n @@index([documentId])\r\n @@index([chunkType])\r\n @@index([confidenceScore])\r\n @@map(\"context_rag_chunks\")\r\n}\r\n\r\n/// Tracks document processing state\r\nmodel ContextRagDocument {\r\n id String @id @default(uuid())\r\n filename String\r\n fileHash String @map(\"file_hash\")\r\n fileSize Int @map(\"file_size\")\r\n pageCount Int @map(\"page_count\")\r\n documentType String? 
@map(\"document_type\")\r\n\r\n /// Experiment identifier for A/B testing models\r\n experimentId String? @map(\"experiment_id\")\r\n\r\n /// AI model used for processing\r\n modelName String? @map(\"model_name\")\r\n\r\n /// Model configuration as JSON\r\n modelConfig Json? @map(\"model_config\")\r\n\r\n status String @default(\"PENDING\")\r\n\r\n promptConfigId String? @map(\"prompt_config_id\")\r\n totalBatches Int @default(0) @map(\"total_batches\")\r\n completedBatches Int @default(0) @map(\"completed_batches\")\r\n failedBatches Int @default(0) @map(\"failed_batches\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n errorMessage String? @map(\"error_message\")\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n completedAt DateTime? @map(\"completed_at\")\r\n\r\n batches ContextRagBatch[]\r\n\r\n @@unique([fileHash, experimentId])\r\n @@index([status])\r\n @@index([fileHash])\r\n @@index([documentType])\r\n @@index([experimentId])\r\n @@map(\"context_rag_documents\")\r\n}\r\n\r\n/// Tracks individual batch processing jobs\r\nmodel ContextRagBatch {\r\n id String @id @default(uuid())\r\n documentId String @map(\"document_id\")\r\n document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)\r\n\r\n batchIndex Int @map(\"batch_index\")\r\n pageStart Int @map(\"page_start\")\r\n pageEnd Int @map(\"page_end\")\r\n status String @default(\"PENDING\")\r\n retryCount Int @default(0) @map(\"retry_count\")\r\n lastError String? @map(\"last_error\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n\r\n startedAt DateTime? @map(\"started_at\")\r\n completedAt DateTime? 
@map(\"completed_at\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n @@unique([documentId, batchIndex])\r\n @@index([documentId, status])\r\n @@index([status])\r\n @@map(\"context_rag_batches\")\r\n}\r\n`;\r\n\r\n // Remove existing Context-RAG models if force\r\n let newSchema = existingSchema;\r\n if (options.force && existingSchema.includes('// Context-RAG Models')) {\r\n const startMarker = '// ============================================\\n// Context-RAG Models';\r\n const startIndex = newSchema.indexOf(startMarker);\r\n if (startIndex !== -1) {\r\n newSchema = newSchema.substring(0, startIndex).trim();\r\n }\r\n }\r\n\r\n // Append new models\r\n newSchema = newSchema.trim() + '\\n' + contextRagModels;\r\n\r\n // Write updated schema\r\n await fs.writeFile(schemaPath, newSchema);\r\n\r\n console.log('✅ Context-RAG models added to prisma/schema.prisma\\n');\r\n console.log('Next steps:');\r\n console.log(' 1. Run: npx prisma migrate dev --name add_context_rag');\r\n console.log(' 2. Enable pgvector in PostgreSQL: CREATE EXTENSION IF NOT EXISTS vector;');\r\n console.log(' 3. Start using Context-RAG!\\n');\r\n\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('status')\r\n .description('Check Context-RAG setup status')\r\n .action(async () => {\r\n console.log('🔍 Checking Context-RAG status...\\n');\r\n\r\n // Check schema\r\n const schemaPath = path.join(process.cwd(), 'prisma', 'schema.prisma');\r\n try {\r\n const schema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n console.log('Prisma Schema:');\r\n console.log(` ✅ schema.prisma found`);\r\n console.log(` ${schema.includes('ContextRagChunk') ? '✅' : '❌'} Context-RAG models`);\r\n console.log(` ${schema.includes('postgresqlExtensions') ? 
'✅' : '❌'} pgvector extension`);\r\n console.log();\r\n } catch {\r\n console.log('❌ prisma/schema.prisma not found\\n');\r\n }\r\n\r\n // Check env - note: these are checked at runtime, not via centralized env\r\n // since CLI may run before env is fully configured\r\n console.log('Environment:');\r\n console.log(` ${process.env['DATABASE_URL'] ? '✅' : '❌'} DATABASE_URL`);\r\n console.log(` ${process.env['GEMINI_API_KEY'] ? '✅' : '❌'} GEMINI_API_KEY`);\r\n console.log(` ${process.env['COHERE_API_KEY'] ? '✅' : '⚪'} COHERE_API_KEY (optional)`);\r\n console.log();\r\n });\r\n\r\nprogram\r\n .command('check-embeddings')\r\n .description('Check for embedding model mismatch between config and database')\r\n .action(async () => {\r\n console.log('🔍 Checking embedding model status...\\n');\r\n\r\n try {\r\n // Dynamic import to verify module exists (void to suppress unused warning)\r\n void (await import('../utils/embedding-utils.js'));\r\n\r\n // We can't fully check without a configured client, so just show stats\r\n console.log('⚠️ Full mismatch detection requires database connection.');\r\n console.log(' Use this command programmatically with your Prisma client.\\n');\r\n console.log('Example:');\r\n console.log(' import { detectEmbeddingMismatch } from \"@msbayindir/context-rag\";');\r\n console.log(' const mismatch = await detectEmbeddingMismatch(prisma, provider);');\r\n console.log();\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('reindex')\r\n .description('Re-index all chunks with current embedding model')\r\n .option('-c, --concurrency <number>', 'Number of concurrent embedding calls', '5')\r\n .option('-b, --batch-size <number>', 'Batch size for processing', '50')\r\n .option('-d, --document-id <id>', 'Re-index specific document only')\r\n .action(async (options) => {\r\n console.log('🔄 Starting re-indexing operation...\\n');\r\n\r\n console.log('Options:');\r\n 
console.log(` Concurrency: ${options.concurrency}`);\r\n console.log(` Batch size: ${options.batchSize}`);\r\n if (options.documentId) {\r\n console.log(` Document ID: ${options.documentId}`);\r\n }\r\n console.log();\r\n\r\n console.log('⚠️ Re-indexing requires database connection and embedding provider.');\r\n console.log(' Use this command programmatically:\\n');\r\n console.log('Example:');\r\n console.log(' import { MigrationService } from \"@msbayindir/context-rag\";');\r\n console.log(' const migrationService = new MigrationService(prisma, provider, config, logger);');\r\n console.log(' const result = await migrationService.reindex({');\r\n console.log(` concurrency: ${options.concurrency},`);\r\n console.log(` batchSize: ${options.batchSize},`);\r\n console.log(' onProgress: (p) => console.log(`${p.processed}/${p.total}`)');\r\n console.log(' });');\r\n console.log();\r\n });\r\n\r\nprogram.parse();\r\n"]}
package/dist/bin/cli.js CHANGED
@@ -3,8 +3,111 @@ import { Command } from 'commander';
3
3
  import * as fs from 'fs/promises';
4
4
  import * as path from 'path';
5
5
 
6
// Module-glue helpers (they look bundler-generated — NOTE(review): confirm before hand-editing).
// Short aliases for the two Object reflection functions used by the helpers below.
+ var __defProp = Object.defineProperty;
7
+ var __getOwnPropNames = Object.getOwnPropertyNames;
8
// __esm(fn) turns a one-entry object of initializer functions into a lazy, run-once `__init`:
// on the first call it invokes the function stored under fn's first own property name, sets
// `fn` to 0 so the initializer cannot run again, and stores the call's result in `res`; every
// later call short-circuits on the now-falsy `fn` and returns the stored `res`.
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
11
// __export(target, all) defines each key of `all` on `target` as an enumerable getter, so the
// value is produced by calling all[name] every time the property is read.
+ var __export = (target, all) => {
12
+ for (var name in all)
13
+ __defProp(target, name, { get: all[name], enumerable: true });
14
+ };
15
+
16
+ // src/utils/embedding-utils.ts
17
// Namespace object for src/utils/embedding-utils.ts. Each public function of that module is
// registered through __export as a getter returning the function declared later in this file;
// the `check-embeddings` command resolves its dynamic import to this object.
+ var embedding_utils_exports = {};
18
+ __export(embedding_utils_exports, {
19
+ buildEmbeddingModelId: () => buildEmbeddingModelId,
20
+ checkEmbeddingMismatch: () => checkEmbeddingMismatch,
21
+ detectEmbeddingMismatch: () => detectEmbeddingMismatch,
22
+ getEmbeddingModelStats: () => getEmbeddingModelStats
23
+ });
24
/**
 * Reports whether stored chunk embeddings disagree with the configured provider.
 *
 * @param {object} prisma - Prisma-like client, forwarded to checkEmbeddingMismatch.
 * @param {object} currentProvider - Active embedding provider, forwarded unchanged.
 * @returns {Promise<object|null>} `null` when the check finds no mismatch; otherwise an object
 *   with `severity`, `message`, `details` (the raw check result) and `action`
 *   ("reindex-required" for critical severity, "reindex" for anything else).
 */
async function detectEmbeddingMismatch(prisma, currentProvider) {
  const details = await checkEmbeddingMismatch(prisma, currentProvider);

  if (details.hasMismatch) {
    const severity = determineSeverity(details);
    const message = buildMismatchMessage(details, severity);

    let action = "reindex";
    if (severity === "critical") {
      action = "reindex-required";
    }

    return { severity, message, details, action };
  }

  return null;
}
38
/**
 * Compares the embedding models recorded on stored chunks with the current provider.
 *
 * The per-model statistics and the total chunk count do not depend on each other, so both
 * queries are issued together instead of one after the other.
 *
 * @param {object} prisma - Prisma-like client; must expose `contextRagChunk.count()` and
 *   whatever getEmbeddingModelStats needs.
 * @param {object} currentProvider - Active embedding provider; its `id` and `dimension` are read.
 * @returns {Promise<object>} `{ hasMismatch, currentModel, currentProvider, currentDimension,
 *   existingModels, chunksToMigrate, totalChunks }`.
 */
async function checkEmbeddingMismatch(prisma, currentProvider) {
  const [stats, totalChunks] = await Promise.all([
    getEmbeddingModelStats(prisma),
    prisma.contextRagChunk.count(),
  ]);

  // Every group whose recorded model differs from the current provider id (including groups
  // with a null model) counts towards the migration total.
  const chunksToMigrate = stats.reduce(
    (sum, stat) => (stat.model !== currentProvider.id ? sum + stat.count : sum),
    0,
  );

  return {
    hasMismatch: chunksToMigrate > 0,
    currentModel: currentProvider.id,
    currentProvider: extractProviderType(currentProvider.id),
    currentDimension: currentProvider.dimension,
    existingModels: stats,
    chunksToMigrate,
    totalChunks,
  };
}
58
/**
 * Groups the rows of `context_rag_chunks` by (embedding_model, embedding_dimension) and returns
 * one `{ model, dimension, count }` entry per group, in the order the query yields them
 * (ORDER BY count DESC).
 *
 * @param prisma - client whose `$queryRaw` is used as a tagged-template function.
 * @returns array of `{ model, dimension, count }`, with `count` passed through Number().
 */
+ async function getEmbeddingModelStats(prisma) {
59
+ const results = await prisma.$queryRaw`
60
+ SELECT
61
+ embedding_model,
62
+ embedding_dimension,
63
+ COUNT(*) as count
64
+ FROM context_rag_chunks
65
+ GROUP BY embedding_model, embedding_dimension
66
+ ORDER BY count DESC
67
+ `;
68
// Rename the snake_case columns to camel-style keys. The TypeScript source types `count` as
// bigint, which is why it is converted with Number() here.
+ return results.map((r) => ({
69
+ model: r.embedding_model,
70
+ dimension: r.embedding_dimension,
71
+ count: Number(r.count)
72
+ }));
73
+ }
74
/**
 * Classifies an embedding mismatch result.
 *
 * @param {object} result - Mismatch result; reads `hasMismatch`, `existingModels`,
 *   `currentDimension`, `chunksToMigrate` and `totalChunks`.
 * @returns {"none"|"critical"|"warning"} "none" when there is no mismatch; "critical" when any
 *   stored group has a known dimension different from the current one, or when more than 50%
 *   of chunks need migration; otherwise "warning".
 */
function determineSeverity(result) {
  if (!result.hasMismatch) {
    return "none";
  }

  // A missing dimension (null OR undefined) means "unknown", not "different"; only a known,
  // differing dimension is a hard conflict.
  const hasDimensionConflict = result.existingModels.some(
    (stat) => stat.dimension != null && stat.dimension !== result.currentDimension,
  );
  if (hasDimensionConflict) {
    return "critical";
  }

  // Avoid dividing by zero: with no recorded total, any pending chunk counts as 100%.
  let mismatchPercentage;
  if (result.totalChunks > 0) {
    mismatchPercentage = result.chunksToMigrate / result.totalChunks * 100;
  } else {
    mismatchPercentage = result.chunksToMigrate > 0 ? 100 : 0;
  }

  return mismatchPercentage > 50 ? "critical" : "warning";
}
89
/**
 * Builds the user-facing description of an embedding mismatch.
 *
 * @param {object} result - Mismatch result; reads `chunksToMigrate`, `totalChunks`,
 *   `currentModel` and `currentDimension`.
 * @param {string} severity - "critical" selects the critical wording; any other value selects
 *   the warning wording.
 * @returns {string} The formatted message, including the rounded percentage of affected chunks.
 */
function buildMismatchMessage(result, severity) {
  const { chunksToMigrate, totalChunks, currentModel, currentDimension } = result;

  // Guard the division so the message never shows "NaN%" or "Infinity%" when the total is 0.
  let share;
  if (totalChunks > 0) {
    share = chunksToMigrate / totalChunks;
  } else {
    share = chunksToMigrate > 0 ? 1 : 0;
  }
  const percentage = Math.round(share * 100);

  if (severity === "critical") {
    return `\u26A0\uFE0F CRITICAL: ${chunksToMigrate} chunks (${percentage}%) were created with different embedding models. Current: ${currentModel} (${currentDimension}d). Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;
  }

  return `\u26A1 Warning: ${chunksToMigrate} chunks (${percentage}%) may have outdated embeddings. Current model: ${currentModel}. Consider running 'npx context-rag reindex' for optimal results.`;
}
96
/**
 * Maps an embedding model id to its provider family.
 *
 * Rules are tried in order and the first match wins; an id that matches no rule is reported
 * as "gemini".
 *
 * @param {string} modelId - Embedding model identifier.
 * @returns {"gemini"|"openai"|"cohere"} The inferred provider.
 */
function extractProviderType(modelId) {
  const rules = [
    ["gemini", (id) => id.startsWith("gemini")],
    ["openai", (id) => id.startsWith("openai") || id.includes("text-embedding")],
    ["cohere", (id) => id.startsWith("cohere") || id.includes("embed-")],
  ];

  const match = rules.find(([, applies]) => applies(modelId));
  return match ? match[0] : "gemini";
}
102
/**
 * Joins a provider name and a model name into a single identifier, "<provider>-<model>".
 *
 * @param {string} provider - Provider name.
 * @param {string} model - Model name.
 * @returns {string} The combined identifier.
 */
function buildEmbeddingModelId(provider, model) {
  // String.prototype.concat stringifies each argument the same way a template literal does.
  return "".concat(provider, "-", model);
}
105
// Lazy initializer for src/utils/embedding-utils.ts, built with __esm. The initializer body is
// empty, so calling init_embedding_utils() performs no module-level work of its own.
+ var init_embedding_utils = __esm({
106
+ "src/utils/embedding-utils.ts"() {
107
+ }
108
+ });
6
109
  var program = new Command();
7
- program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.1");
110
+ program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.11");
8
111
  program.command("init").description("Initialize Context-RAG in your project").option("-f, --force", "Overwrite existing files").action(async (options) => {
9
112
  console.log("\u{1F680} Initializing Context-RAG...\n");
10
113
  try {
@@ -214,6 +317,42 @@ program.command("status").description("Check Context-RAG setup status").action(a
214
317
  console.log(` ${process.env["COHERE_API_KEY"] ? "\u2705" : "\u26AA"} COHERE_API_KEY (optional)`);
215
318
  console.log();
216
319
  });
320
// `check-embeddings`: prints guidance on running embedding mismatch detection
// programmatically. The CLI itself does not open a database connection.
program
  .command("check-embeddings")
  .description("Check for embedding model mismatch between config and database")
  .action(async () => {
    console.log("\u{1F50D} Checking embedding model status...\n");
    try {
      // Initialise the embedding utils module on a later microtask; the
      // resolved exports object is intentionally discarded.
      await Promise.resolve().then(() => {
        init_embedding_utils();
        return embedding_utils_exports;
      });
      const guidance = [
        "\u26A0\uFE0F Full mismatch detection requires database connection.",
        " Use this command programmatically with your Prisma client.\n",
        "Example:",
        ' import { detectEmbeddingMismatch } from "@msbayindir/context-rag";',
        " const mismatch = await detectEmbeddingMismatch(prisma, provider);",
        ""
      ];
      for (const line of guidance) {
        console.log(line);
      }
    } catch (error) {
      console.error("\u274C Error:", error.message);
      process.exit(1);
    }
  });
335
// `reindex`: echoes the chosen options and prints a programmatic usage example
// populated with them. No re-indexing is performed by the CLI itself.
program
  .command("reindex")
  .description("Re-index all chunks with current embedding model")
  .option("-c, --concurrency <number>", "Number of concurrent embedding calls", "5")
  .option("-b, --batch-size <number>", "Batch size for processing", "50")
  .option("-d, --document-id <id>", "Re-index specific document only")
  .action(async (options) => {
    const { concurrency, batchSize, documentId } = options;
    const output = [
      "\u{1F504} Starting re-indexing operation...\n",
      "Options:",
      ` Concurrency: ${concurrency}`,
      ` Batch size: ${batchSize}`,
      // The document line is only shown when a document id was supplied.
      ...(documentId ? [` Document ID: ${documentId}`] : []),
      "",
      "\u26A0\uFE0F Re-indexing requires database connection and embedding provider.",
      " Use this command programmatically:\n",
      "Example:",
      ' import { MigrationService } from "@msbayindir/context-rag";',
      " const migrationService = new MigrationService(prisma, provider, config, logger);",
      " const result = await migrationService.reindex({",
      ` concurrency: ${concurrency},`,
      ` batchSize: ${batchSize},`,
      // Plain string on purpose: the backticks and ${...} are printed literally.
      " onProgress: (p) => console.log(`${p.processed}/${p.total}`)",
      " });",
      ""
    ];
    for (const line of output) {
      console.log(line);
    }
  });
217
356
  program.parse();
218
357
  //# sourceMappingURL=cli.js.map
219
358
  //# sourceMappingURL=cli.js.map