@msbayindir/context-rag 1.0.0-beta.10 → 1.0.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -15
- package/dist/bin/cli.cjs +140 -1
- package/dist/bin/cli.cjs.map +1 -1
- package/dist/bin/cli.js +140 -1
- package/dist/bin/cli.js.map +1 -1
- package/dist/index.cjs +526 -36
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.js +525 -36
- package/dist/index.js.map +1 -1
- package/package.json +97 -93
- package/prisma/schema.prisma +8 -0
package/README.md
CHANGED
|
@@ -324,6 +324,12 @@ npx @msbayindir/context-rag init --force
|
|
|
324
324
|
|
|
325
325
|
# Check setup status (Prisma models, pgvector, env variables)
|
|
326
326
|
npx @msbayindir/context-rag status
|
|
327
|
+
|
|
328
|
+
# Check for embedding model mismatches
|
|
329
|
+
npx @msbayindir/context-rag check-embeddings
|
|
330
|
+
|
|
331
|
+
# Re-index documents (useful after changing embedding models)
|
|
332
|
+
npx @msbayindir/context-rag reindex --concurrency 5
|
|
327
333
|
```
|
|
328
334
|
|
|
329
335
|
---
|
|
@@ -593,35 +599,153 @@ await rag.ingest({
|
|
|
593
599
|
|
|
594
600
|
## ⚙️ Configuration
|
|
595
601
|
|
|
602
|
+
|
|
603
|
+
## ⚙️ Configuration Reference
|
|
604
|
+
|
|
605
|
+
Context-RAG is highly configurable. Below is the complete list of all available options.
|
|
606
|
+
|
|
596
607
|
```typescript
|
|
597
608
|
const rag = new ContextRAG({
|
|
598
|
-
//
|
|
609
|
+
// ============================================
|
|
610
|
+
// CORE CONFIGURATION (Required)
|
|
611
|
+
// ============================================
|
|
612
|
+
|
|
613
|
+
/** Your initialized Prisma client instance */
|
|
599
614
|
prisma: prismaClient,
|
|
600
|
-
geminiApiKey: 'your-api-key',
|
|
601
615
|
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
616
|
+
/** Gemini API Key (Required for generation and default embeddings) */
|
|
617
|
+
geminiApiKey: process.env.GEMINI_API_KEY!,
|
|
618
|
+
|
|
619
|
+
// ============================================
|
|
620
|
+
// MODEL SELECTION
|
|
621
|
+
// ============================================
|
|
605
622
|
|
|
606
|
-
|
|
623
|
+
/**
|
|
624
|
+
* Main LLM model for generation, orchestration, and RAG enhancement.
|
|
625
|
+
* Default: 'gemini-1.5-pro'
|
|
626
|
+
*/
|
|
627
|
+
model: 'gemini-1.5-pro', // Options: 'gemini-1.5-flash', 'gemini-2.0-flash-exp', etc.
|
|
628
|
+
|
|
629
|
+
/**
|
|
630
|
+
* Configuration for the LLM generation (temperature, tokens, etc.)
|
|
631
|
+
*/
|
|
607
632
|
generationConfig: {
|
|
608
|
-
temperature: 0.
|
|
609
|
-
maxOutputTokens:
|
|
633
|
+
temperature: 0.3, // Creativity (0.0 - 1.0). Lower is more deterministic.
|
|
634
|
+
maxOutputTokens: 8192, // Maximum length of the generated response.
|
|
635
|
+
},
|
|
636
|
+
|
|
637
|
+
// ============================================
|
|
638
|
+
// EMBEDDING PROVIDER (Optional)
|
|
639
|
+
// ============================================
|
|
640
|
+
|
|
641
|
+
/**
|
|
642
|
+
* Choose your embedding provider.
|
|
643
|
+
* Default: Uses Gemini 'text-embedding-004'
|
|
644
|
+
*/
|
|
645
|
+
embeddingProvider: {
|
|
646
|
+
// Provider: 'gemini' | 'openai' | 'cohere'
|
|
647
|
+
provider: 'openai',
|
|
648
|
+
|
|
649
|
+
// Model name (specific to the provider)
|
|
650
|
+
model: 'text-embedding-3-small',
|
|
651
|
+
|
|
652
|
+
// API Key (if different from geminiApiKey)
|
|
653
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
610
654
|
},
|
|
611
655
|
|
|
612
|
-
//
|
|
656
|
+
// ============================================
|
|
657
|
+
// SYSTEM CONFIGURATION
|
|
658
|
+
// ============================================
|
|
659
|
+
|
|
660
|
+
/**
|
|
661
|
+
* Batch processing settings for ingestion.
|
|
662
|
+
* Adjust these based on your API rate limits.
|
|
663
|
+
*/
|
|
613
664
|
batchConfig: {
|
|
614
|
-
pagesPerBatch: 15,
|
|
615
|
-
maxConcurrency: 3,
|
|
616
|
-
maxRetries: 3,
|
|
665
|
+
pagesPerBatch: 15, // How many pages to process in one go (Default: 15)
|
|
666
|
+
maxConcurrency: 3, // How many batches to run in parallel (Default: 3)
|
|
667
|
+
maxRetries: 3, // Retry failed batches (Default: 3)
|
|
668
|
+
retryDelayMs: 1000, // Initial delay before retry (Default: 1000ms)
|
|
669
|
+
backoffMultiplier: 2, // Exponential backoff factor (Default: 2)
|
|
670
|
+
},
|
|
671
|
+
|
|
672
|
+
/**
|
|
673
|
+
* Settings for splitting text into vector chunks.
|
|
674
|
+
*/
|
|
675
|
+
chunkConfig: {
|
|
676
|
+
maxTokens: 500, // Maximum size of a single chunk (Default: 500)
|
|
677
|
+
overlapTokens: 50, // Overlap between chunks to preserve continuity (Default: 50)
|
|
678
|
+
},
|
|
679
|
+
|
|
680
|
+
/**
|
|
681
|
+
* API Rate Limiting protection.
|
|
682
|
+
*/
|
|
683
|
+
rateLimitConfig: {
|
|
684
|
+
requestsPerMinute: 60, // Max RPM allowed (Default: 60)
|
|
685
|
+
adaptive: true, // Automatically slow down if 429 errors occur (Default: true)
|
|
686
|
+
},
|
|
687
|
+
|
|
688
|
+
/**
|
|
689
|
+
* System logging configuration.
|
|
690
|
+
*/
|
|
691
|
+
logging: {
|
|
692
|
+
level: 'info', // 'debug' | 'info' | 'warn' | 'error'
|
|
693
|
+
structured: true, // Use JSON format for logs (Best for production tools like Datadog/CloudWatch)
|
|
617
694
|
},
|
|
618
695
|
|
|
619
|
-
//
|
|
696
|
+
// ============================================
|
|
697
|
+
// ADVANCED FEATURES
|
|
698
|
+
// ============================================
|
|
699
|
+
|
|
700
|
+
/**
|
|
701
|
+
* Reranking improves search relevance by re-scoring results.
|
|
702
|
+
*/
|
|
703
|
+
rerankingConfig: {
|
|
704
|
+
enabled: true, // Enable automatic reranking (Default: false)
|
|
705
|
+
provider: 'cohere', // 'gemini' or 'cohere' (Cohere is recommended for best results)
|
|
706
|
+
cohereApiKey: process.env.COHERE_API_KEY, // Required if provider is 'cohere'
|
|
707
|
+
defaultCandidates: 50, // Retrieve top 50 from Vector DB...
|
|
708
|
+
defaultTopK: 10, // ...and return top 10 after reranking.
|
|
709
|
+
},
|
|
710
|
+
|
|
711
|
+
/**
|
|
712
|
+
* RAG Enhancement (Contextual Retrieval).
|
|
713
|
+
* Adds context to chunks before embedding them.
|
|
714
|
+
*/
|
|
620
715
|
ragEnhancement: {
|
|
716
|
+
// Approach: 'anthropic_contextual' (Recommended) or 'none'
|
|
621
717
|
approach: 'anthropic_contextual',
|
|
622
|
-
|
|
623
|
-
|
|
718
|
+
|
|
719
|
+
// Strategy: 'llm' (Best Quality) or 'simple' (Template based)
|
|
720
|
+
strategy: 'llm',
|
|
721
|
+
|
|
722
|
+
// Model to use for generating context (Optional, defaults to main model)
|
|
723
|
+
// Tip: Use a cheaper model here (e.g., 'gemini-1.5-flash') to save costs.
|
|
724
|
+
model: 'gemini-1.5-flash',
|
|
725
|
+
|
|
726
|
+
// Prompt used to generate context (Optional, has good default)
|
|
727
|
+
contextPrompt: 'Situate this chunk within the document...',
|
|
728
|
+
|
|
729
|
+
// Don't waste tokens generating context for these types
|
|
730
|
+
skipChunkTypes: ['HEADING', 'IMAGE_REF', 'CODE'],
|
|
624
731
|
},
|
|
732
|
+
|
|
733
|
+
/**
|
|
734
|
+
* Enable Structured Output (JSON Schema) for reliable parsing.
|
|
735
|
+
* Disable only if you are using a model that doesn't support it well.
|
|
736
|
+
* Default: true
|
|
737
|
+
*/
|
|
738
|
+
useStructuredOutput: true,
|
|
739
|
+
|
|
740
|
+
/**
|
|
741
|
+
* Custom Chunk Type Mapping.
|
|
742
|
+
* Map your custom extraction types to system types for proper handling.
|
|
743
|
+
*/
|
|
744
|
+
chunkTypeMapping: {
|
|
745
|
+
'RECIPE': 'TEXT', // Treat 'RECIPE' as normal text
|
|
746
|
+
'INGREDIENT_LIST': 'LIST', // Treat 'INGREDIENT_LIST' as a list
|
|
747
|
+
'NUTRITIONAL_INFO': 'TABLE' // Treat 'NUTRITIONAL_INFO' as a table
|
|
748
|
+
}
|
|
625
749
|
});
|
|
626
750
|
```
|
|
627
751
|
|
package/dist/bin/cli.cjs
CHANGED
|
@@ -26,8 +26,111 @@ function _interopNamespace(e) {
|
|
|
26
26
|
var fs__namespace = /*#__PURE__*/_interopNamespace(fs);
|
|
27
27
|
var path__namespace = /*#__PURE__*/_interopNamespace(path);
|
|
28
28
|
|
|
29
|
+
// Bundler runtime helpers: short aliases for the Object reflection
// primitives used by the lazy-module and export helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wraps a single-entry object `{ "<module path>"() { ... } }` into a
 * run-once initializer.
 *
 * The first call invokes the object's first own property as a plain
 * function (argument `0`, no receiver) and caches what it returns in `res`.
 * `fn` is cleared *before* the initializer runs, so later calls — including
 * re-entrant calls made while the initializer is still executing — skip the
 * initializer and return whatever `res` currently holds.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const runInitializer = fn[__getOwnPropNames(fn)[0]];
    fn = 0;
    res = runInitializer(0);
  }
  return res;
};

/**
 * Defines every enumerable key of `all` on `target` as an enumerable getter,
 * so each exported binding is looked up lazily at access time.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { enumerable: true, get: all[exportName] });
  }
};

// src/utils/embedding-utils.ts
// Export table for the embedding utilities; each entry is a getter that
// resolves to the function of the same name defined in this file.
var embedding_utils_exports = {};
__export(embedding_utils_exports, {
  buildEmbeddingModelId: () => buildEmbeddingModelId,
  checkEmbeddingMismatch: () => checkEmbeddingMismatch,
  detectEmbeddingMismatch: () => detectEmbeddingMismatch,
  getEmbeddingModelStats: () => getEmbeddingModelStats
});
|
|
47
|
+
/**
 * Produces a report describing any embedding-model mismatch, or `null` when
 * `checkEmbeddingMismatch` finds none.
 *
 * @param {object} prisma - Client forwarded unchanged to `checkEmbeddingMismatch`.
 * @param {object} currentProvider - Active embedding provider, forwarded unchanged.
 * @returns {Promise<null | {severity: string, message: string, details: object, action: string}>}
 *   `action` is "reindex-required" for critical severity and "reindex" otherwise.
 */
async function detectEmbeddingMismatch(prisma, currentProvider) {
  const details = await checkEmbeddingMismatch(prisma, currentProvider);
  if (details.hasMismatch) {
    const severity = determineSeverity(details);
    const message = buildMismatchMessage(details, severity);
    const action = severity === "critical" ? "reindex-required" : "reindex";
    return { severity, message, details, action };
  }
  return null;
}
|
|
61
|
+
/**
 * Compares the embedding models recorded on stored chunks with the active
 * provider and reports how many chunks were embedded by a different model.
 *
 * @param {object} prisma - Client exposing `$queryRaw` (via
 *   `getEmbeddingModelStats`) and `contextRagChunk.count()`.
 * @param {{id: string, dimension: number}} currentProvider - Active provider;
 *   only `id` and `dimension` are read here.
 * @returns {Promise<object>} Summary with `hasMismatch`, `currentModel`,
 *   `currentProvider`, `currentDimension`, `existingModels`,
 *   `chunksToMigrate` and `totalChunks`.
 */
async function checkEmbeddingMismatch(prisma, currentProvider) {
  const existingModels = await getEmbeddingModelStats(prisma);
  const totalChunks = await prisma.contextRagChunk.count();
  const activeModelId = currentProvider.id;
  // Every group whose recorded model differs from the active id needs migrating.
  const chunksToMigrate = existingModels
    .filter((entry) => entry.model !== activeModelId)
    .reduce((sum, entry) => sum + entry.count, 0);
  return {
    hasMismatch: chunksToMigrate > 0,
    currentModel: activeModelId,
    currentProvider: extractProviderType(activeModelId),
    currentDimension: currentProvider.dimension,
    existingModels,
    chunksToMigrate,
    totalChunks
  };
}
|
|
81
|
+
/**
 * Counts rows of `context_rag_chunks` grouped by embedding model and
 * dimension, largest group first.
 *
 * @param {object} prisma - Client exposing the `$queryRaw` tagged template.
 * @returns {Promise<Array<{model: *, dimension: *, count: number}>>}
 *   One entry per (model, dimension) group; `count` is coerced with
 *   `Number()` so bigint counts come back as plain numbers.
 */
async function getEmbeddingModelStats(prisma) {
  const rows = await prisma.$queryRaw`
    SELECT
      embedding_model,
      embedding_dimension,
      COUNT(*) as count
    FROM context_rag_chunks
    GROUP BY embedding_model, embedding_dimension
    ORDER BY count DESC
  `;
  const stats = [];
  for (const row of rows) {
    stats.push({
      model: row.embedding_model,
      dimension: row.embedding_dimension,
      count: Number(row.count)
    });
  }
  return stats;
}
|
|
97
|
+
/**
 * Grades a mismatch summary as "none", "warning" or "critical".
 *
 * - "none" when `result.hasMismatch` is falsy.
 * - "critical" when any existing model group has a non-null dimension that
 *   differs from `result.currentDimension`, or when more than 50% of all
 *   chunks need migrating.
 * - "warning" otherwise.
 *
 * @param {object} result - Summary shaped like the return value of `checkEmbeddingMismatch`.
 * @returns {"none" | "warning" | "critical"}
 */
function determineSeverity(result) {
  if (!result.hasMismatch) {
    return "none";
  }
  const dimensionChanged = result.existingModels.some(
    (entry) => entry.dimension !== null && entry.dimension !== result.currentDimension
  );
  if (dimensionChanged) {
    return "critical";
  }
  const mismatchPercentage = result.chunksToMigrate / result.totalChunks * 100;
  return mismatchPercentage > 50 ? "critical" : "warning";
}
|
|
112
|
+
/**
 * Builds the human-readable message for a detected embedding mismatch.
 *
 * The percentage is guarded because `chunksToMigrate` and `totalChunks` can
 * disagree (for example a zero or stale total), which would otherwise render
 * as "Infinity%", "NaN%" or a value above 100% in the message.
 *
 * @param {object} result - Mismatch summary; reads `chunksToMigrate`,
 *   `totalChunks`, `currentModel` and `currentDimension`.
 * @param {string} severity - "critical" selects the critical wording; any
 *   other value selects the warning wording.
 * @returns {string} Message text including the affected chunk count and percentage.
 */
function buildMismatchMessage(result, severity) {
  const { chunksToMigrate, totalChunks } = result;
  let percentage;
  if (totalChunks > 0) {
    // Clamp to 0-100 so an inconsistent pair of counts cannot overshoot.
    percentage = Math.min(100, Math.max(0, Math.round(chunksToMigrate / totalChunks * 100)));
  } else {
    // No usable total: if anything needs migrating, it is everything we know about.
    percentage = chunksToMigrate > 0 ? 100 : 0;
  }
  if (severity === "critical") {
    return `\u26A0\uFE0F CRITICAL: ${result.chunksToMigrate} chunks (${percentage}%) were created with different embedding models. Current: ${result.currentModel} (${result.currentDimension}d). Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;
  }
  return `\u26A1 Warning: ${result.chunksToMigrate} chunks (${percentage}%) may have outdated embeddings. Current model: ${result.currentModel}. Consider running 'npx context-rag reindex' for optimal results.`;
}
|
|
119
|
+
/**
 * Infers the embedding provider ("gemini", "openai" or "cohere") from a
 * model identifier.
 *
 * Explicit provider prefixes are checked first. For bare model names, the
 * versioned `text-embedding-NNN` form (e.g. `text-embedding-004`, the Gemini
 * default named in this package's README) is recognised as Gemini *before*
 * the broader OpenAI `text-embedding` heuristic; previously such ids were
 * reported as "openai".
 *
 * @param {string} modelId - Model identifier, with or without a provider prefix.
 * @returns {"gemini" | "openai" | "cohere"} Falls back to "gemini" when
 *   nothing matches.
 */
function extractProviderType(modelId) {
  if (modelId.startsWith("gemini")) return "gemini";
  // Bare Gemini ids are "text-embedding-" + three digits; OpenAI ids continue
  // with "3-small", "3-large" or "ada-002" and therefore do not match.
  if (/^text-embedding-\d{3}$/.test(modelId)) return "gemini";
  if (modelId.startsWith("openai") || modelId.includes("text-embedding")) return "openai";
  if (modelId.startsWith("cohere") || modelId.includes("embed-")) return "cohere";
  return "gemini";
}
|
|
125
|
+
/**
 * Joins a provider name and a model name into a single identifier of the
 * form `<provider>-<model>`.
 *
 * @param {string} provider - Provider name, e.g. "openai".
 * @param {string} model - Model name, e.g. "text-embedding-3-small".
 * @returns {string} The combined identifier.
 */
function buildEmbeddingModelId(provider, model) {
  return "".concat(provider, "-", model);
}
|
|
128
|
+
// Lazy initializer for the embedding utilities module. The module body is
// empty, so calling this only marks the module as initialized.
var init_embedding_utils = __esm({
  "src/utils/embedding-utils.ts": function () {
  }
});
|
|
29
132
|
var program = new commander.Command();
|
|
30
|
-
program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.
|
|
133
|
+
// CLI metadata shown by `--help` and `--version`.
program
  .name("context-rag")
  .description("Context-RAG CLI - Setup and management tools")
  .version("1.0.0-beta.11");
|
|
31
134
|
program.command("init").description("Initialize Context-RAG in your project").option("-f, --force", "Overwrite existing files").action(async (options) => {
|
|
32
135
|
console.log("\u{1F680} Initializing Context-RAG...\n");
|
|
33
136
|
try {
|
|
@@ -237,6 +340,42 @@ program.command("status").description("Check Context-RAG setup status").action(a
|
|
|
237
340
|
console.log(` ${process.env["COHERE_API_KEY"] ? "\u2705" : "\u26AA"} COHERE_API_KEY (optional)`);
|
|
238
341
|
console.log();
|
|
239
342
|
});
|
|
343
|
+
// `check-embeddings`: loads the embedding utilities and prints instructions
// for running mismatch detection programmatically. No database is queried
// from the CLI itself.
program
  .command("check-embeddings")
  .description("Check for embedding model mismatch between config and database")
  .action(async () => {
    console.log("\u{1F50D} Checking embedding model status...\n");
    try {
      // Initialize the lazily-loaded module; its exports are not used here.
      const loadEmbeddingUtils = async () => {
        init_embedding_utils();
        return embedding_utils_exports;
      };
      await loadEmbeddingUtils();
      const guidance = [
        "\u26A0\uFE0F Full mismatch detection requires database connection.",
        " Use this command programmatically with your Prisma client.\n",
        "Example:",
        ' import { detectEmbeddingMismatch } from "@msbayindir/context-rag";',
        " const mismatch = await detectEmbeddingMismatch(prisma, provider);"
      ];
      for (const line of guidance) {
        console.log(line);
      }
      console.log();
    } catch (error) {
      console.error("\u274C Error:", error.message);
      process.exit(1);
    }
  });
|
|
358
|
+
// `reindex`: echoes the parsed options and prints a code sample showing how
// to run re-indexing programmatically. Nothing is re-indexed from the CLI
// itself.
program
  .command("reindex")
  .description("Re-index all chunks with current embedding model")
  .option("-c, --concurrency <number>", "Number of concurrent embedding calls", "5")
  .option("-b, --batch-size <number>", "Batch size for processing", "50")
  .option("-d, --document-id <id>", "Re-index specific document only")
  .action(async (options) => {
    const { concurrency, batchSize, documentId } = options;

    console.log("\u{1F504} Starting re-indexing operation...\n");
    console.log("Options:");
    console.log(` Concurrency: ${concurrency}`);
    console.log(` Batch size: ${batchSize}`);
    if (documentId) {
      console.log(` Document ID: ${documentId}`);
    }
    console.log();

    const guidance = [
      "\u26A0\uFE0F Re-indexing requires database connection and embedding provider.",
      " Use this command programmatically:\n",
      "Example:",
      ' import { MigrationService } from "@msbayindir/context-rag";',
      " const migrationService = new MigrationService(prisma, provider, config, logger);",
      " const result = await migrationService.reindex({",
      ` concurrency: ${concurrency},`,
      ` batchSize: ${batchSize},`,
      " onProgress: (p) => console.log(`${p.processed}/${p.total}`)",
      " });"
    ];
    for (const line of guidance) {
      console.log(line);
    }
    console.log();
  });
|
|
240
379
|
program.parse();
|
|
241
380
|
//# sourceMappingURL=cli.cjs.map
|
|
242
381
|
//# sourceMappingURL=cli.cjs.map
|
package/dist/bin/cli.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/bin/cli.ts"],"names":["Command","path","fs"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAMA,IAAM,OAAA,GAAU,IAAIA,iBAAA,EAAQ;AAE5B,OAAA,CACK,KAAK,aAAa,CAAA,CAClB,YAAY,8CAA8C,CAAA,CAC1D,QAAQ,cAAc,CAAA;AAE3B,OAAA,CACK,OAAA,CAAQ,MAAM,CAAA,CACd,WAAA,CAAY,wCAAwC,CAAA,CACpD,MAAA,CAAO,aAAA,EAAe,0BAA0B,CAAA,CAChD,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,yCAAkC,CAAA;AAE9C,EAAA,IAAI;AAEA,IAAA,MAAM,SAAA,GAAiBC,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,IAAO,QAAQ,CAAA;AACnD,IAAA,MAAM,UAAA,GAAkBA,eAAA,CAAA,IAAA,CAAK,SAAA,EAAW,eAAe,CAAA;AAEvD,IAAA,IAAI,YAAA,GAAe,KAAA;AACnB,IAAA,IAAI;AACA,MAAA,MAASC,qBAAO,UAAU,CAAA;AAC1B,MAAA,YAAA,GAAe,IAAA;AAAA,IACnB,CAAA,CAAA,MAAQ;AACJ,MAAA,YAAA,GAAe,KAAA;AAAA,IACnB;AAEA,IAAA,IAAI,CAAC,YAAA,EAAc;AACf,MAAA,OAAA,CAAQ,IAAI,wDAAmD,CAAA;AAC/D,MAAA,OAAA,CAAQ,IAAI,0CAA0C,CAAA;AACtD,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,MAAM,cAAA,GAAiB,MAASA,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAG5D,IAAA,IAAI,eAAe,QAAA,CAAS,iBAAiB,CAAA,IAAK,CAAC,QAAQ,KAAA,EAAO;AAC9D,MAAA,OAAA,CAAQ,IAAI,2DAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,IAAI,CAAC,cAAA,CAAe,QAAA,CAAS,sBAAsB,CAAA,EAAG;AAClD,MAAA,OAAA,CAAQ,IAAI,wDAA8C,CAAA;AAC1D,MAAA,OAAA,CAAQ,IAAI,+CAA+C,CAAA;AAC3D,MAAA,OAAA,CAAQ,IAAI,uBAAuB,CAAA;AACnC,MAAA,OAAA,CAAQ,IAAI,oCAAoC,CAAA;AAChD,MAAA,OAAA,CAAQ,IAAI,iDAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AACpB,MAAA,OAAA,CAAQ,IAAI,oBAAoB,CAAA;AAChC,MAAA,OAAA,CAAQ,IAAI,8BAA8B,CAAA;AAC1C,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,IAAI,4BAA4B,CAAA;AACxC,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AAAA,IACxB;AAGA,IAAA,MAAM,gBAAA,GAAmB;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA
;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,CAAA;AAwIzB,IAAA,IAAI,SAAA,GAAY,cAAA;AAChB,IAAA,IAAI,OAAA,CAAQ,KAAA,IAAS,cAAA,CAAe,QAAA,CAAS,uBAAuB,CAAA,EAAG;AACnE,MAAA,MAAM,WAAA,GAAc,wEAAA;AACpB,MAAA,MAAM,UAAA,GAAa,SAAA,CAAU,OAAA,CAAQ,WAAW,CAAA;AAChD,MAAA,IAAI,eAAe,CAAA,CAAA,EAAI;AACnB,QAAA,SAAA,GAAY,SAAA,CAAU,SAAA,CAAU,CAAA,EAAG,UAAU,EAAE,IAAA,EAAK;AAAA,MACxD;AAAA,IACJ;AAGA,IAAA,SAAA,GAAY,SAAA,CAAU,IAAA,EAAK,GAAI,IAAA,GAAO,gBAAA;AAGtC,IAAA,MAASA,aAAA,CAAA,SAAA,CAAU,YAAY,SAAS,CAAA;AAExC,IAAA,OAAA,CAAQ,IAAI,2DAAsD,CAAA;AAClE,IAAA,OAAA,CAAQ,IAAI,aAAa,CAAA;AACzB,IAAA,OAAA,CAAQ,IAAI,yDAAyD,CAAA;AACrE,IAAA,OAAA,CAAQ,IAAI,4EAA4E,CAAA;AACxF,IAAA,OAAA,CAAQ,IAAI,iCAAiC,CAAA;AAAA,EAEjD,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,QAAQ,CAAA,CAChB,YAAY,gCAAgC,CAAA,CAC5C,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,4CAAqC,CAAA;AAGjD,EAAA,MAAM,aAAkBD,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,EAAI,EAAG,UAAU,eAAe,CAAA;AACrE,EAAA,IAAI;AACA,IAAA,MAAM,MAAA,GAAS,MAASC,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAEpD,IAAA,OAAA,CAAQ,IAAI,gBAAgB,CAAA;AAC5B,IAAA,OAAA,CAAQ,IAAI,CAAA,4BAAA,CAAyB,CAAA;AACrC,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,iBAAiB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACpF,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,sBAAsB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACzF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,CAAA,CAAA,MAAQ;AACJ,IAAA,OAAA,CAAQ,IAAI,yCAAoC,CAAA;AAAA,EACpD;AAIA,EAAA,OAAA,CAAQ,IAAI,cAAc,CAAA;AAC1B,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,cAAc,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,aAAA,CAAe,CAAA;AACvE,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,eAAA,CAAiB,CAAA;AAC3E,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QA
AA,GAAM,QAAG,CAAA,0BAAA,CAA4B,CAAA;AACtF,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CAAQ,KAAA,EAAM","file":"cli.cjs","sourcesContent":["#!/usr/bin/env node\r\n\r\nimport { Command } from 'commander';\r\nimport * as fs from 'fs/promises';\r\nimport * as path from 'path';\r\n\r\nconst program = new Command();\r\n\r\nprogram\r\n .name('context-rag')\r\n .description('Context-RAG CLI - Setup and management tools')\r\n .version('1.0.0-beta.1');\r\n\r\nprogram\r\n .command('init')\r\n .description('Initialize Context-RAG in your project')\r\n .option('-f, --force', 'Overwrite existing files')\r\n .action(async (options) => {\r\n console.log('🚀 Initializing Context-RAG...\\n');\r\n\r\n try {\r\n // Check if prisma directory exists\r\n const prismaDir = path.join(process.cwd(), 'prisma');\r\n const schemaPath = path.join(prismaDir, 'schema.prisma');\r\n\r\n let schemaExists = false;\r\n try {\r\n await fs.access(schemaPath);\r\n schemaExists = true;\r\n } catch {\r\n schemaExists = false;\r\n }\r\n\r\n if (!schemaExists) {\r\n console.log('❌ Prisma schema not found at prisma/schema.prisma');\r\n console.log(' Please run `npx prisma init` first.\\n');\r\n process.exit(1);\r\n }\r\n\r\n // Read existing schema\r\n const existingSchema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n // Check if Context-RAG models already exist\r\n if (existingSchema.includes('ContextRagChunk') && !options.force) {\r\n console.log('⚠️ Context-RAG models already exist in schema.');\r\n console.log(' Use --force to overwrite.\\n');\r\n process.exit(0);\r\n }\r\n\r\n // Check for pgvector extension\r\n if (!existingSchema.includes('postgresqlExtensions')) {\r\n console.log('⚠️ Warning: pgvector extension not enabled.');\r\n console.log(' Add the following to your schema.prisma:\\n');\r\n console.log(' generator client {');\r\n console.log(' provider = \"prisma-client-js\"');\r\n console.log(' previewFeatures = [\"postgresqlExtensions\"]');\r\n console.log(' }\\n');\r\n console.log(' 
datasource db {');\r\n console.log(' provider = \"postgresql\"');\r\n console.log(' url = env(\"DATABASE_URL\")');\r\n console.log(' extensions = [vector]');\r\n console.log(' }\\n');\r\n }\r\n\r\n // Context-RAG models to append\r\n const contextRagModels = `\r\n// ============================================\r\n// Context-RAG Models\r\n// ============================================\r\n\r\n/// Stores prompt configurations for different document types\r\nmodel ContextRagPromptConfig {\r\n id String @id @default(uuid())\r\n documentType String @map(\"document_type\")\r\n name String\r\n systemPrompt String @map(\"system_prompt\") @db.Text\r\n chunkStrategy Json @map(\"chunk_strategy\")\r\n version Int @default(1)\r\n isActive Boolean @default(true) @map(\"is_active\")\r\n isDefault Boolean @default(false) @map(\"is_default\")\r\n createdBy String? @map(\"created_by\")\r\n changeLog String? @map(\"change_log\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n chunks ContextRagChunk[]\r\n\r\n @@unique([documentType, version])\r\n @@index([documentType, isActive])\r\n @@map(\"context_rag_prompt_configs\")\r\n}\r\n\r\n/// Stores vector chunks for semantic search\r\nmodel ContextRagChunk {\r\n id String @id @default(uuid())\r\n promptConfigId String @map(\"prompt_config_id\")\r\n promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)\r\n documentId String @map(\"document_id\")\r\n chunkIndex Int @map(\"chunk_index\")\r\n chunkType String @map(\"chunk_type\")\r\n\r\n /// Plain text content optimized for vector search\r\n searchContent String @map(\"search_content\") @db.Text\r\n\r\n /// Enriched content: context + searchContent (for RAG enhancement)\r\n enrichedContent String? @map(\"enriched_content\") @db.Text\r\n\r\n /// AI-generated context text only (for debugging)\r\n contextText String? 
@map(\"context_text\") @db.Text\r\n\r\n /// Vector embedding (768 dimensions for Gemini)\r\n searchVector Unsupported(\"vector(768)\") @map(\"search_vector\")\r\n\r\n /// Rich Markdown content for display\r\n displayContent String @map(\"display_content\") @db.Text\r\n\r\n sourcePageStart Int @map(\"source_page_start\")\r\n sourcePageEnd Int @map(\"source_page_end\")\r\n confidenceScore Float @default(0.5) @map(\"confidence_score\")\r\n metadata Json\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n\r\n @@index([promptConfigId])\r\n @@index([documentId])\r\n @@index([chunkType])\r\n @@index([confidenceScore])\r\n @@map(\"context_rag_chunks\")\r\n}\r\n\r\n/// Tracks document processing state\r\nmodel ContextRagDocument {\r\n id String @id @default(uuid())\r\n filename String\r\n fileHash String @map(\"file_hash\")\r\n fileSize Int @map(\"file_size\")\r\n pageCount Int @map(\"page_count\")\r\n documentType String? @map(\"document_type\")\r\n\r\n /// Experiment identifier for A/B testing models\r\n experimentId String? @map(\"experiment_id\")\r\n\r\n /// AI model used for processing\r\n modelName String? @map(\"model_name\")\r\n\r\n /// Model configuration as JSON\r\n modelConfig Json? @map(\"model_config\")\r\n\r\n status String @default(\"PENDING\")\r\n\r\n promptConfigId String? @map(\"prompt_config_id\")\r\n totalBatches Int @default(0) @map(\"total_batches\")\r\n completedBatches Int @default(0) @map(\"completed_batches\")\r\n failedBatches Int @default(0) @map(\"failed_batches\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n errorMessage String? @map(\"error_message\")\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n completedAt DateTime? 
@map(\"completed_at\")\r\n\r\n batches ContextRagBatch[]\r\n\r\n @@unique([fileHash, experimentId])\r\n @@index([status])\r\n @@index([fileHash])\r\n @@index([documentType])\r\n @@index([experimentId])\r\n @@map(\"context_rag_documents\")\r\n}\r\n\r\n/// Tracks individual batch processing jobs\r\nmodel ContextRagBatch {\r\n id String @id @default(uuid())\r\n documentId String @map(\"document_id\")\r\n document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)\r\n\r\n batchIndex Int @map(\"batch_index\")\r\n pageStart Int @map(\"page_start\")\r\n pageEnd Int @map(\"page_end\")\r\n status String @default(\"PENDING\")\r\n retryCount Int @default(0) @map(\"retry_count\")\r\n lastError String? @map(\"last_error\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n\r\n startedAt DateTime? @map(\"started_at\")\r\n completedAt DateTime? @map(\"completed_at\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n @@unique([documentId, batchIndex])\r\n @@index([documentId, status])\r\n @@index([status])\r\n @@map(\"context_rag_batches\")\r\n}\r\n`;\r\n\r\n // Remove existing Context-RAG models if force\r\n let newSchema = existingSchema;\r\n if (options.force && existingSchema.includes('// Context-RAG Models')) {\r\n const startMarker = '// ============================================\\n// Context-RAG Models';\r\n const startIndex = newSchema.indexOf(startMarker);\r\n if (startIndex !== -1) {\r\n newSchema = newSchema.substring(0, startIndex).trim();\r\n }\r\n }\r\n\r\n // Append new models\r\n newSchema = newSchema.trim() + '\\n' + contextRagModels;\r\n\r\n // Write updated schema\r\n await fs.writeFile(schemaPath, newSchema);\r\n\r\n console.log('✅ Context-RAG models added to prisma/schema.prisma\\n');\r\n console.log('Next steps:');\r\n console.log(' 1. 
Run: npx prisma migrate dev --name add_context_rag');\r\n console.log(' 2. Enable pgvector in PostgreSQL: CREATE EXTENSION IF NOT EXISTS vector;');\r\n console.log(' 3. Start using Context-RAG!\\n');\r\n\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('status')\r\n .description('Check Context-RAG setup status')\r\n .action(async () => {\r\n console.log('🔍 Checking Context-RAG status...\\n');\r\n\r\n // Check schema\r\n const schemaPath = path.join(process.cwd(), 'prisma', 'schema.prisma');\r\n try {\r\n const schema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n console.log('Prisma Schema:');\r\n console.log(` ✅ schema.prisma found`);\r\n console.log(` ${schema.includes('ContextRagChunk') ? '✅' : '❌'} Context-RAG models`);\r\n console.log(` ${schema.includes('postgresqlExtensions') ? '✅' : '❌'} pgvector extension`);\r\n console.log();\r\n } catch {\r\n console.log('❌ prisma/schema.prisma not found\\n');\r\n }\r\n\r\n // Check env - note: these are checked at runtime, not via centralized env\r\n // since CLI may run before env is fully configured\r\n console.log('Environment:');\r\n console.log(` ${process.env['DATABASE_URL'] ? '✅' : '❌'} DATABASE_URL`);\r\n console.log(` ${process.env['GEMINI_API_KEY'] ? '✅' : '❌'} GEMINI_API_KEY`);\r\n console.log(` ${process.env['COHERE_API_KEY'] ? '✅' : '⚪'} COHERE_API_KEY (optional)`);\r\n console.log();\r\n });\r\n\r\nprogram.parse();\r\n"]}
|
|
1
|
+
{"version":3,"sources":["../../src/utils/embedding-utils.ts","../../src/bin/cli.ts"],"names":["Command","path","fs"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,IAAA,uBAAA,GAAA,EAAA;AAAA,QAAA,CAAA,uBAAA,EAAA;AAAA,EAAA,qBAAA,EAAA,MAAA,qBAAA;AAAA,EAAA,sBAAA,EAAA,MAAA,sBAAA;AAAA,EAAA,uBAAA,EAAA,MAAA,uBAAA;AAAA,EAAA,sBAAA,EAAA,MAAA;AAAA,CAAA,CAAA;AAsBA,eAAsB,uBAAA,CAClB,QACA,eAAA,EAC4B;AAC5B,EAAA,MAAM,MAAA,GAAS,MAAM,sBAAA,CAAuB,MAAA,EAAQ,eAAe,CAAA;AAEnE,EAAA,IAAI,CAAC,OAAO,WAAA,EAAa;AACrB,IAAA,OAAO,IAAA;AAAA,EACX;AAEA,EAAA,MAAM,QAAA,GAAW,kBAAkB,MAAM,CAAA;AACzC,EAAA,MAAM,OAAA,GAAU,oBAAA,CAAqB,MAAA,EAAQ,QAAQ,CAAA;AAErD,EAAA,OAAO;AAAA,IACH,QAAA;AAAA,IACA,OAAA;AAAA,IACA,OAAA,EAAS,MAAA;AAAA,IACT,MAAA,EAAQ,QAAA,KAAa,UAAA,GAAa,kBAAA,GAAqB;AAAA,GAC3D;AACJ;AAKA,eAAsB,sBAAA,CAClB,QACA,eAAA,EACuB;AAEvB,EAAA,MAAM,KAAA,GAAQ,MAAM,sBAAA,CAAuB,MAAM,CAAA;AAGjD,EAAA,MAAM,WAAA,GAAc,MAAM,MAAA,CAAO,eAAA,CAAgB,KAAA,EAAM;AAGvD,EAAA,IAAI,eAAA,GAAkB,CAAA;AACtB,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AAEtB,IAAA,IAAI,IAAA,CAAK,KAAA,KAAU,eAAA,CAAgB,EAAA,EAAI;AACnC,MAAA,eAAA,IAAmB,IAAA,CAAK,KAAA;AAAA,IAC5B;AAAA,EACJ;AAEA,EAAA,MAAM,cAAc,eAAA,GAAkB,CAAA;AAEtC,EAAA,OAAO;AAAA,IACH,WAAA;AAAA,IACA,cAAc,eAAA,CAAgB,EAAA;AAAA,IAC9B,eAAA,EAAiB,mBAAA,CAAoB,eAAA,CAAgB,EAAE,CAAA;AAAA,IACvD,kBAAkB,eAAA,CAAgB,SAAA;AAAA,IAClC,cAAA,EAAgB,KAAA;AAAA,IAChB,eAAA;AAAA,IACA;AAAA,GACJ;AACJ;AAKA,eAAsB,uBAClB,MAAA,EAC8B;AAE9B,EAAA,MAAM,OAAA,GAAU,MAAM,MAAA,CAAO,SAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAAA,CAAA;AAc7B,EAAA,OAAO,OAAA,CAAQ,IAAI,CAAA,CAAA,MAAM;AAAA,IACrB,OAAO,CAAA,CAAE,eAAA;AAAA,IACT,WAAW,CAAA,CAAE,mBAAA;AAAA,IACb,KAAA,EAAO,MAAA,CAAO,CAAA,CAAE,KAAK;AAAA,GACzB,CAAE,CAAA;AACN;AAKA,SAAS,kBAAkB,MAAA,EAA0C;AACjE,EAAA,IAAI,CAAC,OAAO,WAAA,EAAa;AACrB,IAAA,OAAO,MAAA;AAAA,EACX;AAGA,EAAA,KAAA,MAAW,IAAA,IAAQ,OAAO,cAAA,EAAgB;AACtC,IAAA,IAAI,KAAK,SAAA,KAAc,IAAA,IAAQ,IAAA,CAAK,SAAA,KAAc,OAAO,gBAAA,EAAkB;AACvE,MAAA,OAAO,UAAA;AAAA,IACX;AAAA,EACJ;AAGA,EAAA,MAAM,kBAAA,GAAsB,MAAA,CAAO,eAAA,GAAkB,MAAA,CAAO,WAAA,GAAe,GAAA;AAE3E
,EAAA,IAAI,qBAAqB,EAAA,EAAI;AACzB,IAAA,OAAO,UAAA;AAAA,EACX;AAEA,EAAA,OAAO,SAAA;AACX;AAKA,SAAS,oBAAA,CAAqB,QAAwB,QAAA,EAAoC;AACtF,EAAA,MAAM,aAAa,IAAA,CAAK,KAAA,CAAO,OAAO,eAAA,GAAkB,MAAA,CAAO,cAAe,GAAG,CAAA;AAEjF,EAAA,IAAI,aAAa,UAAA,EAAY;AACzB,IAAA,OAAO,CAAA,uBAAA,EAAgB,MAAA,CAAO,eAAe,CAAA,SAAA,EAAY,UAAU,6DACnD,MAAA,CAAO,YAAY,CAAA,EAAA,EAAK,MAAA,CAAO,gBAAgB,CAAA,2EAAA,CAAA;AAAA,EAEnE;AAEA,EAAA,OAAO,mBAAc,MAAA,CAAO,eAAe,YAAY,UAAU,CAAA,gDAAA,EAC3C,OAAO,YAAY,CAAA,iEAAA,CAAA;AAE7C;AAKA,SAAS,oBAAoB,OAAA,EAAiD;AAC1E,EAAA,IAAI,OAAA,CAAQ,UAAA,CAAW,QAAQ,CAAA,EAAG,OAAO,QAAA;AACzC,EAAA,IAAI,OAAA,CAAQ,WAAW,QAAQ,CAAA,IAAK,QAAQ,QAAA,CAAS,gBAAgB,GAAG,OAAO,QAAA;AAC/E,EAAA,IAAI,OAAA,CAAQ,WAAW,QAAQ,CAAA,IAAK,QAAQ,QAAA,CAAS,QAAQ,GAAG,OAAO,QAAA;AACvE,EAAA,OAAO,QAAA;AACX;AAKO,SAAS,qBAAA,CAAsB,UAAkB,KAAA,EAAuB;AAC3E,EAAA,OAAO,CAAA,EAAG,QAAQ,CAAA,CAAA,EAAI,KAAK,CAAA,CAAA;AAC/B;AAnKA,IAAA,oBAAA,GAAA,KAAA,CAAA;AAAA,EAAA,8BAAA,GAAA;AAAA,EAAA;AAAA,CAAA,CAAA;ACMA,IAAM,OAAA,GAAU,IAAIA,iBAAA,EAAQ;AAE5B,OAAA,CACK,KAAK,aAAa,CAAA,CAClB,YAAY,8CAA8C,CAAA,CAC1D,QAAQ,eAAe,CAAA;AAE5B,OAAA,CACK,OAAA,CAAQ,MAAM,CAAA,CACd,WAAA,CAAY,wCAAwC,CAAA,CACpD,MAAA,CAAO,aAAA,EAAe,0BAA0B,CAAA,CAChD,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,yCAAkC,CAAA;AAE9C,EAAA,IAAI;AAEA,IAAA,MAAM,SAAA,GAAiBC,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,IAAO,QAAQ,CAAA;AACnD,IAAA,MAAM,UAAA,GAAkBA,eAAA,CAAA,IAAA,CAAK,SAAA,EAAW,eAAe,CAAA;AAEvD,IAAA,IAAI,YAAA,GAAe,KAAA;AACnB,IAAA,IAAI;AACA,MAAA,MAASC,qBAAO,UAAU,CAAA;AAC1B,MAAA,YAAA,GAAe,IAAA;AAAA,IACnB,CAAA,CAAA,MAAQ;AACJ,MAAA,YAAA,GAAe,KAAA;AAAA,IACnB;AAEA,IAAA,IAAI,CAAC,YAAA,EAAc;AACf,MAAA,OAAA,CAAQ,IAAI,wDAAmD,CAAA;AAC/D,MAAA,OAAA,CAAQ,IAAI,0CAA0C,CAAA;AACtD,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,MAAM,cAAA,GAAiB,MAASA,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAG5D,IAAA,IAAI,eAAe,QAAA,CAAS,iBAAiB,CAAA,IAAK,CAAC,QAAQ,KAAA,EAAO;AAC9D,MAAA,OAAA,CAAQ,IAAI,2DAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,IAClB;AAGA,IAAA,IAAI,CAAC,cAAA,CAAe,QA
AA,CAAS,sBAAsB,CAAA,EAAG;AAClD,MAAA,OAAA,CAAQ,IAAI,wDAA8C,CAAA;AAC1D,MAAA,OAAA,CAAQ,IAAI,+CAA+C,CAAA;AAC3D,MAAA,OAAA,CAAQ,IAAI,uBAAuB,CAAA;AACnC,MAAA,OAAA,CAAQ,IAAI,oCAAoC,CAAA;AAChD,MAAA,OAAA,CAAQ,IAAI,iDAAiD,CAAA;AAC7D,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AACpB,MAAA,OAAA,CAAQ,IAAI,oBAAoB,CAAA;AAChC,MAAA,OAAA,CAAQ,IAAI,8BAA8B,CAAA;AAC1C,MAAA,OAAA,CAAQ,IAAI,gCAAgC,CAAA;AAC5C,MAAA,OAAA,CAAQ,IAAI,4BAA4B,CAAA;AACxC,MAAA,OAAA,CAAQ,IAAI,QAAQ,CAAA;AAAA,IACxB;AAGA,IAAA,MAAM,gBAAA,GAAmB;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,CAAA;AAwIzB,IAAA,IAAI,SAAA,GAAY,cAAA;AAChB,IAAA,IAAI,OAAA,CAAQ,KAAA,IAAS,cAAA,CAAe,QAAA,CAAS,uBAAuB,CAAA,EAAG;AACnE,MAAA,MAAM,WAAA,GAAc,wEAAA;AACpB,MAAA,MAAM,UAAA,GAAa,SAAA,CAAU,OAAA,CAAQ,WAAW,CAAA;AAChD,MAAA,IAAI,eAAe,CAAA,CAAA,EAAI;AACnB,QAAA,SAAA,GAAY,SAAA,CAAU,SAAA,CAAU,CAAA,EAAG,UAAU,EAAE,IAAA,EAAK;AAAA,MACxD;AAAA,IACJ;AAGA,IAAA,SAAA,GAAY,SAAA,CAAU,IAAA,EAAK,GAAI,IAAA,GAAO,gBAAA;AAGtC,IAAA,MAASA,aAAA,CAAA,SAAA,CAAU,YAAY,SAAS,CAAA;AAExC,IAAA,OAAA,CAAQ,IAAI,2DAAsD,CAAA;AAClE,IAAA,OAAA,CAAQ,IAAI,aAAa,CAAA;AACzB,IAAA,OAAA,CAAQ,IAAI,yDAAyD,CAAA;AACrE,IAAA,OAAA,CAAQ,IAAI,4EAA4E,CAAA;AACxF,IAAA,OAAA,CAAQ,IAAI,iCAAiC,CAAA;AAAA,EAEjD,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,QAAQ,CAAA,CAChB,YAAY,gCAAgC,CAAA,CAC5C,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,4CAAqC,CAAA;AAGjD,EAAA,MAAM,aAAkBD,eAAA,CAAA,IAAA,CAAK,OAAA,CAAQ,GAAA,EAAI,EAAG,UAAU,eAAe,CAAA;
AACrE,EAAA,IAAI;AACA,IAAA,MAAM,MAAA,GAAS,MAASC,aAAA,CAAA,QAAA,CAAS,UAAA,EAAY,OAAO,CAAA;AAEpD,IAAA,OAAA,CAAQ,IAAI,gBAAgB,CAAA;AAC5B,IAAA,OAAA,CAAQ,IAAI,CAAA,4BAAA,CAAyB,CAAA;AACrC,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,iBAAiB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACpF,IAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,MAAA,CAAO,QAAA,CAAS,sBAAsB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,mBAAA,CAAqB,CAAA;AACzF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,CAAA,CAAA,MAAQ;AACJ,IAAA,OAAA,CAAQ,IAAI,yCAAoC,CAAA;AAAA,EACpD;AAIA,EAAA,OAAA,CAAQ,IAAI,cAAc,CAAA;AAC1B,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,cAAc,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,aAAA,CAAe,CAAA;AACvE,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,eAAA,CAAiB,CAAA;AAC3E,EAAA,OAAA,CAAQ,GAAA,CAAI,KAAK,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,GAAI,QAAA,GAAM,QAAG,CAAA,0BAAA,CAA4B,CAAA;AACtF,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CACK,QAAQ,kBAAkB,CAAA,CAC1B,YAAY,gEAAgE,CAAA,CAC5E,OAAO,YAAY;AAChB,EAAA,OAAA,CAAQ,IAAI,gDAAyC,CAAA;AAErD,EAAA,IAAI;AAEA,IAAA,KAAM,MAAM,OAAA,CAAA,OAAA,EAAA,CAAA,IAAA,CAAA,OAAA,oBAAA,EAAA,EAAA,uBAAA,CAAA,CAAA;AAGZ,IAAA,OAAA,CAAQ,IAAI,qEAA2D,CAAA;AACvE,IAAA,OAAA,CAAQ,IAAI,iEAAiE,CAAA;AAC7E,IAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,IAAA,OAAA,CAAQ,IAAI,sEAAsE,CAAA;AAClF,IAAA,OAAA,CAAQ,IAAI,qEAAqE,CAAA;AACjF,IAAA,OAAA,CAAQ,GAAA,EAAI;AAAA,EAChB,SAAS,KAAA,EAAO;AACZ,IAAA,OAAA,CAAQ,KAAA,CAAM,eAAA,EAAa,KAAA,CAAgB,OAAO,CAAA;AAClD,IAAA,OAAA,CAAQ,KAAK,CAAC,CAAA;AAAA,EAClB;AACJ,CAAC,CAAA;AAEL,OAAA,CACK,OAAA,CAAQ,SAAS,CAAA,CACjB,WAAA,CAAY,kDAAkD,CAAA,CAC9D,MAAA,CAAO,4BAAA,EAA8B,sCAAA,EAAwC,GAAG,CAAA,CAChF,OAAO,2BAAA,EAA6B,2BAAA,EAA6B,IAAI,CAAA,CACrE,MAAA,CAAO,0BAA0B,iCAAiC,CAAA,CAClE,MAAA,CAAO,OAAO,OAAA,KAAY;AACvB,EAAA,OAAA,CAAQ,IAAI,+CAAwC,CAAA;AAEpD,EAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,eAAA,EAAkB,OAAA,CAAQ,WAAW,CAAA,CAAE,CAAA;AACnD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,cAAA,EAAiB,OAAA,CAAQ,SAAS,CAAA,CAAE,CAAA;AAChD,EAAA,IAAI,QAAQ,UAAA,EAAY;AACpB,IAAA,OAAA,CAAQ,GAAA,CA
AI,CAAA,eAAA,EAAkB,OAAA,CAAQ,UAAU,CAAA,CAAE,CAAA;AAAA,EACtD;AACA,EAAA,OAAA,CAAQ,GAAA,EAAI;AAEZ,EAAA,OAAA,CAAQ,IAAI,gFAAsE,CAAA;AAClF,EAAA,OAAA,CAAQ,IAAI,yCAAyC,CAAA;AACrD,EAAA,OAAA,CAAQ,IAAI,UAAU,CAAA;AACtB,EAAA,OAAA,CAAQ,IAAI,+DAA+D,CAAA;AAC3E,EAAA,OAAA,CAAQ,IAAI,oFAAoF,CAAA;AAChG,EAAA,OAAA,CAAQ,IAAI,mDAAmD,CAAA;AAC/D,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,iBAAA,EAAoB,OAAA,CAAQ,WAAW,CAAA,CAAA,CAAG,CAAA;AACtD,EAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,eAAA,EAAkB,OAAA,CAAQ,SAAS,CAAA,CAAA,CAAG,CAAA;AAClD,EAAA,OAAA,CAAQ,IAAI,iEAAiE,CAAA;AAC7E,EAAA,OAAA,CAAQ,IAAI,OAAO,CAAA;AACnB,EAAA,OAAA,CAAQ,GAAA,EAAI;AAChB,CAAC,CAAA;AAEL,OAAA,CAAQ,KAAA,EAAM","file":"cli.cjs","sourcesContent":["/**\r\n * Embedding Utilities\r\n * \r\n * Utilities for embedding model detection, mismatch checking, and metadata management.\r\n */\r\n\r\nimport type { PrismaClientLike } from '../types/config.types.js';\r\nimport type { EmbeddingProvider } from '../types/embedding-provider.types.js';\r\nimport type {\r\n MismatchResult,\r\n MismatchInfo,\r\n MismatchSeverity,\r\n EmbeddingModelStats,\r\n} from '../types/migration.types.js';\r\n\r\n/**\r\n * Detect embedding model mismatch between config and database\r\n * \r\n * @param prisma - Prisma client instance\r\n * @param currentProvider - Currently configured embedding provider\r\n * @returns Mismatch info if there's a problem, null if everything matches\r\n */\r\nexport async function detectEmbeddingMismatch(\r\n prisma: PrismaClientLike,\r\n currentProvider: EmbeddingProvider\r\n): Promise<MismatchInfo | null> {\r\n const result = await checkEmbeddingMismatch(prisma, currentProvider);\r\n\r\n if (!result.hasMismatch) {\r\n return null;\r\n }\r\n\r\n const severity = determineSeverity(result);\r\n const message = buildMismatchMessage(result, severity);\r\n\r\n return {\r\n severity,\r\n message,\r\n details: result,\r\n action: severity === 'critical' ? 
'reindex-required' : 'reindex',\r\n };\r\n}\r\n\r\n/**\r\n * Check for embedding model mismatch\r\n */\r\nexport async function checkEmbeddingMismatch(\r\n prisma: PrismaClientLike,\r\n currentProvider: EmbeddingProvider\r\n): Promise<MismatchResult> {\r\n // Get model statistics from database\r\n const stats = await getEmbeddingModelStats(prisma);\r\n\r\n // Get total chunk count\r\n const totalChunks = await prisma.contextRagChunk.count();\r\n\r\n // Calculate chunks that need migration\r\n let chunksToMigrate = 0;\r\n for (const stat of stats) {\r\n // Chunks with different model or null model need migration\r\n if (stat.model !== currentProvider.id) {\r\n chunksToMigrate += stat.count;\r\n }\r\n }\r\n\r\n const hasMismatch = chunksToMigrate > 0;\r\n\r\n return {\r\n hasMismatch,\r\n currentModel: currentProvider.id,\r\n currentProvider: extractProviderType(currentProvider.id),\r\n currentDimension: currentProvider.dimension,\r\n existingModels: stats,\r\n chunksToMigrate,\r\n totalChunks,\r\n };\r\n}\r\n\r\n/**\r\n * Get statistics about embedding models in the database\r\n */\r\nexport async function getEmbeddingModelStats(\r\n prisma: PrismaClientLike\r\n): Promise<EmbeddingModelStats[]> {\r\n // Use raw query for GROUP BY with null handling\r\n const results = await prisma.$queryRaw<Array<{\r\n embedding_model: string | null;\r\n embedding_dimension: number | null;\r\n count: bigint;\r\n }>>`\r\n SELECT \r\n embedding_model,\r\n embedding_dimension,\r\n COUNT(*) as count\r\n FROM context_rag_chunks\r\n GROUP BY embedding_model, embedding_dimension\r\n ORDER BY count DESC\r\n `;\r\n\r\n return results.map(r => ({\r\n model: r.embedding_model,\r\n dimension: r.embedding_dimension,\r\n count: Number(r.count),\r\n }));\r\n}\r\n\r\n/**\r\n * Determine severity of mismatch\r\n */\r\nfunction determineSeverity(result: MismatchResult): MismatchSeverity {\r\n if (!result.hasMismatch) {\r\n return 'none';\r\n }\r\n\r\n // Check if there are dimension mismatches 
(critical)\r\n for (const stat of result.existingModels) {\r\n if (stat.dimension !== null && stat.dimension !== result.currentDimension) {\r\n return 'critical';\r\n }\r\n }\r\n\r\n // Check percentage of mismatched chunks\r\n const mismatchPercentage = (result.chunksToMigrate / result.totalChunks) * 100;\r\n\r\n if (mismatchPercentage > 50) {\r\n return 'critical';\r\n }\r\n\r\n return 'warning';\r\n}\r\n\r\n/**\r\n * Build user-friendly mismatch message\r\n */\r\nfunction buildMismatchMessage(result: MismatchResult, severity: MismatchSeverity): string {\r\n const percentage = Math.round((result.chunksToMigrate / result.totalChunks) * 100);\r\n\r\n if (severity === 'critical') {\r\n return `⚠️ CRITICAL: ${result.chunksToMigrate} chunks (${percentage}%) were created with different embedding models. ` +\r\n `Current: ${result.currentModel} (${result.currentDimension}d). ` +\r\n `Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;\r\n }\r\n\r\n return `⚡ Warning: ${result.chunksToMigrate} chunks (${percentage}%) may have outdated embeddings. ` +\r\n `Current model: ${result.currentModel}. 
` +\r\n `Consider running 'npx context-rag reindex' for optimal results.`;\r\n}\r\n\r\n/**\r\n * Extract provider type from model ID\r\n */\r\nfunction extractProviderType(modelId: string): 'gemini' | 'openai' | 'cohere' {\r\n if (modelId.startsWith('gemini')) return 'gemini';\r\n if (modelId.startsWith('openai') || modelId.includes('text-embedding')) return 'openai';\r\n if (modelId.startsWith('cohere') || modelId.includes('embed-')) return 'cohere';\r\n return 'gemini'; // default\r\n}\r\n\r\n/**\r\n * Build embedding model identifier from provider and model name\r\n */\r\nexport function buildEmbeddingModelId(provider: string, model: string): string {\r\n return `${provider}-${model}`;\r\n}\r\n","#!/usr/bin/env node\r\n\r\nimport { Command } from 'commander';\r\nimport * as fs from 'fs/promises';\r\nimport * as path from 'path';\r\n\r\nconst program = new Command();\r\n\r\nprogram\r\n .name('context-rag')\r\n .description('Context-RAG CLI - Setup and management tools')\r\n .version('1.0.0-beta.11');\r\n\r\nprogram\r\n .command('init')\r\n .description('Initialize Context-RAG in your project')\r\n .option('-f, --force', 'Overwrite existing files')\r\n .action(async (options) => {\r\n console.log('🚀 Initializing Context-RAG...\\n');\r\n\r\n try {\r\n // Check if prisma directory exists\r\n const prismaDir = path.join(process.cwd(), 'prisma');\r\n const schemaPath = path.join(prismaDir, 'schema.prisma');\r\n\r\n let schemaExists = false;\r\n try {\r\n await fs.access(schemaPath);\r\n schemaExists = true;\r\n } catch {\r\n schemaExists = false;\r\n }\r\n\r\n if (!schemaExists) {\r\n console.log('❌ Prisma schema not found at prisma/schema.prisma');\r\n console.log(' Please run `npx prisma init` first.\\n');\r\n process.exit(1);\r\n }\r\n\r\n // Read existing schema\r\n const existingSchema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n // Check if Context-RAG models already exist\r\n if (existingSchema.includes('ContextRagChunk') && !options.force) {\r\n 
console.log('⚠️ Context-RAG models already exist in schema.');\r\n console.log(' Use --force to overwrite.\\n');\r\n process.exit(0);\r\n }\r\n\r\n // Check for pgvector extension\r\n if (!existingSchema.includes('postgresqlExtensions')) {\r\n console.log('⚠️ Warning: pgvector extension not enabled.');\r\n console.log(' Add the following to your schema.prisma:\\n');\r\n console.log(' generator client {');\r\n console.log(' provider = \"prisma-client-js\"');\r\n console.log(' previewFeatures = [\"postgresqlExtensions\"]');\r\n console.log(' }\\n');\r\n console.log(' datasource db {');\r\n console.log(' provider = \"postgresql\"');\r\n console.log(' url = env(\"DATABASE_URL\")');\r\n console.log(' extensions = [vector]');\r\n console.log(' }\\n');\r\n }\r\n\r\n // Context-RAG models to append\r\n const contextRagModels = `\r\n// ============================================\r\n// Context-RAG Models\r\n// ============================================\r\n\r\n/// Stores prompt configurations for different document types\r\nmodel ContextRagPromptConfig {\r\n id String @id @default(uuid())\r\n documentType String @map(\"document_type\")\r\n name String\r\n systemPrompt String @map(\"system_prompt\") @db.Text\r\n chunkStrategy Json @map(\"chunk_strategy\")\r\n version Int @default(1)\r\n isActive Boolean @default(true) @map(\"is_active\")\r\n isDefault Boolean @default(false) @map(\"is_default\")\r\n createdBy String? @map(\"created_by\")\r\n changeLog String? 
@map(\"change_log\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n chunks ContextRagChunk[]\r\n\r\n @@unique([documentType, version])\r\n @@index([documentType, isActive])\r\n @@map(\"context_rag_prompt_configs\")\r\n}\r\n\r\n/// Stores vector chunks for semantic search\r\nmodel ContextRagChunk {\r\n id String @id @default(uuid())\r\n promptConfigId String @map(\"prompt_config_id\")\r\n promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)\r\n documentId String @map(\"document_id\")\r\n chunkIndex Int @map(\"chunk_index\")\r\n chunkType String @map(\"chunk_type\")\r\n\r\n /// Plain text content optimized for vector search\r\n searchContent String @map(\"search_content\") @db.Text\r\n\r\n /// Enriched content: context + searchContent (for RAG enhancement)\r\n enrichedContent String? @map(\"enriched_content\") @db.Text\r\n\r\n /// AI-generated context text only (for debugging)\r\n contextText String? @map(\"context_text\") @db.Text\r\n\r\n /// Vector embedding (768 dimensions for Gemini)\r\n searchVector Unsupported(\"vector(768)\") @map(\"search_vector\")\r\n\r\n /// Rich Markdown content for display\r\n displayContent String @map(\"display_content\") @db.Text\r\n\r\n sourcePageStart Int @map(\"source_page_start\")\r\n sourcePageEnd Int @map(\"source_page_end\")\r\n confidenceScore Float @default(0.5) @map(\"confidence_score\")\r\n metadata Json\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n\r\n @@index([promptConfigId])\r\n @@index([documentId])\r\n @@index([chunkType])\r\n @@index([confidenceScore])\r\n @@map(\"context_rag_chunks\")\r\n}\r\n\r\n/// Tracks document processing state\r\nmodel ContextRagDocument {\r\n id String @id @default(uuid())\r\n filename String\r\n fileHash String @map(\"file_hash\")\r\n fileSize Int @map(\"file_size\")\r\n pageCount Int @map(\"page_count\")\r\n documentType String? 
@map(\"document_type\")\r\n\r\n /// Experiment identifier for A/B testing models\r\n experimentId String? @map(\"experiment_id\")\r\n\r\n /// AI model used for processing\r\n modelName String? @map(\"model_name\")\r\n\r\n /// Model configuration as JSON\r\n modelConfig Json? @map(\"model_config\")\r\n\r\n status String @default(\"PENDING\")\r\n\r\n promptConfigId String? @map(\"prompt_config_id\")\r\n totalBatches Int @default(0) @map(\"total_batches\")\r\n completedBatches Int @default(0) @map(\"completed_batches\")\r\n failedBatches Int @default(0) @map(\"failed_batches\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n errorMessage String? @map(\"error_message\")\r\n\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n completedAt DateTime? @map(\"completed_at\")\r\n\r\n batches ContextRagBatch[]\r\n\r\n @@unique([fileHash, experimentId])\r\n @@index([status])\r\n @@index([fileHash])\r\n @@index([documentType])\r\n @@index([experimentId])\r\n @@map(\"context_rag_documents\")\r\n}\r\n\r\n/// Tracks individual batch processing jobs\r\nmodel ContextRagBatch {\r\n id String @id @default(uuid())\r\n documentId String @map(\"document_id\")\r\n document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)\r\n\r\n batchIndex Int @map(\"batch_index\")\r\n pageStart Int @map(\"page_start\")\r\n pageEnd Int @map(\"page_end\")\r\n status String @default(\"PENDING\")\r\n retryCount Int @default(0) @map(\"retry_count\")\r\n lastError String? @map(\"last_error\")\r\n\r\n tokenUsage Json? @map(\"token_usage\")\r\n processingMs Int? @map(\"processing_ms\")\r\n\r\n startedAt DateTime? @map(\"started_at\")\r\n completedAt DateTime? 
@map(\"completed_at\")\r\n createdAt DateTime @default(now()) @map(\"created_at\")\r\n updatedAt DateTime @updatedAt @map(\"updated_at\")\r\n\r\n @@unique([documentId, batchIndex])\r\n @@index([documentId, status])\r\n @@index([status])\r\n @@map(\"context_rag_batches\")\r\n}\r\n`;\r\n\r\n // Remove existing Context-RAG models if force\r\n let newSchema = existingSchema;\r\n if (options.force && existingSchema.includes('// Context-RAG Models')) {\r\n const startMarker = '// ============================================\\n// Context-RAG Models';\r\n const startIndex = newSchema.indexOf(startMarker);\r\n if (startIndex !== -1) {\r\n newSchema = newSchema.substring(0, startIndex).trim();\r\n }\r\n }\r\n\r\n // Append new models\r\n newSchema = newSchema.trim() + '\\n' + contextRagModels;\r\n\r\n // Write updated schema\r\n await fs.writeFile(schemaPath, newSchema);\r\n\r\n console.log('✅ Context-RAG models added to prisma/schema.prisma\\n');\r\n console.log('Next steps:');\r\n console.log(' 1. Run: npx prisma migrate dev --name add_context_rag');\r\n console.log(' 2. Enable pgvector in PostgreSQL: CREATE EXTENSION IF NOT EXISTS vector;');\r\n console.log(' 3. Start using Context-RAG!\\n');\r\n\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('status')\r\n .description('Check Context-RAG setup status')\r\n .action(async () => {\r\n console.log('🔍 Checking Context-RAG status...\\n');\r\n\r\n // Check schema\r\n const schemaPath = path.join(process.cwd(), 'prisma', 'schema.prisma');\r\n try {\r\n const schema = await fs.readFile(schemaPath, 'utf-8');\r\n\r\n console.log('Prisma Schema:');\r\n console.log(` ✅ schema.prisma found`);\r\n console.log(` ${schema.includes('ContextRagChunk') ? '✅' : '❌'} Context-RAG models`);\r\n console.log(` ${schema.includes('postgresqlExtensions') ? 
'✅' : '❌'} pgvector extension`);\r\n console.log();\r\n } catch {\r\n console.log('❌ prisma/schema.prisma not found\\n');\r\n }\r\n\r\n // Check env - note: these are checked at runtime, not via centralized env\r\n // since CLI may run before env is fully configured\r\n console.log('Environment:');\r\n console.log(` ${process.env['DATABASE_URL'] ? '✅' : '❌'} DATABASE_URL`);\r\n console.log(` ${process.env['GEMINI_API_KEY'] ? '✅' : '❌'} GEMINI_API_KEY`);\r\n console.log(` ${process.env['COHERE_API_KEY'] ? '✅' : '⚪'} COHERE_API_KEY (optional)`);\r\n console.log();\r\n });\r\n\r\nprogram\r\n .command('check-embeddings')\r\n .description('Check for embedding model mismatch between config and database')\r\n .action(async () => {\r\n console.log('🔍 Checking embedding model status...\\n');\r\n\r\n try {\r\n // Dynamic import to verify module exists (void to suppress unused warning)\r\n void (await import('../utils/embedding-utils.js'));\r\n\r\n // We can't fully check without a configured client, so just show stats\r\n console.log('⚠️ Full mismatch detection requires database connection.');\r\n console.log(' Use this command programmatically with your Prisma client.\\n');\r\n console.log('Example:');\r\n console.log(' import { detectEmbeddingMismatch } from \"@msbayindir/context-rag\";');\r\n console.log(' const mismatch = await detectEmbeddingMismatch(prisma, provider);');\r\n console.log();\r\n } catch (error) {\r\n console.error('❌ Error:', (error as Error).message);\r\n process.exit(1);\r\n }\r\n });\r\n\r\nprogram\r\n .command('reindex')\r\n .description('Re-index all chunks with current embedding model')\r\n .option('-c, --concurrency <number>', 'Number of concurrent embedding calls', '5')\r\n .option('-b, --batch-size <number>', 'Batch size for processing', '50')\r\n .option('-d, --document-id <id>', 'Re-index specific document only')\r\n .action(async (options) => {\r\n console.log('🔄 Starting re-indexing operation...\\n');\r\n\r\n console.log('Options:');\r\n 
console.log(` Concurrency: ${options.concurrency}`);\r\n console.log(` Batch size: ${options.batchSize}`);\r\n if (options.documentId) {\r\n console.log(` Document ID: ${options.documentId}`);\r\n }\r\n console.log();\r\n\r\n console.log('⚠️ Re-indexing requires database connection and embedding provider.');\r\n console.log(' Use this command programmatically:\\n');\r\n console.log('Example:');\r\n console.log(' import { MigrationService } from \"@msbayindir/context-rag\";');\r\n console.log(' const migrationService = new MigrationService(prisma, provider, config, logger);');\r\n console.log(' const result = await migrationService.reindex({');\r\n console.log(` concurrency: ${options.concurrency},`);\r\n console.log(` batchSize: ${options.batchSize},`);\r\n console.log(' onProgress: (p) => console.log(`${p.processed}/${p.total}`)');\r\n console.log(' });');\r\n console.log();\r\n });\r\n\r\nprogram.parse();\r\n"]}
|
package/dist/bin/cli.js
CHANGED
|
@@ -3,8 +3,111 @@ import { Command } from 'commander';
|
|
|
3
3
|
import * as fs from 'fs/promises';
|
|
4
4
|
import * as path from 'path';
|
|
5
5
|
|
|
6
|
+
// Bundler runtime helpers. They stay `var` declarations, as in the generated
// output, so the bindings are hoisted for everything bundled below them.
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wraps a module-initializer object in a run-once thunk.
 *
 * `fn` is an object whose first own property is the initializer function.
 * The first call to the returned `__init` invokes that initializer (passing 0)
 * and caches its return value in `res`; later calls just return the cache.
 */
var __esm = (fn, res) =>
  function __init() {
    if (fn) {
      const initializer = fn[__getOwnPropNames(fn)[0]];
      // Drop the reference before invoking: the initializer can then never run
      // a second time, even if it throws or `__init` is re-entered while it runs.
      fn = 0;
      res = initializer(0);
    }
    return res;
  };

/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`,
 * using the function stored under that key as the getter.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { enumerable: true, get: all[exportName] });
  }
};
|
|
15
|
+
|
|
16
|
+
// src/utils/embedding-utils.ts
|
|
17
|
+
// Namespace object for the bundled embedding utilities. Each export is exposed
// through a getter that resolves the function lazily, when the property is read.
var embedding_utils_exports = {};
__export(embedding_utils_exports, {
  buildEmbeddingModelId() {
    return buildEmbeddingModelId;
  },
  checkEmbeddingMismatch() {
    return checkEmbeddingMismatch;
  },
  detectEmbeddingMismatch() {
    return detectEmbeddingMismatch;
  },
  getEmbeddingModelStats() {
    return getEmbeddingModelStats;
  }
});
|
|
24
|
+
/**
 * Reports whether stored chunks were embedded with a model other than the
 * currently configured one.
 *
 * @param prisma - Client passed through to `checkEmbeddingMismatch`.
 * @param currentProvider - Active embedding provider (its `id` and `dimension` are compared).
 * @returns `null` when nothing needs migrating; otherwise an object with
 *   `severity`, a human-readable `message`, the raw comparison as `details`,
 *   and `action` ("reindex-required" for critical severity, else "reindex").
 */
async function detectEmbeddingMismatch(prisma, currentProvider) {
  const details = await checkEmbeddingMismatch(prisma, currentProvider);
  if (details.hasMismatch) {
    const severity = determineSeverity(details);
    const action = severity === "critical" ? "reindex-required" : "reindex";
    return {
      severity,
      message: buildMismatchMessage(details, severity),
      details,
      action
    };
  }
  return null;
}
|
|
38
|
+
/**
 * Compares the configured embedding model against the per-model chunk
 * statistics stored in the database.
 *
 * @param prisma - Client used for the statistics query and `contextRagChunk.count()`.
 * @param currentProvider - Active embedding provider; `id` and `dimension` are read.
 * @returns Summary with `hasMismatch`, the current model/provider/dimension,
 *   the per-model statistics (`existingModels`), `chunksToMigrate` and `totalChunks`.
 */
async function checkEmbeddingMismatch(prisma, currentProvider) {
  const existingModels = await getEmbeddingModelStats(prisma);
  const totalChunks = await prisma.contextRagChunk.count();
  const activeModelId = currentProvider.id;

  // Every group whose model differs from the active one (including groups with
  // a null model) counts towards the migration total.
  const chunksToMigrate = existingModels
    .filter((entry) => entry.model !== activeModelId)
    .reduce((sum, entry) => sum + entry.count, 0);

  return {
    hasMismatch: chunksToMigrate > 0,
    currentModel: activeModelId,
    currentProvider: extractProviderType(activeModelId),
    currentDimension: currentProvider.dimension,
    existingModels,
    chunksToMigrate,
    totalChunks
  };
}
|
|
58
|
+
/**
 * Counts stored chunks per (embedding_model, embedding_dimension) pair.
 *
 * @param prisma - Client exposing a `$queryRaw` tagged-template function.
 * @returns One `{ model, dimension, count }` entry per group, in the order the
 *   query returns them (largest group first); `count` is converted to a Number.
 */
async function getEmbeddingModelStats(prisma) {
  const rows = await prisma.$queryRaw`
        SELECT
            embedding_model,
            embedding_dimension,
            COUNT(*) as count
        FROM context_rag_chunks
        GROUP BY embedding_model, embedding_dimension
        ORDER BY count DESC
    `;

  const stats = [];
  for (const { embedding_model, embedding_dimension, count } of rows) {
    stats.push({
      model: embedding_model,
      dimension: embedding_dimension,
      count: Number(count)
    });
  }
  return stats;
}
|
|
74
|
+
/**
 * Grades a mismatch result.
 *
 * @param result - Output of `checkEmbeddingMismatch`.
 * @returns "none" when there is no mismatch; "critical" when any stored group
 *   has a non-null dimension different from the current one, or when more than
 *   50% of all chunks need migrating; "warning" otherwise.
 */
function determineSeverity(result) {
  if (!result.hasMismatch) return "none";

  const hasForeignDimension = result.existingModels.some(
    ({ dimension }) => dimension !== null && dimension !== result.currentDimension
  );
  if (hasForeignDimension) return "critical";

  const mismatchPercentage = (result.chunksToMigrate / result.totalChunks) * 100;
  return mismatchPercentage > 50 ? "critical" : "warning";
}
|
|
89
|
+
/**
 * Formats the user-facing description of a mismatch.
 *
 * @param result - Output of `checkEmbeddingMismatch`; the migrate/total counts,
 *   current model and current dimension are read.
 * @param severity - "critical" selects the critical wording; any other value
 *   produces the warning wording.
 * @returns The message, including the affected chunk count and its rounded
 *   percentage of all chunks.
 */
function buildMismatchMessage(result, severity) {
  const { chunksToMigrate, totalChunks, currentModel, currentDimension } = result;
  const percentage = Math.round(chunksToMigrate / totalChunks * 100);
  const affected = `${chunksToMigrate} chunks (${percentage}%)`;

  if (severity !== "critical") {
    return `\u26A1 Warning: ${affected} may have outdated embeddings. Current model: ${currentModel}. Consider running 'npx context-rag reindex' for optimal results.`;
  }
  return `\u26A0\uFE0F CRITICAL: ${affected} were created with different embedding models. Current: ${currentModel} (${currentDimension}d). Search results may be inaccurate. Run 'npx context-rag reindex' to fix.`;
}
|
|
96
|
+
/**
 * Guesses the provider family from a model identifier.
 *
 * Rules are tried in order and the first match wins: a "gemini" prefix; an
 * "openai" prefix or "text-embedding" anywhere; a "cohere" prefix or "embed-"
 * anywhere. Identifiers matching none of them fall back to "gemini".
 *
 * @param modelId - Model identifier string.
 * @returns "gemini", "openai" or "cohere".
 */
function extractProviderType(modelId) {
  const rules = [
    ["gemini", (id) => id.startsWith("gemini")],
    ["openai", (id) => id.startsWith("openai") || id.includes("text-embedding")],
    ["cohere", (id) => id.startsWith("cohere") || id.includes("embed-")]
  ];
  const matched = rules.find(([, matches]) => matches(modelId));
  return matched ? matched[0] : "gemini";
}
|
|
102
|
+
/**
 * Builds the combined model identifier "<provider>-<model>".
 *
 * @param provider - Provider name.
 * @param model - Model name.
 * @returns The two values, string-converted and joined by a single hyphen.
 */
function buildEmbeddingModelId(provider, model) {
  return "".concat(provider, "-", model);
}
|
|
105
|
+
// Run-once initializer for the bundled embedding utilities. The module body is
// empty, so calling it performs no work beyond marking the module initialized.
var init_embedding_utils = __esm({
  "src/utils/embedding-utils.ts": function () {
  }
});
|
|
6
109
|
var program = new Command();
|
|
7
|
-
program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.
|
|
110
|
+
program.name("context-rag").description("Context-RAG CLI - Setup and management tools").version("1.0.0-beta.11");
|
|
8
111
|
program.command("init").description("Initialize Context-RAG in your project").option("-f, --force", "Overwrite existing files").action(async (options) => {
|
|
9
112
|
console.log("\u{1F680} Initializing Context-RAG...\n");
|
|
10
113
|
try {
|
|
@@ -214,6 +317,42 @@ program.command("status").description("Check Context-RAG setup status").action(a
|
|
|
214
317
|
console.log(` ${process.env["COHERE_API_KEY"] ? "\u2705" : "\u26AA"} COHERE_API_KEY (optional)`);
|
|
215
318
|
console.log();
|
|
216
319
|
});
|
|
320
|
+
// `check-embeddings`: initializes the bundled embedding utilities and then
// prints guidance for running the mismatch check programmatically. The handler
// itself performs no database query; any error is reported and exits with 1.
program
  .command("check-embeddings")
  .description("Check for embedding model mismatch between config and database")
  .action(async () => {
    console.log("\u{1F50D} Checking embedding model status...\n");
    try {
      await Promise.resolve().then(() => {
        init_embedding_utils();
        return embedding_utils_exports;
      });

      const guidance = [
        "\u26A0\uFE0F Full mismatch detection requires database connection.",
        " Use this command programmatically with your Prisma client.\n",
        "Example:",
        ' import { detectEmbeddingMismatch } from "@msbayindir/context-rag";',
        " const mismatch = await detectEmbeddingMismatch(prisma, provider);"
      ];
      for (const line of guidance) {
        console.log(line);
      }
      console.log();
    } catch (error) {
      console.error("\u274C Error:", error.message);
      process.exit(1);
    }
  });
|
|
335
|
+
// `reindex`: echoes the parsed options and prints a code sample showing how to
// run re-indexing programmatically. The handler itself does not touch the
// database or any embedding provider. Option values are used as the strings
// the command-line parser supplies (defaults "5" and "50").
program
  .command("reindex")
  .description("Re-index all chunks with current embedding model")
  .option("-c, --concurrency <number>", "Number of concurrent embedding calls", "5")
  .option("-b, --batch-size <number>", "Batch size for processing", "50")
  .option("-d, --document-id <id>", "Re-index specific document only")
  .action(async (options) => {
    const { concurrency, batchSize, documentId } = options;

    console.log("\u{1F504} Starting re-indexing operation...\n");
    console.log("Options:");
    console.log(` Concurrency: ${concurrency}`);
    console.log(` Batch size: ${batchSize}`);
    if (documentId) {
      console.log(` Document ID: ${documentId}`);
    }
    console.log();

    const guidance = [
      "\u26A0\uFE0F Re-indexing requires database connection and embedding provider.",
      " Use this command programmatically:\n",
      "Example:",
      ' import { MigrationService } from "@msbayindir/context-rag";',
      " const migrationService = new MigrationService(prisma, provider, config, logger);",
      " const result = await migrationService.reindex({",
      ` concurrency: ${concurrency},`,
      ` batchSize: ${batchSize},`,
      " onProgress: (p) => console.log(`${p.processed}/${p.total}`)",
      " });"
    ];
    for (const line of guidance) {
      console.log(line);
    }
    console.log();
  });
|
|
217
356
|
program.parse();
|
|
218
357
|
//# sourceMappingURL=cli.js.map
|
|
219
358
|
//# sourceMappingURL=cli.js.map
|