@msbayindir/context-rag 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +464 -0
- package/dist/bin/cli.cjs +210 -0
- package/dist/bin/cli.cjs.map +1 -0
- package/dist/bin/cli.d.cts +1 -0
- package/dist/bin/cli.d.ts +1 -0
- package/dist/bin/cli.js +187 -0
- package/dist/bin/cli.js.map +1 -0
- package/dist/index.cjs +2877 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +812 -0
- package/dist/index.d.ts +812 -0
- package/dist/index.js +2842 -0
- package/dist/index.js.map +1 -0
- package/package.json +91 -0
- package/prisma/schema.prisma +225 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Muhammed Bayindir
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
# 🧠 Context-RAG
|
|
2
|
+
|
|
3
|
+
**A powerful, multimodal RAG engine with Anthropic-style Contextual Retrieval, Gemini Files API integration, and PostgreSQL-native vector search.**
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/@msbayindir/context-rag)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## ✨ Key Features
|
|
12
|
+
|
|
13
|
+
| Feature | Description |
|
|
14
|
+
|---------|-------------|
|
|
15
|
+
| 📁 **Gemini Files API** | Upload PDF once, use cached URI for entire pipeline (90%+ bandwidth savings) |
|
|
16
|
+
| 🧠 **Contextual Retrieval** | Anthropic-style context generation for each chunk (improves recall by ~49%) |
|
|
17
|
+
| 🔍 **Discovery Agent** | AI automatically analyzes documents and suggests optimal chunking strategies |
|
|
18
|
+
| 📄 **Multimodal Processing** | Uses Gemini Vision API to understand tables, charts, and layouts |
|
|
19
|
+
| 🧪 **Experiment System** | A/B test different models on the same document for comparison |
|
|
20
|
+
| 🎯 **Hybrid Search** | Semantic (vector) + Keyword (full-text) search combination |
|
|
21
|
+
| 🐘 **PostgreSQL Native** | No external vector DB needed, uses pgvector |
|
|
22
|
+
| ⚡ **Batch Processing** | Concurrent processing with automatic retry |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 📦 Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm install @msbayindir/context-rag
|
|
30
|
+
# or
|
|
31
|
+
pnpm add @msbayindir/context-rag
|
|
32
|
+
# or
|
|
33
|
+
yarn add @msbayindir/context-rag
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## 🛠️ Prerequisites
|
|
39
|
+
|
|
40
|
+
### 1. PostgreSQL with pgvector Extension
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Ubuntu/Debian
|
|
44
|
+
sudo apt install postgresql-15-pgvector
|
|
45
|
+
|
|
46
|
+
# macOS (Homebrew)
|
|
47
|
+
brew install pgvector
|
|
48
|
+
|
|
49
|
+
# Docker
|
|
50
|
+
docker run -e POSTGRES_PASSWORD=password -p 5432:5432 pgvector/pgvector:pg15
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Then enable the extension:
|
|
54
|
+
|
|
55
|
+
```sql
|
|
56
|
+
CREATE EXTENSION IF NOT EXISTS vector;
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 2. Prisma Schema Setup
|
|
60
|
+
|
|
61
|
+
Add Context-RAG models to your `prisma/schema.prisma`:
|
|
62
|
+
|
|
63
|
+
```prisma
|
|
64
|
+
// Required: pgvector extension
|
|
65
|
+
generator client {
|
|
66
|
+
provider = "prisma-client-js"
|
|
67
|
+
previewFeatures = ["postgresqlExtensions"]
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
datasource db {
|
|
71
|
+
provider = "postgresql"
|
|
72
|
+
url = env("DATABASE_URL")
|
|
73
|
+
extensions = [vector]
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Context-RAG Models (copy these to your schema)
|
|
77
|
+
model ContextRagPromptConfig {
|
|
78
|
+
id String @id @default(uuid())
|
|
79
|
+
documentType String @map("document_type")
|
|
80
|
+
name String
|
|
81
|
+
systemPrompt String @map("system_prompt") @db.Text
|
|
82
|
+
userPromptTemplate String? @map("user_prompt_template") @db.Text
|
|
83
|
+
chunkStrategy Json @map("chunk_strategy")
|
|
84
|
+
version Int @default(1)
|
|
85
|
+
isDefault Boolean @default(false) @map("is_default")
|
|
86
|
+
isActive Boolean @default(true) @map("is_active")
|
|
87
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
88
|
+
updatedAt DateTime @updatedAt @map("updated_at")
|
|
89
|
+
chunks ContextRagChunk[]
|
|
90
|
+
@@unique([documentType, version])
|
|
91
|
+
@@map("context_rag_prompt_configs")
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
model ContextRagChunk {
|
|
95
|
+
id String @id @default(uuid())
|
|
96
|
+
promptConfigId String @map("prompt_config_id")
|
|
97
|
+
promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)
|
|
98
|
+
documentId String @map("document_id")
|
|
99
|
+
chunkIndex Int @map("chunk_index")
|
|
100
|
+
chunkType String @map("chunk_type")
|
|
101
|
+
searchContent String @map("search_content") @db.Text
|
|
102
|
+
enrichedContent String? @map("enriched_content") @db.Text // Context + searchContent
|
|
103
|
+
contextText String? @map("context_text") @db.Text // Generated context only
|
|
104
|
+
searchVector Unsupported("vector(768)") @map("search_vector")
|
|
105
|
+
displayContent String @map("display_content") @db.Text
|
|
106
|
+
sourcePageStart Int @map("source_page_start")
|
|
107
|
+
sourcePageEnd Int @map("source_page_end")
|
|
108
|
+
confidenceScore Float @map("confidence_score")
|
|
109
|
+
metadata Json?
|
|
110
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
111
|
+
@@index([documentId])
|
|
112
|
+
@@index([chunkType])
|
|
113
|
+
@@map("context_rag_chunks")
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
model ContextRagDocument {
|
|
117
|
+
id String @id @default(uuid())
|
|
118
|
+
filename String
|
|
119
|
+
fileHash String @map("file_hash")
|
|
120
|
+
fileSize Int @map("file_size")
|
|
121
|
+
pageCount Int @map("page_count")
|
|
122
|
+
documentType String? @map("document_type")
|
|
123
|
+
promptConfigId String? @map("prompt_config_id")
|
|
124
|
+
experimentId String? @map("experiment_id")
|
|
125
|
+
modelName String? @map("model_name")
|
|
126
|
+
modelConfig Json? @map("model_config")
|
|
127
|
+
status String @default("PENDING")
|
|
128
|
+
completedBatches Int @default(0) @map("completed_batches")
|
|
129
|
+
failedBatches Int @default(0) @map("failed_batches")
|
|
130
|
+
totalBatches Int @default(0) @map("total_batches")
|
|
131
|
+
tokenUsageInput Int? @map("token_usage_input")
|
|
132
|
+
tokenUsageOutput Int? @map("token_usage_output")
|
|
133
|
+
tokenUsageTotal Int? @map("token_usage_total")
|
|
134
|
+
processingMs Int? @map("processing_ms")
|
|
135
|
+
error String? @db.Text
|
|
136
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
137
|
+
updatedAt DateTime @updatedAt @map("updated_at")
|
|
138
|
+
batches ContextRagBatch[]
|
|
139
|
+
@@unique([fileHash, experimentId])
|
|
140
|
+
@@map("context_rag_documents")
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
model ContextRagBatch {
|
|
144
|
+
id String @id @default(uuid())
|
|
145
|
+
documentId String @map("document_id")
|
|
146
|
+
document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)
|
|
147
|
+
batchIndex Int @map("batch_index")
|
|
148
|
+
pageStart Int @map("page_start")
|
|
149
|
+
pageEnd Int @map("page_end")
|
|
150
|
+
status String @default("PENDING")
|
|
151
|
+
tokenUsageInput Int? @map("token_usage_input")
|
|
152
|
+
tokenUsageOutput Int? @map("token_usage_output")
|
|
153
|
+
tokenUsageTotal Int? @map("token_usage_total")
|
|
154
|
+
processingMs Int? @map("processing_ms")
|
|
155
|
+
error String? @db.Text
|
|
156
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
157
|
+
updatedAt DateTime @updatedAt @map("updated_at")
|
|
158
|
+
@@unique([documentId, batchIndex])
|
|
159
|
+
@@map("context_rag_batches")
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Then run migrations:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
npx prisma migrate dev --name add-context-rag
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### 3. Environment Variables
|
|
170
|
+
|
|
171
|
+
```env
|
|
172
|
+
DATABASE_URL="postgresql://user:password@localhost:5432/mydb"
|
|
173
|
+
GEMINI_API_KEY="your-gemini-api-key"
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## 🚀 Quick Start
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
import { ContextRAG } from '@msbayindir/context-rag';
|
|
182
|
+
import { PrismaClient } from '@prisma/client';
|
|
183
|
+
|
|
184
|
+
const prisma = new PrismaClient();
|
|
185
|
+
|
|
186
|
+
const rag = new ContextRAG({
|
|
187
|
+
prisma,
|
|
188
|
+
geminiApiKey: process.env.GEMINI_API_KEY!,
|
|
189
|
+
model: 'gemini-3-flash-preview',
|
|
190
|
+
|
|
191
|
+
// NEW: Contextual Retrieval Enhancement
|
|
192
|
+
ragEnhancement: {
|
|
193
|
+
approach: 'anthropic_contextual',
|
|
194
|
+
strategy: 'simple', // 'none' | 'simple' | 'llm'
|
|
195
|
+
},
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
// 🔍 Discover optimal strategy
|
|
199
|
+
const strategy = await rag.discover({ file: './document.pdf' });
|
|
200
|
+
console.log(`Detected: ${strategy.documentType}`);
|
|
201
|
+
|
|
202
|
+
// ✅ Approve and create config
|
|
203
|
+
await rag.approveStrategy(strategy.id);
|
|
204
|
+
|
|
205
|
+
// 📥 Ingest document
|
|
206
|
+
const result = await rag.ingest({
|
|
207
|
+
file: './document.pdf',
|
|
208
|
+
onProgress: (status) => console.log(`Batch ${status.current}/${status.total}`),
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
// 🔎 Search
|
|
212
|
+
const results = await rag.search({
|
|
213
|
+
query: 'What are the key findings?',
|
|
214
|
+
mode: 'hybrid',
|
|
215
|
+
limit: 10,
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
results.forEach((r) => {
|
|
219
|
+
console.log(`[${r.score.toFixed(2)}] ${r.chunk.displayContent.slice(0, 100)}...`);
|
|
220
|
+
});
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## 🧠 Contextual Retrieval
|
|
226
|
+
|
|
227
|
+
Context-RAG implements [Anthropic's Contextual Retrieval](https://www.anthropic.com/news/contextual-retrieval) approach using Gemini Files API.
|
|
228
|
+
|
|
229
|
+
### The Problem
|
|
230
|
+
|
|
231
|
+
A chunk like `"Value: 50 mg/dL"` alone has no context. Searching for "Cyanide test" won't find it.
|
|
232
|
+
|
|
233
|
+
### The Solution
|
|
234
|
+
|
|
235
|
+
Each chunk gets contextual information prepended:
|
|
236
|
+
|
|
237
|
+
```
|
|
238
|
+
"This chunk is from the Biochemistry Test Results table, showing
|
|
239
|
+
the Cyanide test value for patient Ahmet Yılmaz. Value: 50 mg/dL"
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Configuration
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
const rag = new ContextRAG({
|
|
246
|
+
// ...
|
|
247
|
+
ragEnhancement: {
|
|
248
|
+
approach: 'anthropic_contextual',
|
|
249
|
+
strategy: 'llm', // Best quality, uses Gemini
|
|
250
|
+
skipChunkTypes: ['HEADING', 'IMAGE_REF'],
|
|
251
|
+
concurrencyLimit: 5,
|
|
252
|
+
},
|
|
253
|
+
});
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
| Strategy | Cost | Quality Improvement |
|
|
257
|
+
|----------|------|---------------------|
|
|
258
|
+
| `none` | $0 | Baseline |
|
|
259
|
+
| `simple` | $0 | +20% (template-based) |
|
|
260
|
+
| `llm` | ~$0.005/chunk | +49% (Gemini-generated) |
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## ⚙️ Configuration
|
|
265
|
+
|
|
266
|
+
```typescript
|
|
267
|
+
const rag = new ContextRAG({
|
|
268
|
+
// Required
|
|
269
|
+
prisma: prismaClient,
|
|
270
|
+
geminiApiKey: 'your-api-key',
|
|
271
|
+
|
|
272
|
+
// Model selection
|
|
273
|
+
model: 'gemini-3-flash-preview',
|
|
274
|
+
embeddingModel: 'gemini-embedding-exp-03-07',
|
|
275
|
+
|
|
276
|
+
// Generation
|
|
277
|
+
generationConfig: {
|
|
278
|
+
temperature: 0.2,
|
|
279
|
+
maxOutputTokens: 16384,
|
|
280
|
+
},
|
|
281
|
+
|
|
282
|
+
// Batch processing
|
|
283
|
+
batchConfig: {
|
|
284
|
+
pagesPerBatch: 15,
|
|
285
|
+
maxConcurrency: 3,
|
|
286
|
+
maxRetries: 3,
|
|
287
|
+
},
|
|
288
|
+
|
|
289
|
+
// RAG Enhancement
|
|
290
|
+
ragEnhancement: {
|
|
291
|
+
approach: 'anthropic_contextual',
|
|
292
|
+
strategy: 'simple',
|
|
293
|
+
skipChunkTypes: ['HEADING'],
|
|
294
|
+
},
|
|
295
|
+
});
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## 📚 API Reference
|
|
301
|
+
|
|
302
|
+
### Discovery
|
|
303
|
+
|
|
304
|
+
```typescript
|
|
305
|
+
const strategy = await rag.discover({
|
|
306
|
+
file: pdfBuffer,
|
|
307
|
+
documentTypeHint: 'Medical',
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
await rag.approveStrategy(strategy.id);
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### Ingestion
|
|
314
|
+
|
|
315
|
+
```typescript
|
|
316
|
+
const result = await rag.ingest({
|
|
317
|
+
file: pdfBuffer,
|
|
318
|
+
filename: 'report.pdf',
|
|
319
|
+
documentType: 'Medical',
|
|
320
|
+
experimentId: 'exp_v1', // For A/B testing
|
|
321
|
+
skipExisting: true,
|
|
322
|
+
onProgress: (status) => console.log(status),
|
|
323
|
+
});
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Search
|
|
327
|
+
|
|
328
|
+
```typescript
|
|
329
|
+
const results = await rag.search({
|
|
330
|
+
query: 'medication interactions',
|
|
331
|
+
mode: 'hybrid',
|
|
332
|
+
limit: 20,
|
|
333
|
+
minScore: 0.5,
|
|
334
|
+
filters: {
|
|
335
|
+
documentTypes: ['Medical'],
|
|
336
|
+
chunkTypes: ['TABLE', 'TEXT'],
|
|
337
|
+
},
|
|
338
|
+
typeBoost: {
|
|
339
|
+
TABLE: 1.5,
|
|
340
|
+
},
|
|
341
|
+
});
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
---
|
|
345
|
+
|
|
346
|
+
## 📤 Publishing to npm
|
|
347
|
+
|
|
348
|
+
If you want to publish your own fork:
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
# 1. Login to npm
|
|
352
|
+
npm login
|
|
353
|
+
|
|
354
|
+
# 2. Build the package
|
|
355
|
+
pnpm build
|
|
356
|
+
|
|
357
|
+
# 3. Publish (first time)
|
|
358
|
+
npm publish --access public
|
|
359
|
+
|
|
360
|
+
# 4. Publish update
|
|
361
|
+
npm version patch # or minor/major
|
|
362
|
+
npm publish
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
---
|
|
366
|
+
|
|
367
|
+
## 🧪 Development
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
# Install dependencies
|
|
371
|
+
pnpm install
|
|
372
|
+
|
|
373
|
+
# Build
|
|
374
|
+
pnpm build
|
|
375
|
+
|
|
376
|
+
# Lint
|
|
377
|
+
pnpm lint
|
|
378
|
+
|
|
379
|
+
# Type check
|
|
380
|
+
pnpm typecheck
|
|
381
|
+
|
|
382
|
+
# Run demo
|
|
383
|
+
pnpm demo
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
## 🤝 Contributing
|
|
389
|
+
|
|
390
|
+
Contributions are welcome! Here's how to get started:
|
|
391
|
+
|
|
392
|
+
### Getting Started
|
|
393
|
+
|
|
394
|
+
1. **Fork** the repository
|
|
395
|
+
2. **Clone** your fork: `git clone https://github.com/YOUR_USERNAME/ContextRAG.git`
|
|
396
|
+
3. **Install** dependencies: `pnpm install`
|
|
397
|
+
4. **Create** a branch: `git checkout -b feature/amazing-feature`
|
|
398
|
+
|
|
399
|
+
### Making Changes
|
|
400
|
+
|
|
401
|
+
1. Make your changes
|
|
402
|
+
2. Run linting: `pnpm lint`
|
|
403
|
+
3. Run build: `pnpm build`
|
|
404
|
+
4. Test your changes locally
|
|
405
|
+
|
|
406
|
+
### Submitting a PR
|
|
407
|
+
|
|
408
|
+
1. **Commit** your changes: `git commit -m 'feat: add amazing feature'`
|
|
409
|
+
2. **Push** to your fork: `git push origin feature/amazing-feature`
|
|
410
|
+
3. Open a **Pull Request**
|
|
411
|
+
|
|
412
|
+
### Commit Convention
|
|
413
|
+
|
|
414
|
+
We use [Conventional Commits](https://www.conventionalcommits.org/):
|
|
415
|
+
|
|
416
|
+
- `feat:` New feature
|
|
417
|
+
- `fix:` Bug fix
|
|
418
|
+
- `docs:` Documentation only
|
|
419
|
+
- `refactor:` Code change that neither fixes a bug nor adds a feature
|
|
420
|
+
- `test:` Adding tests
|
|
421
|
+
- `chore:` Build process or auxiliary tool changes
|
|
422
|
+
|
|
423
|
+
### Code Style
|
|
424
|
+
|
|
425
|
+
- TypeScript strict mode
|
|
426
|
+
- ESLint + Prettier
|
|
427
|
+
- Meaningful variable/function names
|
|
428
|
+
- JSDoc comments for public APIs
|
|
429
|
+
|
|
430
|
+
---
|
|
431
|
+
|
|
432
|
+
## 📁 Project Structure
|
|
433
|
+
|
|
434
|
+
```
|
|
435
|
+
context-rag/
|
|
436
|
+
├── src/
|
|
437
|
+
│   ├── context-rag.ts    # Main facade class
|
|
438
|
+
│   ├── engines/          # Discovery, Ingestion, Retrieval
|
|
439
|
+
│   ├── enhancements/     # RAG Enhancement handlers
|
|
440
|
+
│   │   └── anthropic/    # Anthropic Contextual Retrieval
|
|
441
|
+
│   ├── services/         # Gemini API, PDF Processor
|
|
442
|
+
│   ├── database/         # Prisma repositories
|
|
443
|
+
│   ├── config/           # Templates
|
|
444
|
+
│   ├── types/            # TypeScript types
|
|
445
|
+
│   ├── utils/            # Logger, Retry, RateLimiter
|
|
446
|
+
│   └── errors/           # Custom error classes
|
|
447
|
+
├── examples/             # Demo scripts
|
|
448
|
+
├── prisma/               # Reference schema
|
|
449
|
+
└── dist/                 # Built output
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
---
|
|
453
|
+
|
|
454
|
+
## 📄 License
|
|
455
|
+
|
|
456
|
+
MIT © [Muhammed Bayindir](https://github.com/msbayindir)
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
## 🙏 Acknowledgments
|
|
461
|
+
|
|
462
|
+
- [Anthropic](https://www.anthropic.com/) for the Contextual Retrieval research
|
|
463
|
+
- [Google](https://ai.google.dev/) for Gemini API and Files API
|
|
464
|
+
- [pgvector](https://github.com/pgvector/pgvector) for PostgreSQL vector support
|
package/dist/bin/cli.cjs
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
var commander = require('commander');
|
|
5
|
+
var fs = require('fs/promises');
|
|
6
|
+
var path = require('path');
|
|
7
|
+
|
|
8
|
+
/**
 * Build a frozen, prototype-less namespace object around a CommonJS export.
 *
 * Values already flagged with `__esModule` are returned untouched. Otherwise
 * every own enumerable key except `default` is re-exposed through a getter
 * (reusing the source accessor when one exists), and the original value is
 * attached as `default`.
 *
 * @param {*} e - The value returned by `require()`.
 * @returns {*} `e` itself when it is an ES-module interop object, otherwise
 *   the frozen namespace wrapper.
 */
function _interopNamespace(e) {
  if (e && e.__esModule) {
    return e;
  }

  const namespace = Object.create(null);

  if (e) {
    for (const key of Object.keys(e)) {
      if (key === 'default') {
        continue;
      }
      const descriptor = Object.getOwnPropertyDescriptor(e, key);
      // Keep an existing accessor as-is; plain values get a getter that
      // reads through to the source object on every access.
      const binding = descriptor.get
        ? descriptor
        : { enumerable: true, get: function () { return e[key]; } };
      Object.defineProperty(namespace, key, binding);
    }
  }

  namespace.default = e;
  return Object.freeze(namespace);
}
|
|
25
|
+
|
|
26
|
+
// Frozen namespace views of the Node built-ins used by the commands below.
const fs__namespace = /*#__PURE__*/_interopNamespace(fs);
const path__namespace = /*#__PURE__*/_interopNamespace(path);

// Root CLI definition. The version is read from the package manifest that
// ships two directories above dist/bin, so `context-rag --version` cannot
// drift from the published package version (it previously reported a
// hard-coded "0.1.0").
const program = new commander.Command();
program
  .name("context-rag")
  .description("Context-RAG CLI - Setup and management tools")
  .version(require("../../package.json").version);
|
|
31
|
+
/**
 * Prisma models appended to the user's schema by `context-rag init`.
 *
 * NOTE(review): this template differs from the schema documented in the
 * README (no `@map` column names, no enrichedContent / contextText /
 * experimentId fields, different token-usage and error fields, and
 * `fileHash @unique` where the README has `@@unique([fileHash, experimentId])`).
 * Confirm which definition the library expects before relying on it.
 */
const CONTEXT_RAG_MODELS = `
// ============================================
// Context-RAG Models
// ============================================

model ContextRagPromptConfig {
id String @id @default(uuid())
documentType String
name String
systemPrompt String @db.Text
chunkStrategy Json
version Int @default(1)
isActive Boolean @default(true)
isDefault Boolean @default(false)
createdBy String?
changeLog String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt

chunks ContextRagChunk[]

@@unique([documentType, version])
@@index([documentType, isActive])
@@map("context_rag_prompt_configs")
}

model ContextRagChunk {
id String @id @default(uuid())
promptConfigId String
promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)
documentId String
chunkIndex Int
chunkType String

searchContent String @db.Text
searchVector Unsupported("vector(768)")
displayContent String @db.Text

sourcePageStart Int
sourcePageEnd Int
confidenceScore Float @default(0.5)
metadata Json

createdAt DateTime @default(now())

@@index([promptConfigId])
@@index([documentId])
@@index([chunkType])
@@map("context_rag_chunks")
}

model ContextRagDocument {
id String @id @default(uuid())
filename String
fileHash String @unique
fileSize Int
pageCount Int
documentType String?
status String @default("PENDING")

promptConfigId String?
totalBatches Int @default(0)
completedBatches Int @default(0)
failedBatches Int @default(0)

tokenUsage Json?
processingMs Int?
errorMessage String?

createdAt DateTime @default(now())
completedAt DateTime?

batches ContextRagBatch[]

@@index([status])
@@index([fileHash])
@@map("context_rag_documents")
}

model ContextRagBatch {
id String @id @default(uuid())
documentId String
document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)

batchIndex Int
pageStart Int
pageEnd Int
status String @default("PENDING")
retryCount Int @default(0)
lastError String?

tokenUsage Json?
processingMs Int?

startedAt DateTime?
completedAt DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt

@@index([documentId, status])
@@map("context_rag_batches")
}
`;

/**
 * Remove a previously generated Context-RAG section from a Prisma schema.
 *
 * Only the generated banner comment and `model ContextRag… { … }` blocks are
 * removed; everything else (including models declared after the generated
 * section) is kept. Both LF and CRLF line endings are handled.
 *
 * @param {string} schema - Current contents of schema.prisma.
 * @returns {string} The schema without any Context-RAG banner or models.
 */
function stripContextRagModels(schema) {
  const bannerPattern =
    /^\/\/ =+[ \t]*\r?\n\/\/ Context-RAG Models[ \t]*\r?\n\/\/ =+[ \t]*\r?\n?/gm;
  // A Prisma model ends at the first closing brace that starts a line.
  const modelPattern = /^model[ \t]+ContextRag\w*[ \t]*\{[\s\S]*?^\}[ \t]*\r?\n?/gm;
  return schema.replace(bannerPattern, "").replace(modelPattern, "");
}

/**
 * `context-rag init`: append the Context-RAG Prisma models to
 * ./prisma/schema.prisma in the current working directory.
 *
 * Exits with code 1 when the schema file is missing or an error occurs, and
 * with code 0 (without writing) when the models are already present and
 * `--force` was not given. With `--force`, existing Context-RAG models are
 * replaced in place of being duplicated, and unrelated schema content is
 * preserved.
 */
program
  .command("init")
  .description("Initialize Context-RAG in your project")
  .option("-f, --force", "Overwrite existing files")
  .action(async (options) => {
    console.log("\u{1F680} Initializing Context-RAG...\n");
    try {
      const prismaDir = path__namespace.join(process.cwd(), "prisma");
      const schemaPath = path__namespace.join(prismaDir, "schema.prisma");

      let schemaExists = false;
      try {
        await fs__namespace.access(schemaPath);
        schemaExists = true;
      } catch {
        schemaExists = false;
      }
      if (!schemaExists) {
        console.log("\u274C Prisma schema not found at prisma/schema.prisma");
        console.log(" Please run `npx prisma init` first.\n");
        process.exit(1);
      }

      const existingSchema = await fs__namespace.readFile(schemaPath, "utf-8");
      if (existingSchema.includes("ContextRagChunk") && !options.force) {
        console.log("\u26A0\uFE0F Context-RAG models already exist in schema.");
        console.log(" Use --force to overwrite.\n");
        process.exit(0);
      }

      // Advisory only: the schema is still written when the preview feature
      // flag is missing.
      if (!existingSchema.includes("postgresqlExtensions")) {
        console.log("\u26A0\uFE0F Warning: pgvector extension not enabled.");
        console.log(" Add the following to your schema.prisma:\n");
        console.log(" generator client {");
        console.log(' provider = "prisma-client-js"');
        console.log(' previewFeatures = ["postgresqlExtensions"]');
        console.log(" }\n");
        console.log(" datasource db {");
        console.log(' provider = "postgresql"');
        console.log(' url = env("DATABASE_URL")');
        console.log(" extensions = [vector]");
        console.log(" }\n");
      }

      // Without --force we only reach this point when no Context-RAG models
      // exist, so there is nothing to strip. With --force, remove just the
      // generated section; truncating from the banner to end-of-file would
      // delete user models that follow it.
      const baseSchema = options.force
        ? stripContextRagModels(existingSchema)
        : existingSchema;
      const newSchema = baseSchema.trim() + "\n" + CONTEXT_RAG_MODELS;
      await fs__namespace.writeFile(schemaPath, newSchema);

      console.log("\u2705 Context-RAG models added to prisma/schema.prisma\n");
      console.log("Next steps:");
      console.log(" 1. Run: npx prisma migrate dev --name add_context_rag");
      console.log(" 2. Enable pgvector in PostgreSQL: CREATE EXTENSION IF NOT EXISTS vector;");
      console.log(" 3. Start using Context-RAG!\n");
    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      console.error("\u274C Error:", message);
      process.exit(1);
    }
  });
|
|
190
|
+
/**
 * `context-rag status`: print a checklist describing the local setup.
 *
 * Reads ./prisma/schema.prisma from the current working directory and
 * reports whether it contains the strings "ContextRagChunk" and
 * "postgresqlExtensions", then reports whether DATABASE_URL and
 * GEMINI_API_KEY are set in the environment. Any failure while reading the
 * schema is reported as "not found".
 */
program
  .command("status")
  .description("Check Context-RAG setup status")
  .action(async () => {
    const mark = (ok) => (ok ? "\u2705" : "\u274C");

    console.log("\u{1F50D} Checking Context-RAG status...\n");

    const schemaFile = path__namespace.join(process.cwd(), "prisma", "schema.prisma");
    try {
      const contents = await fs__namespace.readFile(schemaFile, "utf-8");
      const schemaChecks = [
        [true, "schema.prisma found"],
        [contents.includes("ContextRagChunk"), "Context-RAG models"],
        [contents.includes("postgresqlExtensions"), "pgvector extension"],
      ];
      console.log("Prisma Schema:");
      for (const [ok, label] of schemaChecks) {
        console.log(` ${mark(ok)} ${label}`);
      }
      console.log();
    } catch {
      console.log("\u274C prisma/schema.prisma not found\n");
    }

    console.log("Environment:");
    for (const variable of ["DATABASE_URL", "GEMINI_API_KEY"]) {
      console.log(` ${mark(process.env[variable])} ${variable}`);
    }
    console.log();
  });
|
|
208
|
+
// The command actions are async, so use parseAsync: it returns a promise
// that settles with the action, letting a rejection be reported here with a
// non-zero exit code where parse() would leave it unhandled.
program.parseAsync().catch((error) => {
  console.error("\u274C Error:", error instanceof Error ? error.message : String(error));
  process.exit(1);
});
//# sourceMappingURL=cli.cjs.map
|