@msbayindir/context-rag 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +464 -0
- package/dist/bin/cli.cjs +210 -0
- package/dist/bin/cli.cjs.map +1 -0
- package/dist/bin/cli.d.cts +1 -0
- package/dist/bin/cli.d.ts +1 -0
- package/dist/bin/cli.js +187 -0
- package/dist/bin/cli.js.map +1 -0
- package/dist/index.cjs +2877 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +812 -0
- package/dist/index.d.ts +812 -0
- package/dist/index.js +2842 -0
- package/dist/index.js.map +1 -0
- package/package.json +91 -0
- package/prisma/schema.prisma +225 -0
package/package.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@msbayindir/context-rag",
|
|
3
|
+
"version": "1.0.0-beta.1",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "A powerful, multimodal RAG engine with contextual retrieval, auto-prompt discovery, and PostgreSQL-native vector search",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"module": "dist/index.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"require": "./dist/index.cjs",
|
|
13
|
+
"import": "./dist/index.js"
|
|
14
|
+
},
|
|
15
|
+
"./prisma": {
|
|
16
|
+
"require": "./prisma/schema.prisma",
|
|
17
|
+
"import": "./prisma/schema.prisma"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"bin": {
|
|
21
|
+
"context-rag": "./dist/bin/cli.js"
|
|
22
|
+
},
|
|
23
|
+
"files": [
|
|
24
|
+
"dist",
|
|
25
|
+
"prisma",
|
|
26
|
+
"README.md",
|
|
27
|
+
"LICENSE"
|
|
28
|
+
],
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsup",
|
|
31
|
+
"dev": "tsup --watch",
|
|
32
|
+
"test": "vitest",
|
|
33
|
+
"test:coverage": "vitest --coverage",
|
|
34
|
+
"lint": "eslint src --ext .ts",
|
|
35
|
+
"lint:fix": "eslint src --ext .ts --fix",
|
|
36
|
+
"format": "prettier --write \"src/**/*.ts\"",
|
|
37
|
+
"typecheck": "tsc --noEmit",
|
|
38
|
+
"prepublishOnly": "npm run build"
|
|
39
|
+
},
|
|
40
|
+
"keywords": [
|
|
41
|
+
"rag",
|
|
42
|
+
"retrieval-augmented-generation",
|
|
43
|
+
"vector-search",
|
|
44
|
+
"pgvector",
|
|
45
|
+
"postgresql",
|
|
46
|
+
"gemini",
|
|
47
|
+
"ai",
|
|
48
|
+
"llm",
|
|
49
|
+
"pdf",
|
|
50
|
+
"multimodal",
|
|
51
|
+
"contextual-retrieval",
|
|
52
|
+
"embedding"
|
|
53
|
+
],
|
|
54
|
+
"author": "Muhammed Bayindir",
|
|
55
|
+
"license": "MIT",
|
|
56
|
+
"repository": {
|
|
57
|
+
"type": "git",
|
|
58
|
+
"url": "https://github.com/msbayindir/ContextRAG.git"
|
|
59
|
+
},
|
|
60
|
+
"bugs": {
|
|
61
|
+
"url": "https://github.com/msbayindir/ContextRAG/issues"
|
|
62
|
+
},
|
|
63
|
+
"homepage": "https://github.com/msbayindir/ContextRAG#readme",
|
|
64
|
+
"engines": {
|
|
65
|
+
"node": ">=18.0.0"
|
|
66
|
+
},
|
|
67
|
+
"dependencies": {
|
|
68
|
+
"@google/generative-ai": "^0.21.0",
|
|
69
|
+
"@prisma/client": "^6.2.1",
|
|
70
|
+
"commander": "^12.1.0",
|
|
71
|
+
"p-limit": "^7.2.0",
|
|
72
|
+
"pdf-parse": "^1.1.1",
|
|
73
|
+
"zod": "^3.24.1"
|
|
74
|
+
},
|
|
75
|
+
"devDependencies": {
|
|
76
|
+
"@eslint/js": "^9.39.2",
|
|
77
|
+
"@types/node": "^22.10.7",
|
|
78
|
+
"@types/pdf-parse": "^1.1.4",
|
|
79
|
+
"@typescript-eslint/eslint-plugin": "^8.21.0",
|
|
80
|
+
"@typescript-eslint/parser": "^8.21.0",
|
|
81
|
+
"eslint": "^9.18.0",
|
|
82
|
+
"prettier": "^3.4.2",
|
|
83
|
+
"prisma": "^6.2.1",
|
|
84
|
+
"tsup": "^8.3.5",
|
|
85
|
+
"typescript": "^5.7.3",
|
|
86
|
+
"vitest": "^2.1.8"
|
|
87
|
+
},
|
|
88
|
+
"peerDependencies": {
|
|
89
|
+
"@prisma/client": ">=5.0.0"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
package/prisma/schema.prisma
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
// This is a reference schema for Context-RAG
|
|
2
|
+
// Users should copy these models to their own schema.prisma
|
|
3
|
+
// or use `npx context-rag init` to add them automatically
|
|
4
|
+
|
|
5
|
+
generator client {
|
|
6
|
+
provider = "prisma-client-js"
|
|
7
|
+
previewFeatures = ["postgresqlExtensions"]
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
datasource db {
|
|
11
|
+
provider = "postgresql"
|
|
12
|
+
url = env("DATABASE_URL")
|
|
13
|
+
extensions = [vector]
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// ============================================
|
|
17
|
+
// Context-RAG Models
|
|
18
|
+
// ============================================
|
|
19
|
+
|
|
20
|
+
/// Stores prompt configurations for different document types
|
|
21
|
+
/// Each document type can have multiple versions for A/B testing
|
|
22
|
+
model ContextRagPromptConfig {
|
|
23
|
+
id String @id @default(uuid())
|
|
24
|
+
|
|
25
|
+
/// Document type identifier (e.g., "Medical", "Legal", "Technical")
|
|
26
|
+
documentType String @map("document_type")
|
|
27
|
+
|
|
28
|
+
/// Human-readable name for this configuration
|
|
29
|
+
name String
|
|
30
|
+
|
|
31
|
+
/// System prompt used for AI processing
|
|
32
|
+
systemPrompt String @map("system_prompt") @db.Text
|
|
33
|
+
|
|
34
|
+
/// Chunking strategy configuration as JSON
|
|
35
|
+
chunkStrategy Json @map("chunk_strategy")
|
|
36
|
+
|
|
37
|
+
/// Version number for this document type (incremental)
|
|
38
|
+
version Int @default(1)
|
|
39
|
+
|
|
40
|
+
/// Whether this configuration is active
|
|
41
|
+
isActive Boolean @default(true) @map("is_active")
|
|
42
|
+
|
|
43
|
+
/// Whether this is the default config for the document type
|
|
44
|
+
isDefault Boolean @default(false) @map("is_default")
|
|
45
|
+
|
|
46
|
+
/// Who created this: 'discovery' | 'manual' | user ID
|
|
47
|
+
createdBy String? @map("created_by")
|
|
48
|
+
|
|
49
|
+
/// Reason for changes / changelog entry
|
|
50
|
+
changeLog String? @map("change_log")
|
|
51
|
+
|
|
52
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
53
|
+
updatedAt DateTime @updatedAt @map("updated_at")
|
|
54
|
+
|
|
55
|
+
/// Related chunks created with this config
|
|
56
|
+
chunks ContextRagChunk[]
|
|
57
|
+
|
|
58
|
+
@@unique([documentType, version])
|
|
59
|
+
@@index([documentType, isActive])
|
|
60
|
+
@@map("context_rag_prompt_configs")
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/// Stores vector chunks for semantic search
|
|
64
|
+
model ContextRagChunk {
|
|
65
|
+
id String @id @default(uuid())
|
|
66
|
+
|
|
67
|
+
/// Reference to the prompt config used to create this chunk
|
|
68
|
+
promptConfigId String @map("prompt_config_id")
|
|
69
|
+
promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)
|
|
70
|
+
|
|
71
|
+
/// Reference to the source document
|
|
72
|
+
documentId String @map("document_id")
|
|
73
|
+
|
|
74
|
+
/// Sequential index of this chunk within the document
|
|
75
|
+
chunkIndex Int @map("chunk_index")
|
|
76
|
+
|
|
77
|
+
/// Type of content: TEXT, TABLE, LIST, CODE, HEADING, IMAGE_REF, QUOTE, MIXED
|
|
78
|
+
chunkType String @map("chunk_type")
|
|
79
|
+
|
|
80
|
+
/// Plain text content optimized for vector search (original)
|
|
81
|
+
searchContent String @map("search_content") @db.Text
|
|
82
|
+
|
|
83
|
+
/// Enriched content: context + searchContent (used for embedding when RAG enhancement enabled)
|
|
84
|
+
enrichedContent String? @map("enriched_content") @db.Text
|
|
85
|
+
|
|
86
|
+
/// AI-generated context text only (for debugging/display)
|
|
87
|
+
contextText String? @map("context_text") @db.Text
|
|
88
|
+
|
|
89
|
+
/// Vector embedding (768 dimensions for Gemini)
|
|
90
|
+
searchVector Unsupported("vector(768)") @map("search_vector")
|
|
91
|
+
|
|
92
|
+
/// Rich Markdown content for display
|
|
93
|
+
displayContent String @map("display_content") @db.Text
|
|
94
|
+
|
|
95
|
+
/// Starting page number in source document
|
|
96
|
+
sourcePageStart Int @map("source_page_start")
|
|
97
|
+
|
|
98
|
+
/// Ending page number in source document
|
|
99
|
+
sourcePageEnd Int @map("source_page_end")
|
|
100
|
+
|
|
101
|
+
/// Confidence score from AI processing (0.0 - 1.0)
|
|
102
|
+
confidenceScore Float @default(0.5) @map("confidence_score")
|
|
103
|
+
|
|
104
|
+
/// Additional metadata as JSON
|
|
105
|
+
/// { page, pageRange, type, confidence, tokens, keywords, ... }
|
|
106
|
+
metadata Json
|
|
107
|
+
|
|
108
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
109
|
+
|
|
110
|
+
@@index([promptConfigId])
|
|
111
|
+
@@index([documentId])
|
|
112
|
+
@@index([chunkType])
|
|
113
|
+
@@index([confidenceScore])
|
|
114
|
+
@@map("context_rag_chunks")
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/// Tracks document processing state
|
|
118
|
+
model ContextRagDocument {
|
|
119
|
+
id String @id @default(uuid())
|
|
120
|
+
|
|
121
|
+
/// Original filename
|
|
122
|
+
filename String
|
|
123
|
+
|
|
124
|
+
/// SHA-256 hash for deduplication
|
|
125
|
+
fileHash String @map("file_hash")
|
|
126
|
+
|
|
127
|
+
/// File size in bytes
|
|
128
|
+
fileSize Int @map("file_size")
|
|
129
|
+
|
|
130
|
+
/// Total page count
|
|
131
|
+
pageCount Int @map("page_count")
|
|
132
|
+
|
|
133
|
+
/// Document type (matches PromptConfig.documentType)
|
|
134
|
+
documentType String? @map("document_type")
|
|
135
|
+
|
|
136
|
+
/// Experiment identifier for A/B testing models
|
|
137
|
+
/// e.g., "exp_flash_v1", "exp_pro_v2"
|
|
138
|
+
experimentId String? @map("experiment_id")
|
|
139
|
+
|
|
140
|
+
/// AI model used for processing
|
|
141
|
+
/// e.g., "gemini-3-flash-preview", "gemini-3-pro-preview"
|
|
142
|
+
modelName String? @map("model_name")
|
|
143
|
+
|
|
144
|
+
/// Model configuration as JSON
|
|
145
|
+
/// { temperature, maxOutputTokens, ... }
|
|
146
|
+
modelConfig Json? @map("model_config")
|
|
147
|
+
|
|
148
|
+
/// Processing status: PENDING, DISCOVERING, AWAITING_APPROVAL, PROCESSING, COMPLETED, FAILED, PARTIAL
|
|
149
|
+
status String @default("PENDING")
|
|
150
|
+
|
|
151
|
+
/// Reference to prompt config used for processing
|
|
152
|
+
promptConfigId String? @map("prompt_config_id")
|
|
153
|
+
|
|
154
|
+
/// Batch processing statistics
|
|
155
|
+
totalBatches Int @default(0) @map("total_batches")
|
|
156
|
+
completedBatches Int @default(0) @map("completed_batches")
|
|
157
|
+
failedBatches Int @default(0) @map("failed_batches")
|
|
158
|
+
|
|
159
|
+
/// Total token usage as JSON { input, output, total }
|
|
160
|
+
tokenUsage Json? @map("token_usage")
|
|
161
|
+
|
|
162
|
+
/// Total processing time in milliseconds
|
|
163
|
+
processingMs Int? @map("processing_ms")
|
|
164
|
+
|
|
165
|
+
/// Error message if failed
|
|
166
|
+
errorMessage String? @map("error_message")
|
|
167
|
+
|
|
168
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
169
|
+
completedAt DateTime? @map("completed_at")
|
|
170
|
+
|
|
171
|
+
/// Related batch jobs
|
|
172
|
+
batches ContextRagBatch[]
|
|
173
|
+
|
|
174
|
+
/// Unique constraint: same file + same experiment = one record
|
|
175
|
+
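/// Allows same PDF to be processed with different experiments. Note: experimentId is nullable and PostgreSQL treats NULLs as distinct in unique constraints, so rows without an experimentId are not deduplicated by this constraint.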
|
|
176
|
+
@@unique([fileHash, experimentId])
|
|
177
|
+
@@index([status])
|
|
178
|
+
@@index([fileHash])
|
|
179
|
+
@@index([documentType])
|
|
180
|
+
@@index([experimentId])
|
|
181
|
+
@@map("context_rag_documents")
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/// Tracks individual batch processing jobs
|
|
185
|
+
model ContextRagBatch {
|
|
186
|
+
id String @id @default(uuid())
|
|
187
|
+
|
|
188
|
+
/// Reference to parent document
|
|
189
|
+
documentId String @map("document_id")
|
|
190
|
+
document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)
|
|
191
|
+
|
|
192
|
+
/// Sequential batch index (0-based)
|
|
193
|
+
batchIndex Int @map("batch_index")
|
|
194
|
+
|
|
195
|
+
/// Starting page of this batch
|
|
196
|
+
pageStart Int @map("page_start")
|
|
197
|
+
|
|
198
|
+
/// Ending page of this batch
|
|
199
|
+
pageEnd Int @map("page_end")
|
|
200
|
+
|
|
201
|
+
/// Batch status: PENDING, PROCESSING, RETRYING, COMPLETED, FAILED
|
|
202
|
+
status String @default("PENDING")
|
|
203
|
+
|
|
204
|
+
/// Number of retry attempts
|
|
205
|
+
retryCount Int @default(0) @map("retry_count")
|
|
206
|
+
|
|
207
|
+
/// Last error message if failed
|
|
208
|
+
lastError String? @map("last_error")
|
|
209
|
+
|
|
210
|
+
/// Token usage for this batch as JSON
|
|
211
|
+
tokenUsage Json? @map("token_usage")
|
|
212
|
+
|
|
213
|
+
/// Processing time in milliseconds
|
|
214
|
+
processingMs Int? @map("processing_ms")
|
|
215
|
+
|
|
216
|
+
startedAt DateTime? @map("started_at")
|
|
217
|
+
completedAt DateTime? @map("completed_at")
|
|
218
|
+
createdAt DateTime @default(now()) @map("created_at")
|
|
219
|
+
updatedAt DateTime @updatedAt @map("updated_at")
|
|
220
|
+
|
|
221
|
+
@@unique([documentId, batchIndex])
|
|
222
|
+
@@index([documentId, status])
|
|
223
|
+
@@index([status])
|
|
224
|
+
@@map("context_rag_batches")
|
|
225
|
+
}
|