@msbayindir/context-rag 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,91 @@
1
+ {
2
+ "name": "@msbayindir/context-rag",
3
+ "version": "1.0.0-beta.1",
4
+ "type": "module",
5
+ "description": "A powerful, multimodal RAG engine with contextual retrieval, auto-prompt discovery, and PostgreSQL-native vector search",
6
+ "main": "dist/index.js",
7
+ "module": "dist/index.mjs",
8
+ "types": "dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "require": "./dist/index.js",
13
+ "import": "./dist/index.mjs"
14
+ },
15
+ "./prisma": {
16
+ "require": "./dist/database/prisma/schema.prisma",
17
+ "import": "./dist/database/prisma/schema.prisma"
18
+ }
19
+ },
20
+ "bin": {
21
+ "context-rag": "./dist/bin/cli.js"
22
+ },
23
+ "files": [
24
+ "dist",
25
+ "prisma",
26
+ "README.md",
27
+ "LICENSE"
28
+ ],
29
+ "scripts": {
30
+ "build": "tsup",
31
+ "dev": "tsup --watch",
32
+ "test": "vitest",
33
+ "test:coverage": "vitest --coverage",
34
+ "lint": "eslint src --ext .ts",
35
+ "lint:fix": "eslint src --ext .ts --fix",
36
+ "format": "prettier --write \"src/**/*.ts\"",
37
+ "typecheck": "tsc --noEmit",
38
+ "prepublishOnly": "npm run build"
39
+ },
40
+ "keywords": [
41
+ "rag",
42
+ "retrieval-augmented-generation",
43
+ "vector-search",
44
+ "pgvector",
45
+ "postgresql",
46
+ "gemini",
47
+ "ai",
48
+ "llm",
49
+ "pdf",
50
+ "multimodal",
51
+ "contextual-retrieval",
52
+ "embedding"
53
+ ],
54
+ "author": "Muhammed Bayindir",
55
+ "license": "MIT",
56
+ "repository": {
57
+ "type": "git",
58
+ "url": "https://github.com/msbayindir/ContextRAG.git"
59
+ },
60
+ "bugs": {
61
+ "url": "https://github.com/msbayindir/ContextRAG/issues"
62
+ },
63
+ "homepage": "https://github.com/msbayindir/ContextRAG#readme",
64
+ "engines": {
65
+ "node": ">=18.0.0"
66
+ },
67
+ "dependencies": {
68
+ "@google/generative-ai": "^0.21.0",
69
+ "@prisma/client": "^6.2.1",
70
+ "commander": "^12.1.0",
71
+ "p-limit": "^7.2.0",
72
+ "pdf-parse": "^1.1.1",
73
+ "zod": "^3.24.1"
74
+ },
75
+ "devDependencies": {
76
+ "@eslint/js": "^9.39.2",
77
+ "@types/node": "^22.10.7",
78
+ "@types/pdf-parse": "^1.1.4",
79
+ "@typescript-eslint/eslint-plugin": "^8.21.0",
80
+ "@typescript-eslint/parser": "^8.21.0",
81
+ "eslint": "^9.18.0",
82
+ "prettier": "^3.4.2",
83
+ "prisma": "^6.2.1",
84
+ "tsup": "^8.3.5",
85
+ "typescript": "^5.7.3",
86
+ "vitest": "^2.1.8"
87
+ },
88
+ "peerDependencies": {
89
+ "@prisma/client": ">=5.0.0"
90
+ }
91
+ }
@@ -0,0 +1,225 @@
1
+ // This is a reference schema for Context-RAG
2
+ // Users should copy these models to their own schema.prisma
3
+ // or use `npx context-rag init` to add them automatically
4
+
5
+ generator client {
6
+ provider = "prisma-client-js"
7
+ previewFeatures = ["postgresqlExtensions"] // preview flag needed for the `extensions` field in the datasource block
8
+ }
9
+
10
+ datasource db {
11
+ provider = "postgresql"
12
+ url = env("DATABASE_URL")
13
+ extensions = [vector] // pgvector extension; backs the vector(768) column ContextRagChunk.searchVector
14
+ }
15
+
16
+ // ============================================
17
+ // Context-RAG Models
18
+ // ============================================
19
+
20
+ /// Stores prompt configurations for different document types
21
+ /// Each document type can have multiple versions for A/B testing (one row per documentType + version)
22
+ model ContextRagPromptConfig {
23
+ id String @id @default(uuid())
24
+
25
+ /// Document type identifier (e.g., "Medical", "Legal", "Technical")
26
+ documentType String @map("document_type")
27
+
28
+ /// Human-readable name for this configuration
29
+ name String
30
+
31
+ /// System prompt used for AI processing
32
+ systemPrompt String @map("system_prompt") @db.Text
33
+
34
+ /// Chunking strategy configuration as JSON
35
+ chunkStrategy Json @map("chunk_strategy")
36
+
37
+ /// Incremental version number for this document type (unique per type; defaults to 1 and is not auto-incremented by the database)
38
+ version Int @default(1)
39
+
40
+ /// Whether this configuration is active
41
+ isActive Boolean @default(true) @map("is_active")
42
+
43
+ /// Whether this is the default config for the document type (this schema does not enforce a single default per type)
44
+ isDefault Boolean @default(false) @map("is_default")
45
+
46
+ /// Who created this: 'discovery' | 'manual' | user ID
47
+ createdBy String? @map("created_by")
48
+
49
+ /// Reason for changes / changelog entry
50
+ changeLog String? @map("change_log")
51
+
52
+ createdAt DateTime @default(now()) @map("created_at")
53
+ updatedAt DateTime @updatedAt @map("updated_at")
54
+
55
+ /// Chunks created with this config; they are deleted together with it (onDelete: Cascade on ContextRagChunk.promptConfig)
56
+ chunks ContextRagChunk[]
57
+
58
+ @@unique([documentType, version])
59
+ @@index([documentType, isActive])
60
+ @@map("context_rag_prompt_configs")
61
+ }
62
+
63
+ /// Stores vector chunks for semantic search
64
+ model ContextRagChunk {
65
+ id String @id @default(uuid())
66
+
67
+ /// Reference to the prompt config used to create this chunk; the chunk is deleted when that config is deleted (onDelete: Cascade)
68
+ promptConfigId String @map("prompt_config_id")
69
+ promptConfig ContextRagPromptConfig @relation(fields: [promptConfigId], references: [id], onDelete: Cascade)
70
+
71
+ /// ID of the source document (plain string; no @relation to ContextRagDocument is declared in this schema)
72
+ documentId String @map("document_id")
73
+
74
+ /// Sequential index of this chunk within the document
75
+ chunkIndex Int @map("chunk_index")
76
+
77
+ /// Type of content: TEXT, TABLE, LIST, CODE, HEADING, IMAGE_REF, QUOTE, MIXED (stored as a plain string, not a Prisma enum)
78
+ chunkType String @map("chunk_type")
79
+
80
+ /// Plain text content optimized for vector search (original)
81
+ searchContent String @map("search_content") @db.Text
82
+
83
+ /// Enriched content: context + searchContent (used for embedding when RAG enhancement enabled)
84
+ enrichedContent String? @map("enriched_content") @db.Text
85
+
86
+ /// AI-generated context text only (for debugging/display)
87
+ contextText String? @map("context_text") @db.Text
88
+
89
+ /// Vector embedding (768 dimensions for Gemini). Declared as Unsupported, so the generated Prisma Client does not expose this field; access it through raw SQL
90
+ searchVector Unsupported("vector(768)") @map("search_vector")
91
+
92
+ /// Rich Markdown content for display
93
+ displayContent String @map("display_content") @db.Text
94
+
95
+ /// Starting page number in source document
96
+ sourcePageStart Int @map("source_page_start")
97
+
98
+ /// Ending page number in source document
99
+ sourcePageEnd Int @map("source_page_end")
100
+
101
+ /// Confidence score from AI processing (0.0 - 1.0); defaults to 0.5, the range is not enforced by the schema
102
+ confidenceScore Float @default(0.5) @map("confidence_score")
103
+
104
+ /// Additional metadata as JSON
105
+ /// { page, pageRange, type, confidence, tokens, keywords, ... }
106
+ metadata Json
107
+
108
+ createdAt DateTime @default(now()) @map("created_at")
109
+
110
+ @@index([promptConfigId])
111
+ @@index([documentId])
112
+ @@index([chunkType])
113
+ @@index([confidenceScore])
114
+ @@map("context_rag_chunks")
115
+ }
116
+
117
+ /// Tracks document processing state
118
+ model ContextRagDocument {
119
+ id String @id @default(uuid())
120
+
121
+ /// Original filename
122
+ filename String
123
+
124
+ /// SHA-256 hash for deduplication
125
+ fileHash String @map("file_hash")
126
+
127
+ /// File size in bytes (Prisma Int is a 32-bit integer on PostgreSQL, so the maximum is 2,147,483,647)
128
+ fileSize Int @map("file_size")
129
+
130
+ /// Total page count
131
+ pageCount Int @map("page_count")
132
+
133
+ /// Document type (matches ContextRagPromptConfig.documentType; not enforced by a relation)
134
+ documentType String? @map("document_type")
135
+
136
+ /// Experiment identifier for A/B testing models
137
+ /// e.g., "exp_flash_v1", "exp_pro_v2"
138
+ experimentId String? @map("experiment_id")
139
+
140
+ /// AI model used for processing
141
+ /// e.g., "gemini-3-flash-preview", "gemini-3-pro-preview"
142
+ modelName String? @map("model_name")
143
+
144
+ /// Model configuration as JSON
145
+ /// { temperature, maxOutputTokens, ... }
146
+ modelConfig Json? @map("model_config")
147
+
148
+ /// Processing status: PENDING, DISCOVERING, AWAITING_APPROVAL, PROCESSING, COMPLETED, FAILED, PARTIAL (stored as a plain string, not a Prisma enum)
149
+ status String @default("PENDING")
150
+
151
+ /// ID of the prompt config used for processing (plain string; no @relation to ContextRagPromptConfig is declared in this schema)
152
+ promptConfigId String? @map("prompt_config_id")
153
+
154
+ /// Batch processing statistics
155
+ totalBatches Int @default(0) @map("total_batches")
156
+ completedBatches Int @default(0) @map("completed_batches")
157
+ failedBatches Int @default(0) @map("failed_batches")
158
+
159
+ /// Total token usage as JSON { input, output, total }
160
+ tokenUsage Json? @map("token_usage")
161
+
162
+ /// Total processing time in milliseconds
163
+ processingMs Int? @map("processing_ms")
164
+
165
+ /// Error message if failed
166
+ errorMessage String? @map("error_message")
167
+
168
+ createdAt DateTime @default(now()) @map("created_at")
169
+ completedAt DateTime? @map("completed_at")
170
+
171
+ /// Related batch jobs (deleted together with this document via onDelete: Cascade on ContextRagBatch.document)
172
+ batches ContextRagBatch[]
173
+
174
+ /// Unique constraint: same file + same experiment = one record
175
+ /// Allows same PDF to be processed with different experiments. NOTE: experimentId is nullable and PostgreSQL treats NULLs as distinct in unique constraints by default, so rows without an experimentId are not deduplicated by this constraint
176
+ @@unique([fileHash, experimentId])
177
+ @@index([status])
178
+ @@index([fileHash]) // NOTE(review): likely redundant; the unique index on (fileHash, experimentId) already leads with fileHash
179
+ @@index([documentType])
180
+ @@index([experimentId])
181
+ @@map("context_rag_documents")
182
+ }
183
+
184
+ /// Tracks individual batch processing jobs
185
+ model ContextRagBatch {
186
+ id String @id @default(uuid())
187
+
188
+ /// Reference to parent document; the batch is deleted when the document is deleted (onDelete: Cascade)
189
+ documentId String @map("document_id")
190
+ document ContextRagDocument @relation(fields: [documentId], references: [id], onDelete: Cascade)
191
+
192
+ /// Sequential batch index (0-based), unique within a document
193
+ batchIndex Int @map("batch_index")
194
+
195
+ /// Starting page of this batch
196
+ pageStart Int @map("page_start")
197
+
198
+ /// Ending page of this batch
199
+ pageEnd Int @map("page_end")
200
+
201
+ /// Batch status: PENDING, PROCESSING, RETRYING, COMPLETED, FAILED (stored as a plain string, not a Prisma enum)
202
+ status String @default("PENDING")
203
+
204
+ /// Number of retry attempts
205
+ retryCount Int @default(0) @map("retry_count")
206
+
207
+ /// Last error message if failed
208
+ lastError String? @map("last_error")
209
+
210
+ /// Token usage for this batch as JSON
211
+ tokenUsage Json? @map("token_usage")
212
+
213
+ /// Processing time in milliseconds
214
+ processingMs Int? @map("processing_ms")
215
+
216
+ startedAt DateTime? @map("started_at")
217
+ completedAt DateTime? @map("completed_at")
218
+ createdAt DateTime @default(now()) @map("created_at")
219
+ updatedAt DateTime @updatedAt @map("updated_at")
220
+
221
+ @@unique([documentId, batchIndex])
222
+ @@index([documentId, status])
223
+ @@index([status])
224
+ @@map("context_rag_batches")
225
+ }