@msbayindir/context-rag 1.0.0-beta.9 → 2.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,16 +10,18 @@
10
10
 
11
11
  > ⚠️ **Status: Beta** — Actively used in production (medical RAG & enterprise docs), API stable, breaking changes documented.
12
12
 
13
+ > ⭐ **If this project helps you build better RAG systems, consider giving it a star!** It helps others discover this project.
14
+
13
15
  ---
14
16
 
15
17
  ## ⚡ 60-Second Quick Start
16
18
 
17
19
  ```typescript
18
- import { ContextRAG } from '@msbayindir/context-rag';
20
+ import { createContextRAG } from '@msbayindir/context-rag';
19
21
  import { PrismaClient } from '@prisma/client';
20
22
  import * as fs from 'fs';
21
23
 
22
- const rag = new ContextRAG({
24
+ const rag = createContextRAG({
23
25
  prisma: new PrismaClient(),
24
26
  geminiApiKey: process.env.GEMINI_API_KEY!,
25
27
  });
@@ -70,6 +72,7 @@ console.log(results[0].chunk.displayContent);
70
72
  | 🐘 **PostgreSQL Native** | No external vector DB needed, uses pgvector |
71
73
  | ⚡ **Batch Processing** | Concurrent processing with automatic retry |
72
74
  | 🛡️ **Enterprise Error Handling** | Correlation IDs, graceful degradation, structured logging |
75
+ | 🔌 **Dependency Injection** | SOLID-compliant architecture with interface-based DI (v2.0-beta) |
73
76
 
74
77
  ---
75
78
 
@@ -165,7 +168,7 @@ flowchart TB
165
168
  **Scenario:** Turkish medical students preparing for the TUS exam with 500+ page biochemistry PDFs.
166
169
 
167
170
  ```typescript
168
- const rag = new ContextRAG({
171
+ const rag = createContextRAG({
169
172
  prisma,
170
173
  geminiApiKey: process.env.GEMINI_API_KEY!,
171
174
  ragEnhancement: {
@@ -176,13 +179,14 @@ const rag = new ContextRAG({
176
179
  });
177
180
 
178
181
  // Discovery: AI analyzes the PDF and suggests an extraction strategy
179
- const discovery = await rag.discover({ file: pdfBuffer, filename: 'biochemistry.pdf' });
182
+ const discovery = await rag.discover({ file: pdfBuffer });
180
183
 
181
- // Ingest with discovered strategy
184
+ // Ingest with the approved strategy
185
+ const approved = await rag.approveStrategy(discovery.id);
182
186
  await rag.ingest({
183
187
  file: pdfBuffer,
184
188
  filename: 'biochemistry.pdf',
185
- promptConfig: discovery.promptConfig, // AI-suggested prompts
189
+ promptConfigId: approved.id,
186
190
  });
187
191
 
188
192
  // Students can now ask contextual questions
@@ -227,11 +231,12 @@ const liabilityClauses = await rag.search({
227
231
  ```typescript
228
232
  // Process multiple document types
229
233
  for (const doc of ['hr-policy.pdf', 'security-guidelines.pdf', 'api-docs.pdf']) {
230
- const discovery = await rag.discover({ file: docs[doc], filename: doc });
234
+ const discovery = await rag.discover({ file: docs[doc] });
235
+ const approved = await rag.approveStrategy(discovery.id);
231
236
  await rag.ingest({
232
237
  file: docs[doc],
233
238
  filename: doc,
234
- promptConfig: discovery.promptConfig,
239
+ promptConfigId: approved.id,
235
240
  experimentId: 'knowledge-base-v1', // Group related documents
236
241
  });
237
242
  }
@@ -324,6 +329,12 @@ npx @msbayindir/context-rag init --force
324
329
 
325
330
  # Check setup status (Prisma models, pgvector, env variables)
326
331
  npx @msbayindir/context-rag status
332
+
333
+ # Check for embedding model mismatches
334
+ npx @msbayindir/context-rag check-embeddings
335
+
336
+ # Re-index documents (useful after changing embedding models)
337
+ npx @msbayindir/context-rag reindex --concurrency 5
327
338
  ```
328
339
 
329
340
  ---
@@ -399,12 +410,12 @@ COHERE_API_KEY="your-cohere-api-key"
399
410
  ## 🧩 Usage (Full Example)
400
411
 
401
412
  ```typescript
402
- import { ContextRAG } from '@msbayindir/context-rag';
413
+ import { createContextRAG } from '@msbayindir/context-rag';
403
414
  import { PrismaClient } from '@prisma/client';
404
415
 
405
416
  const prisma = new PrismaClient();
406
417
 
407
- const rag = new ContextRAG({
418
+ const rag = createContextRAG({
408
419
  prisma,
409
420
  geminiApiKey: process.env.GEMINI_API_KEY!,
410
421
  model: 'gemini-3-flash-preview',
@@ -463,7 +474,7 @@ the Cyanide test value for patient Ahmet Yılmaz. Value: 50 mg/dL"
463
474
  ### Configuration
464
475
 
465
476
  ```typescript
466
- const rag = new ContextRAG({
477
+ const rag = createContextRAG({
467
478
  // ...
468
479
  ragEnhancement: {
469
480
  approach: 'anthropic_contextual',
@@ -496,7 +507,7 @@ Reranking improves search relevance by re-scoring candidates using AI models. Ba
496
507
  ### Configuration
497
508
 
498
509
  ```typescript
499
- const rag = new ContextRAG({
510
+ const rag = createContextRAG({
500
511
  prisma,
501
512
  geminiApiKey: process.env.GEMINI_API_KEY!,
502
513
 
@@ -565,7 +576,7 @@ const result = await rag.ingest({
565
576
  ### Configuration for Selective Context Enrichment
566
577
 
567
578
  ```typescript
568
- const rag = new ContextRAG({
579
+ const rag = createContextRAG({
569
580
  prisma,
570
581
  geminiApiKey: process.env.GEMINI_API_KEY!,
571
582
 
@@ -593,35 +604,151 @@ await rag.ingest({
593
604
 
594
605
  ## ⚙️ Configuration
595
606
 
607
+
608
+ Context-RAG is highly configurable. Below is the complete list of available options.
609
+
596
610
  ```typescript
597
- const rag = new ContextRAG({
598
- // Required
611
+ const rag = createContextRAG({
612
+ // ============================================
613
+ // CORE CONFIGURATION (Required)
614
+ // ============================================
615
+
616
+ /** Your initialized Prisma client instance */
599
617
  prisma: prismaClient,
600
- geminiApiKey: 'your-api-key',
601
618
 
602
- // Model selection
603
- model: 'gemini-3-flash-preview',
604
- embeddingModel: 'gemini-embedding-exp-03-07',
619
+ /** Gemini API Key (Required for generation and default embeddings) */
620
+ geminiApiKey: process.env.GEMINI_API_KEY!,
621
+
622
+ // ============================================
623
+ // MODEL SELECTION
624
+ // ============================================
605
625
 
606
- // Generation
626
+ /**
627
+ * Main LLM model for generation, orchestration, and RAG enhancement.
628
+ * Default: 'gemini-1.5-pro'
629
+ */
630
+ model: 'gemini-1.5-pro', // Options: 'gemini-1.5-flash', 'gemini-2.0-flash-exp', etc.
631
+
632
+ /**
633
+ * Configuration for the LLM generation (temperature, tokens, etc.)
634
+ */
607
635
  generationConfig: {
608
- temperature: 0.2,
609
- maxOutputTokens: 16384,
636
+ temperature: 0.3, // Creativity (0.0 - 1.0). Lower is more deterministic.
637
+ maxOutputTokens: 8192, // Maximum length of the generated response.
610
638
  },
611
639
 
612
- // Batch processing
640
+ // ============================================
641
+ // EMBEDDING PROVIDER (Optional)
642
+ // ============================================
643
+
644
+ /**
645
+ * Choose your embedding provider.
646
+ * Default: Uses Gemini 'text-embedding-004'
647
+ */
648
+ embeddingProvider: {
649
+ // Provider: 'gemini' | 'openai' | 'cohere'
650
+ provider: 'openai',
651
+
652
+ // Model name (specific to the provider)
653
+ model: 'text-embedding-3-small',
654
+
655
+ // API Key (if different from geminiApiKey)
656
+ apiKey: process.env.OPENAI_API_KEY,
657
+ },
658
+
659
+ // ============================================
660
+ // SYSTEM CONFIGURATION
661
+ // ============================================
662
+
663
+ /**
664
+ * Batch processing settings for ingestion.
665
+ * Adjust these based on your API rate limits.
666
+ */
613
667
  batchConfig: {
614
- pagesPerBatch: 15,
615
- maxConcurrency: 3,
616
- maxRetries: 3,
668
+ pagesPerBatch: 15, // How many pages to process in one go (Default: 15)
669
+ maxConcurrency: 3, // How many batches to run in parallel (Default: 3)
670
+ maxRetries: 3, // Retry failed batches (Default: 3)
671
+ retryDelayMs: 1000, // Initial delay before retry (Default: 1000ms)
672
+ backoffMultiplier: 2, // Exponential backoff factor (Default: 2)
673
+ },
674
+
675
+ /**
676
+ * Settings for splitting text into vector chunks.
677
+ */
678
+ chunkConfig: {
679
+ maxTokens: 500, // Maximum size of a single chunk (Default: 500)
680
+ overlapTokens: 50, // Overlap between chunks to preserve continuity (Default: 50)
681
+ },
682
+
683
+ /**
684
+ * API Rate Limiting protection.
685
+ */
686
+ rateLimitConfig: {
687
+ requestsPerMinute: 60, // Max RPM allowed (Default: 60)
688
+ adaptive: true, // Automatically slow down if 429 errors occur (Default: true)
689
+ },
690
+
691
+ /**
692
+ * System logging configuration.
693
+ */
694
+ logging: {
695
+ level: 'info', // 'debug' | 'info' | 'warn' | 'error'
696
+ structured: true, // Use JSON format for logs (recommended for production log tooling like Datadog/CloudWatch)
697
+ },
698
+
699
+ // ============================================
700
+ // ADVANCED FEATURES
701
+ // ============================================
702
+
703
+ /**
704
+ * Reranking improves search relevance by re-scoring results.
705
+ */
706
+ rerankingConfig: {
707
+ enabled: true, // Enable automatic reranking (Default: false)
708
+ provider: 'cohere', // 'gemini' or 'cohere' (Cohere is recommended for best results)
709
+ cohereApiKey: process.env.COHERE_API_KEY, // Required if provider is 'cohere'
710
+ defaultCandidates: 50, // Retrieve top 50 from Vector DB...
711
+ defaultTopK: 10, // ...and return top 10 after reranking.
617
712
  },
618
713
 
619
- // RAG Enhancement
714
+ /**
715
+ * RAG Enhancement (Contextual Retrieval).
716
+ * Adds context to chunks before embedding them.
717
+ */
620
718
  ragEnhancement: {
719
+ // Approach: 'anthropic_contextual' (Recommended) or 'none'
621
720
  approach: 'anthropic_contextual',
622
- strategy: 'simple',
623
- skipChunkTypes: ['HEADING'],
721
+
722
+ // Strategy: 'llm' (Best Quality) or 'simple' (Template based)
723
+ strategy: 'llm',
724
+
725
+ // Model to use for generating context (Optional, defaults to main model)
726
+ // Tip: Use a cheaper model here (e.g., 'gemini-1.5-flash') to save costs.
727
+ model: 'gemini-1.5-flash',
728
+
729
+ // Prompt used to generate context (Optional, has good default)
730
+ contextPrompt: 'Situate this chunk within the document...',
731
+
732
+ // Don't waste tokens generating context for these types
733
+ skipChunkTypes: ['HEADING', 'IMAGE_REF', 'CODE'],
624
734
  },
735
+
736
+ /**
737
+ * Enable Structured Output (JSON Schema) for reliable parsing.
738
+ * Disable only if you are using a model that doesn't support it well.
739
+ * Default: true
740
+ */
741
+ useStructuredOutput: true,
742
+
743
+ /**
744
+ * Custom Chunk Type Mapping.
745
+ * Map your custom extraction types to system types for proper handling.
746
+ */
747
+ chunkTypeMapping: {
748
+ 'RECIPE': 'TEXT', // Treat 'RECIPE' as normal text
749
+ 'INGREDIENT_LIST': 'LIST', // Treat 'INGREDIENT_LIST' as a list
750
+ 'NUTRITIONAL_INFO': 'TABLE' // Treat 'NUTRITIONAL_INFO' as a table
751
+ }
625
752
  });
626
753
  ```
627
754
 
@@ -765,22 +892,77 @@ We use [Conventional Commits](https://www.conventionalcommits.org/):
765
892
  context-rag/
766
893
  ├── src/
767
894
  │ ├── context-rag.ts # Main facade class
895
+ │ ├── context-rag.factory.ts # DI Factory (v2.0-beta)
768
896
  │ ├── engines/ # Discovery, Ingestion, Retrieval
769
897
  │ ├── enhancements/ # RAG Enhancement handlers
770
898
  │ │ └── anthropic/ # Anthropic Contextual Retrieval
771
899
  │ ├── services/ # Gemini API, PDF Processor
900
+ │ ├── providers/ # Embedding providers (Gemini, OpenAI, Cohere)
772
901
  │ ├── database/ # Prisma repositories
773
- │ ├── config/ # Templates
774
- │ ├── types/ # TypeScript types
902
+ │ ├── config/ # Templates & constants
903
+ │ ├── types/ # TypeScript types & interfaces
775
904
  │ ├── utils/ # Logger, Retry, RateLimiter
776
905
  │ └── errors/ # Custom error classes
777
906
  ├── examples/ # Demo scripts
907
+ ├── tests/ # Unit & integration tests
778
908
  ├── prisma/ # Reference schema
779
909
  └── dist/ # Built output
780
910
  ```
781
911
 
782
912
  ---
783
913
 
914
+ ## 🔄 Migration Guide (v1.x → v2.0-beta)
915
+
916
+ ### Breaking Change: Factory Pattern
917
+
918
+ v2.0-beta introduces proper Dependency Injection. The `new ContextRAG()` constructor now requires dependencies.
919
+
920
+ **Before (v1.x):**
921
+ ```typescript
922
+ import { ContextRAG } from '@msbayindir/context-rag';
923
+
924
+ const rag = new ContextRAG({
925
+ prisma,
926
+ geminiApiKey: 'your-key',
927
+ });
928
+ ```
929
+
930
+ **After (v2.0-beta):**
931
+ ```typescript
932
+ import { createContextRAG } from '@msbayindir/context-rag';
933
+
934
+ const rag = createContextRAG({
935
+ prisma,
936
+ geminiApiKey: 'your-key',
937
+ });
938
+ ```
939
+
940
+ ### Custom Engine Injection (Advanced)
941
+
942
+ v2.0-beta allows injecting custom engines for advanced use cases:
943
+
944
+ ```typescript
945
+ import { ContextRAG, IngestionEngine } from '@msbayindir/context-rag';
946
+
947
+ // Create custom engine
948
+ class MyIngestionEngine extends IngestionEngine {
949
+ async ingest(options) {
950
+ console.log('Custom logic!');
951
+ return super.ingest(options);
952
+ }
953
+ }
954
+
955
+ // Inject via constructor
956
+ const rag = new ContextRAG(config, {
957
+ ingestionEngine: new MyIngestionEngine(/* engine dependencies */),
958
+ retrievalEngine,
959
+ discoveryEngine,
960
+ repos: { promptConfig, document, chunk },
961
+ });
962
+ ```
963
+
964
+ ---
965
+
784
966
  ## 📄 License
785
967
 
786
968
  MIT © [Muhammed Bayindir](https://github.com/msbayindir)