@ooneex/rag 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Ooneex
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,337 @@
1
+ # @ooneex/rag
2
+
3
+ Retrieval-Augmented Generation toolkit with vector database integration, document embedding, and semantic search for AI-powered knowledge retrieval.
4
+
5
+ ![Bun](https://img.shields.io/badge/Bun-Compatible-orange?style=flat-square&logo=bun)
6
+ ![TypeScript](https://img.shields.io/badge/TypeScript-Ready-blue?style=flat-square&logo=typescript)
7
+ ![MIT License](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)
8
+
9
+ ## Features
10
+
11
+ ✅ **Vector Database** - Abstract base class for building custom vector databases with LanceDB
12
+
13
+ ✅ **PDF Conversion** - Convert PDF documents into structured chunks with heading and page metadata
14
+
15
+ ✅ **Hybrid Search** - Full-text and vector-based hybrid search with RRF reranking
16
+
17
+ ✅ **OpenAI Embeddings** - Built-in support for OpenAI embedding models (ada-002, 3-small, 3-large)
18
+
19
+ ✅ **Schema Definition** - Typed schema definitions using Apache Arrow data types
20
+
21
+ ✅ **Index Management** - Create scalar (btree, bitmap, labelList) and IVF-PQ vector indexes
22
+
23
+ ✅ **Query Filtering** - Composable filter conditions with AND, OR, NOT logical operators
24
+
25
+ ✅ **Query Analysis** - Explain and analyze query plans for performance tuning
26
+
27
+ ✅ **Container Integration** - Decorator-based registration with the DI container
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ bun add @ooneex/rag
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### Defining a Vector Database
38
+
39
+ ```typescript
40
+ import { AbstractVectorDatabase } from '@ooneex/rag';
41
+ import { Utf8 } from 'apache-arrow';
42
+ import type { EmbeddingProviderType, EmbeddingModelType, FieldValueType } from '@ooneex/rag';
43
+
44
+ type ArticleData = {
45
+ title: string;
46
+ category: string;
47
+ };
48
+
49
+ class ArticleVectorDatabase extends AbstractVectorDatabase<ArticleData> {
50
+ public getDatabaseUri(): string {
51
+ return './data/articles.lance';
52
+ }
53
+
54
+ public getEmbeddingModel(): { provider: EmbeddingProviderType; model: EmbeddingModelType['model'] } {
55
+ return { provider: 'openai', model: 'text-embedding-3-small' };
56
+ }
57
+
58
+ public getSchema(): { [K in keyof ArticleData]: FieldValueType } {
59
+ return {
60
+ title: new Utf8(),
61
+ category: new Utf8(),
62
+ };
63
+ }
64
+ }
65
+ ```
66
+
67
+ ### Connecting and Adding Data
68
+
69
+ ```typescript
70
+ const db = new ArticleVectorDatabase();
71
+ await db.connect();
72
+
73
+ const table = await db.open('articles');
74
+
75
+ await table.add([
76
+ { id: '1', text: 'Introduction to RAG systems', title: 'RAG Intro', category: 'AI' },
77
+ { id: '2', text: 'Vector databases explained', title: 'Vector DBs', category: 'Database' },
78
+ ]);
79
+ ```
80
+
81
+ ### Searching
82
+
83
+ ```typescript
84
+ const results = await table.search('retrieval augmented generation', {
85
+ limit: 5,
86
+ select: ['title', 'category'],
87
+ filter: { field: 'category', op: '=', value: 'AI' },
88
+ });
89
+
90
+ console.log(results);
91
+ ```
92
+
93
+ ### Converting PDFs to Chunks
94
+
95
+ ```typescript
96
+ import { Convertor } from '@ooneex/rag';
97
+
98
+ const convertor = new Convertor('/path/to/document.pdf');
99
+
100
+ for await (const chunk of convertor.convert({ outputDir: './output' })) {
101
+ console.log(chunk.text);
102
+ console.log(chunk.metadata.heading);
103
+ console.log(chunk.metadata.pages);
104
+ }
105
+ ```
106
+
107
+ ### Composable Filters
108
+
109
+ ```typescript
110
+ const results = await table.search('machine learning', {
111
+ limit: 10,
112
+ filter: {
113
+ AND: [
114
+ { field: 'category', op: '=', value: 'AI' },
115
+ { NOT: { field: 'title', op: 'LIKE', value: '%draft%' } },
116
+ ],
117
+ },
118
+ });
119
+ ```
120
+
121
+ ### Query Plan Analysis
122
+
123
+ ```typescript
124
+ // Explain the query plan
125
+ const plan = await table.explainPlan('search query', {
126
+ limit: 10,
127
+ verbose: true,
128
+ });
129
+ console.log(plan);
130
+
131
+ // Analyze with runtime metrics
132
+ const analysis = await table.analyzePlan('search query', {
133
+ limit: 10,
134
+ });
135
+ console.log(analysis);
136
+ ```
137
+
138
+ ## API Reference
139
+
140
+ ### Classes
141
+
142
+ #### `AbstractVectorDatabase<DataType>` (Abstract)
143
+
144
+ Abstract base class for creating vector database implementations.
145
+
146
+ **Type Parameter:**
147
+ - `DataType` - Record type for additional schema fields
148
+
149
+ **Abstract Methods:**
150
+
151
+ ##### `getDatabaseUri(): string`
152
+
153
+ Returns the URI for the LanceDB database storage.
154
+
155
+ ##### `getEmbeddingModel(): { provider: EmbeddingProviderType; model: EmbeddingModelType['model'] }`
156
+
157
+ Returns the embedding provider and model configuration.
158
+
159
+ ##### `getSchema(): { [K in keyof DataType]: FieldValueType }`
160
+
161
+ Returns the schema definition using Apache Arrow types.
162
+
163
+ **Concrete Methods:**
164
+
165
+ ##### `connect(): Promise<void>`
166
+
167
+ Connect to the LanceDB database.
168
+
169
+ ##### `getDatabase(): Connection`
170
+
171
+ Get the underlying LanceDB connection. Throws `VectorDatabaseException` if not connected.
172
+
173
+ ##### `open(name: string, options?): Promise<VectorTable<DataType>>`
174
+
175
+ Open or create a vector table. Automatically creates btree, full-text search, and IVF-PQ indexes on new tables.
176
+
177
+ **Parameters:**
178
+ - `name` - Table name
179
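+ - `options.mode` - `"create"` or `"overwrite"` (default: `"overwrite"`). Only applied when the table does not exist yet and has to be created; an existing table is opened as-is and is not overwritten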
180
+
181
+ ---
182
+
183
+ #### `VectorTable<DataType>`
184
+
185
+ Provides search, indexing, and data operations on a vector table.
186
+
187
+ **Methods:**
188
+
189
+ ##### `add(data): Promise<this>`
190
+
191
+ Add records to the table.
192
+
193
+ ##### `search(query, options?): Promise<DataType[]>`
194
+
195
+ Perform hybrid (vector + full-text) search with RRF reranking.
196
+
197
+ **Parameters:**
198
+ - `query` - Search query string
199
+ - `options.limit` - Maximum results (default: 10)
200
+ - `options.select` - Fields to return
201
+ - `options.filter` - Filter conditions
202
+ - `options.nprobes` - IVF partitions to search
203
+ - `options.refineFactor` - Refine step multiplier
204
+ - `options.fastSearch` - Skip un-indexed data (default: true)
205
+
206
+ ##### `createIndex(column, options?): Promise<this>`
207
+
208
+ Create a scalar index (btree, bitmap, or labelList).
209
+
210
+ ##### `createVectorIndex(column?, options?): Promise<this>`
211
+
212
+ Create an IVF-PQ vector index on `column` (default: `"vector"`).
213
+
214
+ ##### `explainPlan(query, options?): Promise<string>`
215
+
216
+ Return the resolved query plan as a string (nothing is printed). `options.verbose` defaults to `true`.
217
+
218
+ ##### `analyzePlan(query, options?): Promise<string>`
219
+
220
+ Execute and return a physical plan with runtime metrics.
221
+
222
+ ---
223
+
224
+ #### `Convertor`
225
+
226
+ Converts PDF documents into structured text chunks.
227
+
228
+ **Constructor:**
229
+ ```typescript
230
+ new Convertor(source: string)
231
+ ```
232
+
233
+ **Methods:**
234
+
235
+ ##### `convert(options?): AsyncGenerator<ChunkType, { json: ConvertorFileType; markdown: ConvertorFileType }>`
236
+
237
+ Convert a PDF to chunks, yielding each chunk as it is processed.
238
+
239
+ **Parameters:**
240
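+ - `options.outputDir` - Base output directory (default: the current working directory). Each conversion writes its files into a new, randomly named subdirectory of it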
241
+ - `options.password` - PDF password
242
+ - `options.imageFormat` - `"png"` or `"jpeg"`
243
+ - `options.pages` - Page range
244
+ - `options.quiet` - Suppress output
245
+
246
+ ### Types
247
+
248
+ #### `ChunkType`
249
+
250
+ ```typescript
251
+ type ChunkType = {
252
+ text: string;
253
+ metadata: {
254
+ heading: string | null;
255
+ page: number | null;
256
+ pages: number[];
257
+ source: string | null;
258
+ };
259
+ };
260
+ ```
261
+
262
+ #### `Filter<T>`
263
+
264
+ Composable filter type supporting field conditions and logical operators.
265
+
266
+ ```typescript
267
+ type Filter<T> =
268
+ | FilterCondition<T>
269
+ | { AND: Filter<T>[] }
270
+ | { OR: Filter<T>[] }
271
+ | { NOT: Filter<T> };
272
+ ```
273
+
274
+ #### `FilterCondition<T>`
275
+
276
+ Individual filter conditions with typed operators.
277
+
278
+ ```typescript
279
+ type FilterCondition<T> =
280
+ | { field: FilterField<T>; op: '>' | '>=' | '<' | '<=' | '='; value: string | number }
281
+ | { field: FilterField<T>; op: 'IN'; value: (string | number)[] }
282
+ | { field: FilterField<T>; op: 'LIKE' | 'NOT LIKE'; value: string }
283
+ | { field: FilterField<T>; op: 'IS NULL' | 'IS NOT NULL' }
284
+ | { field: FilterField<T>; op: 'IS TRUE' | 'IS NOT TRUE' | 'IS FALSE' | 'IS NOT FALSE' };
285
+ ```
286
+
287
+ #### `EmbeddingProviderType`
288
+
289
+ ```typescript
290
+ type EmbeddingProviderType = 'openai';
291
+ ```
292
+
293
+ #### `FieldValueType`
294
+
295
+ Apache Arrow types supported for schema fields: `Null`, `Bool`, `Int8`-`Int64`, `Uint8`-`Uint64`, `Float16`-`Float64`, `Utf8`, `LargeUtf8`, `Binary`, `LargeBinary`, `Decimal`, `DateDay`, `DateMillisecond`, and `EmbeddingFunction`.
296
+
297
+ ### Exceptions
298
+
299
+ #### `VectorDatabaseException`
300
+
301
+ Thrown when vector database operations fail (e.g., not connected).
302
+
303
+ #### `ConvertorException`
304
+
305
+ Thrown when PDF conversion fails.
306
+
307
+ ### Decorators
308
+
309
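+ #### `@decorator.vectorDatabase(scope?)`
310
+
311
+ Class decorator that registers a vector database class with the DI container. `scope` is optional and defaults to `EContainerScope.Singleton`.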
312
+
313
+ ## License
314
+
315
+ This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.
316
+
317
+ ## Contributing
318
+
319
+ Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
320
+
321
+ ### Development Setup
322
+
323
+ 1. Clone the repository
324
+ 2. Install dependencies: `bun install`
325
+ 3. Run tests: `bun run test`
326
+ 4. Build the project: `bun run build`
327
+
328
+ ### Guidelines
329
+
330
+ - Write tests for new features
331
+ - Follow the existing code style
332
+ - Update documentation for API changes
333
+ - Ensure all tests pass before submitting PR
334
+
335
+ ---
336
+
337
+ Made with ❤️ by the Ooneex team
@@ -0,0 +1,165 @@
1
+ import * as lancedb2 from "@lancedb/lancedb";
2
+ import { ScalarType } from "@ooneex/types";
3
+ import { Connection as Connection2 } from "@lancedb/lancedb";
4
+ import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
5
+ import { Binary, Bool, DateDay, DateMillisecond, Decimal, Float16, Float32, Float64, Int8, Int16, Int32, Int64, LargeBinary, LargeUtf8, Null, Uint8, Uint16, Uint32, Uint64, Utf8 } from "apache-arrow";
6
+ import * as lancedb from "@lancedb/lancedb";
7
+ declare class VectorTable<DataType extends Record<string, unknown>> {
8
+ private table;
9
+ private reranker;
10
+ constructor(table: lancedb.Table);
11
+ add(data: ({
12
+ id: string;
13
+ text: string;
14
+ } & DataType)[]): Promise<this>;
15
+ createIndex(column: string, options?: {
16
+ config?: ReturnType<typeof lancedb.Index.btree | typeof lancedb.Index.bitmap | typeof lancedb.Index.labelList>;
17
+ }): Promise<this>;
18
+ createVectorIndex(column?: string, options?: Partial<Parameters<lancedb.Table["createIndex"]>[1] & object>): Promise<this>;
19
+ search(query: string, options?: {
20
+ limit?: number;
21
+ select?: (keyof DataType | "id" | "text")[];
22
+ filter?: Filter<DataType>;
23
+ nprobes?: number;
24
+ refineFactor?: number;
25
+ fastSearch?: boolean;
26
+ }): Promise<DataType[]>;
27
+ explainPlan(query: string, options?: {
28
+ limit?: number;
29
+ filter?: Filter<DataType>;
30
+ verbose?: boolean;
31
+ }): Promise<string>;
32
+ analyzePlan(query: string, options?: {
33
+ limit?: number;
34
+ filter?: Filter<DataType>;
35
+ }): Promise<string>;
36
+ }
37
+ type ConvertorOptionsType = {
38
+ outputDir?: string;
39
+ password?: string;
40
+ imageFormat?: "png" | "jpeg";
41
+ pages?: string;
42
+ quiet?: boolean;
43
+ };
44
+ type ChunkType = {
45
+ text: string;
46
+ metadata: {
47
+ heading: string | null;
48
+ page: number | null;
49
+ pages: number[];
50
+ source: string | null;
51
+ };
52
+ };
53
+ type ConvertorFileType = {
54
+ name: string;
55
+ path: string;
56
+ };
57
+ type IConvertor = {
58
+ convert: (options?: ConvertorOptionsType) => AsyncGenerator<ChunkType, {
59
+ json: ConvertorFileType;
60
+ markdown: ConvertorFileType;
61
+ }>;
62
+ };
63
+ type IVectorDatabase<DataType extends Record<string, unknown>> = {
64
+ getDatabaseUri: () => string;
65
+ connect: () => Promise<void>;
66
+ getDatabase: () => Connection2;
67
+ getEmbeddingModel: () => {
68
+ provider: EmbeddingProviderType;
69
+ model: EmbeddingModelType["model"];
70
+ };
71
+ getSchema: () => { [K in keyof DataType] : FieldValueType };
72
+ open: (name: string, options?: {
73
+ mode?: "create" | "overwrite";
74
+ }) => Promise<VectorTable<DataType>>;
75
+ };
76
+ type OpenAIModelType = "text-embedding-ada-002" | "text-embedding-3-small" | "text-embedding-3-large";
77
+ type EmbeddingProviderType = "openai";
78
+ type EmbeddingModelMapType = {
79
+ openai: OpenAIModelType;
80
+ };
81
+ type EmbeddingModelType<P extends EmbeddingProviderType = EmbeddingProviderType> = {
82
+ provider: P;
83
+ model: EmbeddingModelMapType[P];
84
+ };
85
+ type FieldValueType = Null | Bool | Int8 | Int16 | Int32 | Int64 | Uint8 | Uint16 | Uint32 | Uint64 | Float16 | Float32 | Float64 | Utf8 | LargeUtf8 | Binary | LargeBinary | Decimal | DateDay | DateMillisecond | EmbeddingFunction;
86
+ type VectorDatabaseClassType = new (...args: any[]) => IVectorDatabase<any>;
87
+ type FilterField<T> = keyof T | "id" | "text";
88
+ type FilterCondition<T> = {
89
+ field: FilterField<T>;
90
+ op: ">" | ">=" | "<" | "<=" | "=";
91
+ value: string | number;
92
+ } | {
93
+ field: FilterField<T>;
94
+ op: "IN";
95
+ value: (string | number)[];
96
+ } | {
97
+ field: FilterField<T>;
98
+ op: "LIKE" | "NOT LIKE";
99
+ value: string;
100
+ } | {
101
+ field: FilterField<T>;
102
+ op: "IS NULL" | "IS NOT NULL";
103
+ value?: never;
104
+ } | {
105
+ field: FilterField<T>;
106
+ op: "IS TRUE" | "IS NOT TRUE" | "IS FALSE" | "IS NOT FALSE";
107
+ value?: never;
108
+ };
109
+ type Filter<T> = FilterCondition<T> | {
110
+ AND: Filter<T>[];
111
+ } | {
112
+ OR: Filter<T>[];
113
+ } | {
114
+ NOT: Filter<T>;
115
+ };
116
+ declare abstract class AbstractVectorDatabase<DataType extends Record<string, ScalarType>> implements IVectorDatabase<DataType> {
117
+ private db;
118
+ private embedding;
119
+ constructor();
120
+ abstract getEmbeddingModel(): {
121
+ provider: EmbeddingProviderType;
122
+ model: EmbeddingModelType["model"];
123
+ };
124
+ abstract getSchema(): { [K in keyof DataType] : FieldValueType };
125
+ connect(): Promise<void>;
126
+ getDatabase(): lancedb2.Connection;
127
+ open(name: string, options?: {
128
+ mode?: "create" | "overwrite";
129
+ }): Promise<VectorTable<DataType>>;
130
+ }
131
+ declare class Convertor implements IConvertor {
132
+ private readonly source;
133
+ constructor(source: string);
134
+ convert(options?: ConvertorOptionsType): AsyncGenerator<ChunkType, {
135
+ json: ConvertorFileType;
136
+ markdown: ConvertorFileType;
137
+ }>;
138
+ private generateChunks;
139
+ private extractContent;
140
+ private extractTexts;
141
+ }
142
+ import { Exception } from "@ooneex/exception";
143
+ declare class ConvertorException extends Exception {
144
+ constructor(message: string, data?: Record<string, unknown>);
145
+ }
146
+ import { EContainerScope } from "@ooneex/container";
147
+ declare const decorator: {
148
+ vectorDatabase: (scope?: EContainerScope) => (target: VectorDatabaseClassType) => void;
149
+ };
150
+ type DataType2 = {
151
+ name: string;
152
+ };
153
+ declare class VectorDatabase extends AbstractVectorDatabase<DataType2> {
154
+ getDatabaseUri(): string;
155
+ getEmbeddingModel(): {
156
+ provider: EmbeddingProviderType;
157
+ model: EmbeddingModelType["model"];
158
+ };
159
+ getSchema(): { [K in keyof DataType2] : FieldValueType };
160
+ }
161
+ import { Exception as Exception2 } from "@ooneex/exception";
162
+ declare class VectorDatabaseException extends Exception2 {
163
+ constructor(message: string, data?: Record<string, unknown>);
164
+ }
165
+ export { decorator, VectorTable, VectorDatabaseException, VectorDatabaseClassType, VectorDatabase, OpenAIModelType, IVectorDatabase, IConvertor, FilterField, FilterCondition, Filter, FieldValueType, EmbeddingProviderType, EmbeddingModelType, EmbeddingModelMapType, ConvertorOptionsType, ConvertorFileType, ConvertorException, Convertor, ChunkType, AbstractVectorDatabase };
package/dist/index.js ADDED
@@ -0,0 +1,6 @@
1
+ // @bun
2
+ import*as f from"@lancedb/lancedb";import"@lancedb/lancedb/embedding/openai";import{getRegistry as P,LanceSchema as R}from"@lancedb/lancedb/embedding";import{Utf8 as S}from"apache-arrow";import{Exception as O}from"@ooneex/exception";import{HttpStatus as A}from"@ooneex/http-status";class T extends O{constructor(n,o={}){super(n,{status:A.Code.InternalServerError,data:o});this.name="VectorDatabaseException"}}import*as g from"@lancedb/lancedb";var m=(n)=>{if("AND"in n)return`(${n.AND.map(m).join(" AND ")})`;if("OR"in n)return`(${n.OR.map(m).join(" OR ")})`;if("NOT"in n)return`NOT (${m(n.NOT)})`;let o=String(n.field);if(n.op==="IS NULL"||n.op==="IS NOT NULL"||n.op==="IS TRUE"||n.op==="IS NOT TRUE"||n.op==="IS FALSE"||n.op==="IS NOT FALSE")return`${o} ${n.op}`;if(n.op==="IN"){let r=n.value.map((e)=>typeof e==="string"?`'${e}'`:e);return`${o} IN (${r.join(", ")})`}if(n.op==="LIKE"||n.op==="NOT LIKE")return`${o} ${n.op} '${n.value}'`;if(n.op===">"||n.op===">="||n.op==="<"||n.op==="<="||n.op==="=")return`${o} ${n.op} ${typeof n.value==="string"?`'${n.value}'`:n.value}`;return`${o} ${n.op} ${typeof n.value==="string"?`'${n.value}'`:n.value}`};class h{table;reranker=null;constructor(n){this.table=n}async add(n){return await this.table.add(n),this}async createIndex(n,o){return await this.table.createIndex(n,o),this}async createVectorIndex(n="vector",o){return await this.table.createIndex(n,{config:g.Index.ivfPq(),...o}),this}async search(n,o){let{limit:r=10,select:e,filter:s,nprobes:a,refineFactor:i,fastSearch:p=!0}=o??{};if(!this.reranker)this.reranker=await g.rerankers.RRFReranker.create();let t=this.table.search(n,"hybrid","text").rerank(this.reranker).limit(r);if(a)t=t.nprobes(a);if(i)t=t.refineFactor(i);if(p)t=t.fastSearch();if(e)t=t.select(e);if(s)t=t.where(m(s));return t.toArray()}async explainPlan(n,o){let{limit:r=10,filter:e,verbose:s=!0}=o??{};if(!this.reranker)this.reranker=await g.rerankers.RRFReranker.create();let 
a=this.table.search(n,"hybrid","text").rerank(this.reranker).limit(r);if(e)a=a.where(m(e));return a.explainPlan(s)}async analyzePlan(n,o){let{limit:r=10,filter:e}=o??{};if(!this.reranker)this.reranker=await g.rerankers.RRFReranker.create();let s=this.table.search(n,"hybrid","text").rerank(this.reranker).limit(r);if(e)s=s.where(m(e));return s.analyzePlan()}}class w{db=null;embedding;constructor(){let{provider:n,model:o}=this.getEmbeddingModel();this.embedding=P().get(n)?.create({model:o})}async connect(){this.db=await f.connect(this.getDatabaseUri())}getDatabase(){if(!this.db)throw new T("Database not connected. Call connect() first.");return this.db}async open(n,o){if((await this.getDatabase().tableNames()).includes(n)){let a=await this.getDatabase().openTable(n);return new h(a)}let e=R({id:new S,text:this.embedding.sourceField(new S),vector:this.embedding.vectorField(),...this.getSchema()}),s=await this.getDatabase().createEmptyTable(n,e,{mode:"overwrite",...o});return await s.createIndex("id",{config:f.Index.btree()}),await s.createIndex("text",{config:f.Index.fts()}),await s.createIndex("vector",{config:f.Index.ivfPq()}),new h(s)}}import u from"path";import{random as l}from"@ooneex/utils";import{convert as B}from"@opendataloader/pdf";import{Exception as b}from"@ooneex/exception";import{HttpStatus as F}from"@ooneex/http-status";class c extends b{constructor(n,o={}){super(n,{status:F.Code.InternalServerError,data:o});this.name="ConvertorException"}}class v{source;constructor(n){this.source=u.join(...n.split(/[/\\]/))}async*convert(n={}){try{let o=l.nanoid(15),r=u.join(n.outputDir??"",o),{password:e,imageFormat:s,quiet:a,pages:i}=n;await B([this.source],{outputDir:r,format:"json,markdown",imageDir:u.join(r,"images"),imageOutput:"external",...e!==void 0&&{password:e},...s!==void 0&&{imageFormat:s},...a!==void 0&&{quiet:a},...i!==void 0&&{pages:i}});let p=new Bun.Glob("*"),t,d;for await(let x of 
p.scan(r)){if(!t&&x.endsWith(".json"))t=x;if(!d&&x.endsWith(".md"))d=x;if(t&&d)break}if(!t)throw new c("No JSON output file found after conversion",{source:this.source});if(!d)throw new c("No Markdown output file found after conversion",{source:this.source});let C=u.join(r,t),k=await Bun.file(C).json(),I=k["file name"]??u.basename(this.source);yield*this.generateChunks(k.kids??[],I);let j=`${l.nanoid(15)}.json`,E=`${l.nanoid(15)}.md`,$=u.join(r,d),D=u.join(r,j),y=u.join(r,E);return await Promise.all([Bun.write(D,Bun.file(C)),Bun.write(y,Bun.file($))]),await Promise.all([Bun.file(C).delete(),Bun.file($).delete()]),{json:{name:j,path:D},markdown:{name:E,path:y}}}catch(o){if(o instanceof c)throw o;throw new c(o instanceof Error?o.message:"PDF conversion with chunking failed",{source:this.source})}}*generateChunks(n,o){let r=null,e=[],s=null,a=new Set;for(let i of n){let p=i.type;if(!p)continue;if(p==="heading"){if(e.length>0)yield{text:e.join(`
3
+ `),metadata:{heading:r,page:s,pages:Array.from(a),source:o}};let t=this.extractContent(i);r=t,e=t?[t]:[],s=i["page number"]??null,a=new Set(s!==null?[s]:[]);continue}if(p==="paragraph"||p==="list"){let t=this.extractContent(i);if(t){e.push(t);let d=i["page number"];if(d!==void 0)a.add(d)}}}if(e.length>0)yield{text:e.join(`
4
+ `),metadata:{heading:r,page:s,pages:Array.from(a),source:o}}}extractContent(n){let o=Array.from(this.extractTexts(n));return o.length>0?o.join(" "):null}*extractTexts(n){let o=n.content;if(o){yield o;return}let r=n.kids;if(r)for(let e of r)yield*this.extractTexts(e)}}import{container as L,EContainerScope as V}from"@ooneex/container";var M={vectorDatabase:(n=V.Singleton)=>{return(o)=>{L.add(o,n)}}};import{Utf8 as G}from"apache-arrow";class N extends w{getDatabaseUri(){return""}getEmbeddingModel(){return{provider:"openai",model:"text-embedding-ada-002"}}getSchema(){return{name:new G}}}export{M as decorator,h as VectorTable,T as VectorDatabaseException,N as VectorDatabase,c as ConvertorException,v as Convertor,w as AbstractVectorDatabase};
5
+
6
+ //# debugId=FB64C08DA10152BA64756E2164756E21
@@ -0,0 +1,17 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["src/AbstractVectorDatabase.ts", "src/VectorDatabaseException.ts", "src/VectorTable.ts", "src/utils.ts", "src/Convertor.ts", "src/ConvertorException.ts", "src/decorators.ts", "src/VectorDatabase.ts"],
4
+ "sourcesContent": [
5
+ "import * as lancedb from \"@lancedb/lancedb\";\nimport \"@lancedb/lancedb/embedding/openai\";\nimport type { EmbeddingFunction } from \"@lancedb/lancedb/embedding\";\nimport { getRegistry, LanceSchema } from \"@lancedb/lancedb/embedding\";\nimport type { ScalarType } from \"@ooneex/types\";\nimport { Utf8 } from \"apache-arrow\";\nimport type { EmbeddingModelType, EmbeddingProviderType, FieldValueType, IVectorDatabase } from \"./types.ts\";\nimport { VectorDatabaseException } from \"./VectorDatabaseException.ts\";\nimport { VectorTable } from \"./VectorTable.ts\";\n\nexport abstract class AbstractVectorDatabase<DataType extends Record<string, ScalarType>>\n implements IVectorDatabase<DataType>\n{\n private db: lancedb.Connection | null = null;\n private embedding: EmbeddingFunction;\n\n constructor() {\n const { provider, model } = this.getEmbeddingModel();\n this.embedding = getRegistry().get(provider)?.create({ model }) as EmbeddingFunction;\n }\n\n public abstract getDatabaseUri(): string;\n public abstract getEmbeddingModel(): { provider: EmbeddingProviderType; model: EmbeddingModelType[\"model\"] };\n public abstract getSchema(): { [K in keyof DataType]: FieldValueType };\n\n public async connect(): Promise<void> {\n this.db = await lancedb.connect(this.getDatabaseUri());\n }\n\n public getDatabase(): lancedb.Connection {\n if (!this.db) {\n throw new VectorDatabaseException(\"Database not connected. 
Call connect() first.\");\n }\n\n return this.db;\n }\n\n public async open(name: string, options?: { mode?: \"create\" | \"overwrite\" }): Promise<VectorTable<DataType>> {\n const tableNames = await this.getDatabase().tableNames();\n if (tableNames.includes(name)) {\n const table = await this.getDatabase().openTable(name);\n return new VectorTable<DataType>(table);\n }\n\n const schema = LanceSchema({\n id: new Utf8(),\n text: this.embedding.sourceField(new Utf8()),\n vector: this.embedding.vectorField(),\n ...this.getSchema(),\n });\n const table = await this.getDatabase().createEmptyTable(name, schema, { mode: \"overwrite\", ...options });\n await table.createIndex(\"id\", { config: lancedb.Index.btree() });\n await table.createIndex(\"text\", { config: lancedb.Index.fts() });\n await table.createIndex(\"vector\", { config: lancedb.Index.ivfPq() });\n\n return new VectorTable<DataType>(table);\n }\n}\n",
6
+ "import { Exception } from \"@ooneex/exception\";\nimport { HttpStatus } from \"@ooneex/http-status\";\n\nexport class VectorDatabaseException extends Exception {\n constructor(message: string, data: Record<string, unknown> = {}) {\n super(message, {\n status: HttpStatus.Code.InternalServerError,\n data,\n });\n this.name = \"VectorDatabaseException\";\n }\n}\n",
7
+ "import * as lancedb from \"@lancedb/lancedb\";\nimport type { Filter } from \"./types.ts\";\nimport { buildFilter } from \"./utils.ts\";\n\nexport class VectorTable<DataType extends Record<string, unknown>> {\n private table: lancedb.Table;\n private reranker: Awaited<ReturnType<typeof lancedb.rerankers.RRFReranker.create>> | null = null;\n\n constructor(table: lancedb.Table) {\n this.table = table;\n }\n\n public async add(data: ({ id: string; text: string } & DataType)[]): Promise<this> {\n await this.table.add(data);\n\n return this;\n }\n\n // Create a scalar index (btree, bitmap, or labelList) on a column used in filters.\n public async createIndex(\n column: string,\n options?: {\n config?: ReturnType<typeof lancedb.Index.btree | typeof lancedb.Index.bitmap | typeof lancedb.Index.labelList>;\n },\n ): Promise<this> {\n await this.table.createIndex(column, options);\n\n return this;\n }\n\n // Create an IVF PQ vector index for approximate nearest neighbor search.\n public async createVectorIndex(\n column = \"vector\",\n options?: Partial<Parameters<lancedb.Table[\"createIndex\"]>[1] & object>,\n ): Promise<this> {\n await this.table.createIndex(column, {\n config: lancedb.Index.ivfPq(),\n ...options,\n });\n\n return this;\n }\n\n public async search(\n query: string,\n options?: {\n limit?: number;\n select?: (keyof DataType | \"id\" | \"text\")[];\n filter?: Filter<DataType>;\n // Number of IVF partitions to search. Higher values improve recall but reduce speed.\n nprobes?: number;\n // Multiplier for additional candidate rows during IVF PQ refine step to improve recall accuracy.\n refineFactor?: number;\n // Skip un-indexed data for faster queries when indices are up to date.\n fastSearch?: boolean;\n },\n ): Promise<DataType[]> {\n const { limit = 10, select, filter, nprobes, refineFactor, fastSearch = true } = options ?? 
{};\n\n if (!this.reranker) {\n this.reranker = await lancedb.rerankers.RRFReranker.create();\n }\n\n let vectorQuery = (this.table.search(query, \"hybrid\", \"text\") as lancedb.VectorQuery)\n .rerank(this.reranker)\n .limit(limit);\n\n if (nprobes) {\n vectorQuery = vectorQuery.nprobes(nprobes);\n }\n\n if (refineFactor) {\n vectorQuery = vectorQuery.refineFactor(refineFactor);\n }\n\n if (fastSearch) {\n vectorQuery = vectorQuery.fastSearch();\n }\n\n if (select) {\n vectorQuery = vectorQuery.select(select as string[]);\n }\n\n if (filter) {\n vectorQuery = vectorQuery.where(buildFilter(filter));\n }\n\n return vectorQuery.toArray();\n }\n\n // Print the resolved query plan to identify slow queries and missing indices.\n public async explainPlan(\n query: string,\n options?: {\n limit?: number;\n filter?: Filter<DataType>;\n verbose?: boolean;\n },\n ): Promise<string> {\n const { limit = 10, filter, verbose = true } = options ?? {};\n\n if (!this.reranker) {\n this.reranker = await lancedb.rerankers.RRFReranker.create();\n }\n\n let vectorQuery = (this.table.search(query, \"hybrid\", \"text\") as lancedb.VectorQuery)\n .rerank(this.reranker)\n .limit(limit);\n\n if (filter) {\n vectorQuery = vectorQuery.where(buildFilter(filter));\n }\n\n return vectorQuery.explainPlan(verbose);\n }\n\n // Execute the query and return a physical plan annotated with runtime metrics.\n public async analyzePlan(\n query: string,\n options?: {\n limit?: number;\n filter?: Filter<DataType>;\n },\n ): Promise<string> {\n const { limit = 10, filter } = options ?? {};\n\n if (!this.reranker) {\n this.reranker = await lancedb.rerankers.RRFReranker.create();\n }\n\n let vectorQuery = (this.table.search(query, \"hybrid\", \"text\") as lancedb.VectorQuery)\n .rerank(this.reranker)\n .limit(limit);\n\n if (filter) {\n vectorQuery = vectorQuery.where(buildFilter(filter));\n }\n\n return vectorQuery.analyzePlan();\n }\n}\n",
8
+ "import type { Filter } from \"./types.ts\";\n\nexport const buildFilter = <T>(filter: Filter<T>): string => {\n if (\"AND\" in filter) {\n return `(${filter.AND.map(buildFilter).join(\" AND \")})`;\n }\n if (\"OR\" in filter) {\n return `(${filter.OR.map(buildFilter).join(\" OR \")})`;\n }\n if (\"NOT\" in filter) {\n return `NOT (${buildFilter(filter.NOT)})`;\n }\n\n const col = String(filter.field);\n\n if (\n filter.op === \"IS NULL\" ||\n filter.op === \"IS NOT NULL\" ||\n filter.op === \"IS TRUE\" ||\n filter.op === \"IS NOT TRUE\" ||\n filter.op === \"IS FALSE\" ||\n filter.op === \"IS NOT FALSE\"\n ) {\n return `${col} ${filter.op}`;\n }\n\n if (filter.op === \"IN\") {\n const values = filter.value.map((v) => (typeof v === \"string\" ? `'${v}'` : v));\n return `${col} IN (${values.join(\", \")})`;\n }\n\n if (filter.op === \"LIKE\" || filter.op === \"NOT LIKE\") {\n return `${col} ${filter.op} '${filter.value}'`;\n }\n\n if (filter.op === \">\" || filter.op === \">=\" || filter.op === \"<\" || filter.op === \"<=\" || filter.op === \"=\") {\n return `${col} ${filter.op} ${typeof filter.value === \"string\" ? `'${filter.value}'` : filter.value}`;\n }\n\n return `${col} ${filter.op} ${typeof filter.value === \"string\" ? `'${filter.value}'` : filter.value}`;\n};\n",
9
+ "import path from \"node:path\";\nimport { random } from \"@ooneex/utils\";\nimport { convert } from \"@opendataloader/pdf\";\nimport { ConvertorException } from \"./ConvertorException\";\nimport type { ChunkType, ConvertorFileType, ConvertorOptionsType, IConvertor } from \"./types\";\n\nexport class Convertor implements IConvertor {\n private readonly source: string;\n\n constructor(source: string) {\n this.source = path.join(...source.split(/[/\\\\]/));\n }\n\n public async *convert(\n options: ConvertorOptionsType = {},\n ): AsyncGenerator<ChunkType, { json: ConvertorFileType; markdown: ConvertorFileType }> {\n try {\n const subDir = random.nanoid(15);\n const outputDir = path.join(options.outputDir ?? \"\", subDir);\n const { password, imageFormat, quiet, pages } = options;\n\n await convert([this.source], {\n outputDir,\n format: \"json,markdown\",\n imageDir: path.join(outputDir, \"images\"),\n imageOutput: \"external\",\n ...(password !== undefined && { password }),\n ...(imageFormat !== undefined && { imageFormat }),\n ...(quiet !== undefined && { quiet }),\n ...(pages !== undefined && { pages }),\n });\n\n const glob = new Bun.Glob(\"*\");\n let jsonFile: string | undefined;\n let mdFile: string | undefined;\n for await (const file of glob.scan(outputDir)) {\n if (!jsonFile && file.endsWith(\".json\")) jsonFile = file;\n if (!mdFile && file.endsWith(\".md\")) mdFile = file;\n if (jsonFile && mdFile) break;\n }\n\n if (!jsonFile) {\n throw new ConvertorException(\"No JSON output file found after conversion\", { source: this.source });\n }\n\n if (!mdFile) {\n throw new ConvertorException(\"No Markdown output file found after conversion\", { source: this.source });\n }\n\n const jsonPath = path.join(outputDir, jsonFile);\n const doc = await Bun.file(jsonPath).json();\n const fileName = doc[\"file name\"] ?? path.basename(this.source);\n\n yield* this.generateChunks(doc.kids ?? 
[], fileName);\n\n const renamedJson = `${random.nanoid(15)}.json`;\n const renamedMd = `${random.nanoid(15)}.md`;\n\n const mdPath = path.join(outputDir, mdFile);\n const renamedJsonPath = path.join(outputDir, renamedJson);\n const renamedMdPath = path.join(outputDir, renamedMd);\n\n await Promise.all([Bun.write(renamedJsonPath, Bun.file(jsonPath)), Bun.write(renamedMdPath, Bun.file(mdPath))]);\n\n await Promise.all([Bun.file(jsonPath).delete(), Bun.file(mdPath).delete()]);\n\n return {\n json: { name: renamedJson, path: renamedJsonPath },\n markdown: { name: renamedMd, path: renamedMdPath },\n };\n } catch (error) {\n if (error instanceof ConvertorException) throw error;\n throw new ConvertorException(error instanceof Error ? error.message : \"PDF conversion with chunking failed\", {\n source: this.source,\n });\n }\n }\n\n private *generateChunks(kids: Record<string, unknown>[], source: string): Generator<ChunkType> {\n let currentHeading: string | null = null;\n let currentContent: string[] = [];\n let startPage: number | null = null;\n let pageSet = new Set<number>();\n\n for (const element of kids) {\n const type = element.type as string | undefined;\n if (!type) continue;\n\n if (type === \"heading\") {\n if (currentContent.length > 0) {\n yield {\n text: currentContent.join(\"\\n\"),\n metadata: { heading: currentHeading, page: startPage, pages: Array.from(pageSet), source },\n };\n }\n const content = this.extractContent(element);\n currentHeading = content;\n currentContent = content ? [content] : [];\n startPage = (element[\"page number\"] as number) ?? null;\n pageSet = new Set(startPage !== null ? 
[startPage] : []);\n continue;\n }\n\n if (type === \"paragraph\" || type === \"list\") {\n const content = this.extractContent(element);\n if (content) {\n currentContent.push(content);\n const page = element[\"page number\"] as number | undefined;\n if (page !== undefined) {\n pageSet.add(page);\n }\n }\n }\n }\n\n if (currentContent.length > 0) {\n yield {\n text: currentContent.join(\"\\n\"),\n metadata: { heading: currentHeading, page: startPage, pages: Array.from(pageSet), source },\n };\n }\n }\n\n private extractContent(element: Record<string, unknown>): string | null {\n const parts = Array.from(this.extractTexts(element));\n return parts.length > 0 ? parts.join(\" \") : null;\n }\n\n private *extractTexts(element: Record<string, unknown>): Generator<string> {\n const content = element.content as string | undefined;\n if (content) {\n yield content;\n return;\n }\n\n const kids = element.kids as Record<string, unknown>[] | undefined;\n if (kids) {\n for (const kid of kids) {\n yield* this.extractTexts(kid);\n }\n }\n }\n}\n",
10
+ "import { Exception } from \"@ooneex/exception\";\nimport { HttpStatus } from \"@ooneex/http-status\";\n\nexport class ConvertorException extends Exception {\n constructor(message: string, data: Record<string, unknown> = {}) {\n super(message, {\n status: HttpStatus.Code.InternalServerError,\n data,\n });\n this.name = \"ConvertorException\";\n }\n}\n",
11
+ "import { container, EContainerScope } from \"@ooneex/container\";\nimport type { VectorDatabaseClassType } from \"./types\";\n\nexport const decorator = {\n vectorDatabase: (scope: EContainerScope = EContainerScope.Singleton) => {\n return (target: VectorDatabaseClassType): void => {\n container.add(target, scope);\n };\n },\n};\n",
12
+ "import { AbstractVectorDatabase } from \"./AbstractVectorDatabase.ts\";\nimport type { EmbeddingModelType, EmbeddingProviderType, FieldValueType } from \"./types.ts\";\nimport { Utf8 } from \"apache-arrow\";\n\ntype DataType = {\n name: string;\n}\n\nexport class VectorDatabase extends AbstractVectorDatabase<DataType> {\n public getDatabaseUri(): string {\n return \"\";\n }\n\n public getEmbeddingModel(): { provider: EmbeddingProviderType; model: EmbeddingModelType[\"model\"] } {\n return { provider: \"openai\", model: \"text-embedding-ada-002\" };\n }\n\n public getSchema(): { [K in keyof DataType]: FieldValueType } {\n return {\n name: new Utf8(),\n };\n }\n}\n"
13
+ ],
14
+ "mappings": ";AAAA,mCACA,0CAEA,sBAAS,iBAAa,mCAEtB,eAAS,qBCLT,oBAAS,0BACT,qBAAS,4BAEF,MAAM,UAAgC,CAAU,CACrD,WAAW,CAAC,EAAiB,EAAgC,CAAC,EAAG,CAC/D,MAAM,EAAS,CACb,OAAQ,EAAW,KAAK,oBACxB,MACF,CAAC,EACD,KAAK,KAAO,0BAEhB,CCXA,mCCEO,IAAM,EAAc,CAAI,IAA8B,CAC3D,GAAI,QAAS,EACX,MAAO,IAAI,EAAO,IAAI,IAAI,CAAW,EAAE,KAAK,OAAO,KAErD,GAAI,OAAQ,EACV,MAAO,IAAI,EAAO,GAAG,IAAI,CAAW,EAAE,KAAK,MAAM,KAEnD,GAAI,QAAS,EACX,MAAO,QAAQ,EAAY,EAAO,GAAG,KAGvC,IAAM,EAAM,OAAO,EAAO,KAAK,EAE/B,GACE,EAAO,KAAO,WACd,EAAO,KAAO,eACd,EAAO,KAAO,WACd,EAAO,KAAO,eACd,EAAO,KAAO,YACd,EAAO,KAAO,eAEd,MAAO,GAAG,KAAO,EAAO,KAG1B,GAAI,EAAO,KAAO,KAAM,CACtB,IAAM,EAAS,EAAO,MAAM,IAAI,CAAC,IAAO,OAAO,IAAM,SAAW,IAAI,KAAO,CAAE,EAC7E,MAAO,GAAG,SAAW,EAAO,KAAK,IAAI,KAGvC,GAAI,EAAO,KAAO,QAAU,EAAO,KAAO,WACxC,MAAO,GAAG,KAAO,EAAO,OAAO,EAAO,SAGxC,GAAI,EAAO,KAAO,KAAO,EAAO,KAAO,MAAQ,EAAO,KAAO,KAAO,EAAO,KAAO,MAAQ,EAAO,KAAO,IACtG,MAAO,GAAG,KAAO,EAAO,MAAM,OAAO,EAAO,QAAU,SAAW,IAAI,EAAO,SAAW,EAAO,QAGhG,MAAO,GAAG,KAAO,EAAO,MAAM,OAAO,EAAO,QAAU,SAAW,IAAI,EAAO,SAAW,EAAO,SDnCzF,MAAM,CAAsD,CACzD,MACA,SAAoF,KAE5F,WAAW,CAAC,EAAsB,CAChC,KAAK,MAAQ,OAGF,IAAG,CAAC,EAAkE,CAGjF,OAFA,MAAM,KAAK,MAAM,IAAI,CAAI,EAElB,UAII,YAAW,CACtB,EACA,EAGe,CAGf,OAFA,MAAM,KAAK,MAAM,YAAY,EAAQ,CAAO,EAErC,UAII,kBAAiB,CAC5B,EAAS,SACT,EACe,CAMf,OALA,MAAM,KAAK,MAAM,YAAY,EAAQ,CACnC,OAAgB,QAAM,MAAM,KACzB,CACL,CAAC,EAEM,UAGI,OAAM,CACjB,EACA,EAWqB,CACrB,IAAQ,QAAQ,GAAI,SAAQ,SAAQ,UAAS,eAAc,aAAa,IAAS,GAAW,CAAC,EAE7F,GAAI,CAAC,KAAK,SACR,KAAK,SAAW,MAAc,YAAU,YAAY,OAAO,EAG7D,IAAI,EAAe,KAAK,MAAM,OAAO,EAAO,SAAU,MAAM,EACzD,OAAO,KAAK,QAAQ,EACpB,MAAM,CAAK,EAEd,GAAI,EACF,EAAc,EAAY,QAAQ,CAAO,EAG3C,GAAI,EACF,EAAc,EAAY,aAAa,CAAY,EAGrD,GAAI,EACF,EAAc,EAAY,WAAW,EAGvC,GAAI,EACF,EAAc,EAAY,OAAO,CAAkB,EAGrD,GAAI,EACF,EAAc,EAAY,MAAM,EAAY,CAAM,CAAC,EAGrD,OAAO,EAAY,QAAQ,OAIhB,YAAW,CACtB,EACA,EAKiB,CACjB,IAAQ,QAAQ,GAAI,SAAQ,UAAU,IAAS,GAAW,CAAC,EAE3D,GAAI,CAAC,KAAK,SACR,KAAK,SAAW,MAAc,YAAU,YAAY,OAAO,EAG7D,IAAI,EAAe,KAAK,MAAM,OAAO,EAAO,SAAU,MAAM,EACzD,OAAO,KAAK,QAAQ,EACpB,MAAM,CAAK,EAEd,GAAI,EACF,EAAc,EAAY,MAAM
,EAAY,CAAM,CAAC,EAGrD,OAAO,EAAY,YAAY,CAAO,OAI3B,YAAW,CACtB,EACA,EAIiB,CACjB,IAAQ,QAAQ,GAAI,UAAW,GAAW,CAAC,EAE3C,GAAI,CAAC,KAAK,SACR,KAAK,SAAW,MAAc,YAAU,YAAY,OAAO,EAG7D,IAAI,EAAe,KAAK,MAAM,OAAO,EAAO,SAAU,MAAM,EACzD,OAAO,KAAK,QAAQ,EACpB,MAAM,CAAK,EAEd,GAAI,EACF,EAAc,EAAY,MAAM,EAAY,CAAM,CAAC,EAGrD,OAAO,EAAY,YAAY,EAEnC,CFlIO,MAAe,CAEtB,CACU,GAAgC,KAChC,UAER,WAAW,EAAG,CACZ,IAAQ,WAAU,SAAU,KAAK,kBAAkB,EACnD,KAAK,UAAY,EAAY,EAAE,IAAI,CAAQ,GAAG,OAAO,CAAE,OAAM,CAAC,OAOnD,QAAO,EAAkB,CACpC,KAAK,GAAK,MAAc,UAAQ,KAAK,eAAe,CAAC,EAGhD,WAAW,EAAuB,CACvC,GAAI,CAAC,KAAK,GACR,MAAM,IAAI,EAAwB,+CAA+C,EAGnF,OAAO,KAAK,QAGD,KAAI,CAAC,EAAc,EAA6E,CAE3G,IADmB,MAAM,KAAK,YAAY,EAAE,WAAW,GACxC,SAAS,CAAI,EAAG,CAC7B,IAAM,EAAQ,MAAM,KAAK,YAAY,EAAE,UAAU,CAAI,EACrD,OAAO,IAAI,EAAsB,CAAK,EAGxC,IAAM,EAAS,EAAY,CACzB,GAAI,IAAI,EACR,KAAM,KAAK,UAAU,YAAY,IAAI,CAAM,EAC3C,OAAQ,KAAK,UAAU,YAAY,KAChC,KAAK,UAAU,CACpB,CAAC,EACK,EAAQ,MAAM,KAAK,YAAY,EAAE,iBAAiB,EAAM,EAAQ,CAAE,KAAM,eAAgB,CAAQ,CAAC,EAKvG,OAJA,MAAM,EAAM,YAAY,KAAM,CAAE,OAAgB,QAAM,MAAM,CAAE,CAAC,EAC/D,MAAM,EAAM,YAAY,OAAQ,CAAE,OAAgB,QAAM,IAAI,CAAE,CAAC,EAC/D,MAAM,EAAM,YAAY,SAAU,CAAE,OAAgB,QAAM,MAAM,CAAE,CAAC,EAE5D,IAAI,EAAsB,CAAK,EAE1C,CIzDA,oBACA,iBAAS,sBACT,kBAAS,4BCFT,oBAAS,0BACT,qBAAS,4BAEF,MAAM,UAA2B,CAAU,CAChD,WAAW,CAAC,EAAiB,EAAgC,CAAC,EAAG,CAC/D,MAAM,EAAS,CACb,OAAQ,EAAW,KAAK,oBACxB,MACF,CAAC,EACD,KAAK,KAAO,qBAEhB,CDLO,MAAM,CAAgC,CAC1B,OAEjB,WAAW,CAAC,EAAgB,CAC1B,KAAK,OAAS,EAAK,KAAK,GAAG,EAAO,MAAM,OAAO,CAAC,QAGpC,OAAO,CACnB,EAAgC,CAAC,EACoD,CACrF,GAAI,CACF,IAAM,EAAS,EAAO,OAAO,EAAE,EACzB,EAAY,EAAK,KAAK,EAAQ,WAAa,GAAI,CAAM,GACnD,WAAU,cAAa,QAAO,SAAU,EAEhD,MAAM,EAAQ,CAAC,KAAK,MAAM,EAAG,CAC3B,YACA,OAAQ,gBACR,SAAU,EAAK,KAAK,EAAW,QAAQ,EACvC,YAAa,cACT,IAAa,QAAa,CAAE,UAAS,KACrC,IAAgB,QAAa,CAAE,aAAY,KAC3C,IAAU,QAAa,CAAE,OAAM,KAC/B,IAAU,QAAa,CAAE,OAAM,CACrC,CAAC,EAED,IAAM,EAAO,IAAI,IAAI,KAAK,GAAG,EACzB,EACA,EACJ,cAAiB,KAAQ,EAAK,KAAK,CAAS,EAAG,CAC7C,GAAI,CAAC,GAAY,EAAK,SAAS,OAAO,EAAG,EAAW,EACpD,GAAI,CAAC,GAAU,EAAK,SAAS,KAAK,EAAG,EAAS,EAC9C,GAAI,GAAY,EAAQ,MAG1B,GAAI,C
AAC,EACH,MAAM,IAAI,EAAmB,6CAA8C,CAAE,OAAQ,KAAK,MAAO,CAAC,EAGpG,GAAI,CAAC,EACH,MAAM,IAAI,EAAmB,iDAAkD,CAAE,OAAQ,KAAK,MAAO,CAAC,EAGxG,IAAM,EAAW,EAAK,KAAK,EAAW,CAAQ,EACxC,EAAM,MAAM,IAAI,KAAK,CAAQ,EAAE,KAAK,EACpC,EAAW,EAAI,cAAgB,EAAK,SAAS,KAAK,MAAM,EAE9D,MAAO,KAAK,eAAe,EAAI,MAAQ,CAAC,EAAG,CAAQ,EAEnD,IAAM,EAAc,GAAG,EAAO,OAAO,EAAE,SACjC,EAAY,GAAG,EAAO,OAAO,EAAE,OAE/B,EAAS,EAAK,KAAK,EAAW,CAAM,EACpC,EAAkB,EAAK,KAAK,EAAW,CAAW,EAClD,EAAgB,EAAK,KAAK,EAAW,CAAS,EAMpD,OAJA,MAAM,QAAQ,IAAI,CAAC,IAAI,MAAM,EAAiB,IAAI,KAAK,CAAQ,CAAC,EAAG,IAAI,MAAM,EAAe,IAAI,KAAK,CAAM,CAAC,CAAC,CAAC,EAE9G,MAAM,QAAQ,IAAI,CAAC,IAAI,KAAK,CAAQ,EAAE,OAAO,EAAG,IAAI,KAAK,CAAM,EAAE,OAAO,CAAC,CAAC,EAEnE,CACL,KAAM,CAAE,KAAM,EAAa,KAAM,CAAgB,EACjD,SAAU,CAAE,KAAM,EAAW,KAAM,CAAc,CACnD,EACA,MAAO,EAAO,CACd,GAAI,aAAiB,EAAoB,MAAM,EAC/C,MAAM,IAAI,EAAmB,aAAiB,MAAQ,EAAM,QAAU,sCAAuC,CAC3G,OAAQ,KAAK,MACf,CAAC,IAII,cAAc,CAAC,EAAiC,EAAsC,CAC7F,IAAI,EAAgC,KAChC,EAA2B,CAAC,EAC5B,EAA2B,KAC3B,EAAU,IAAI,IAElB,QAAW,KAAW,EAAM,CAC1B,IAAM,EAAO,EAAQ,KACrB,GAAI,CAAC,EAAM,SAEX,GAAI,IAAS,UAAW,CACtB,GAAI,EAAe,OAAS,EAC1B,KAAM,CACJ,KAAM,EAAe,KAAK;AAAA,CAAI,EAC9B,SAAU,CAAE,QAAS,EAAgB,KAAM,EAAW,MAAO,MAAM,KAAK,CAAO,EAAG,QAAO,CAC3F,EAEF,IAAM,EAAU,KAAK,eAAe,CAAO,EAC3C,EAAiB,EACjB,EAAiB,EAAU,CAAC,CAAO,EAAI,CAAC,EACxC,EAAa,EAAQ,gBAA6B,KAClD,EAAU,IAAI,IAAI,IAAc,KAAO,CAAC,CAAS,EAAI,CAAC,CAAC,EACvD,SAGF,GAAI,IAAS,aAAe,IAAS,OAAQ,CAC3C,IAAM,EAAU,KAAK,eAAe,CAAO,EAC3C,GAAI,EAAS,CACX,EAAe,KAAK,CAAO,EAC3B,IAAM,EAAO,EAAQ,eACrB,GAAI,IAAS,OACX,EAAQ,IAAI,CAAI,IAMxB,GAAI,EAAe,OAAS,EAC1B,KAAM,CACJ,KAAM,EAAe,KAAK;AAAA,CAAI,EAC9B,SAAU,CAAE,QAAS,EAAgB,KAAM,EAAW,MAAO,MAAM,KAAK,CAAO,EAAG,QAAO,CAC3F,EAII,cAAc,CAAC,EAAiD,CACtE,IAAM,EAAQ,MAAM,KAAK,KAAK,aAAa,CAAO,CAAC,EACnD,OAAO,EAAM,OAAS,EAAI,EAAM,KAAK,GAAG,EAAI,MAGrC,YAAY,CAAC,EAAqD,CACzE,IAAM,EAAU,EAAQ,QACxB,GAAI,EAAS,CACX,MAAM,EACN,OAGF,IAAM,EAAO,EAAQ,KACrB,GAAI,EACF,QAAW,KAAO,EAChB,MAAO,KAAK,aAAa,CAAG,EAIpC,CE9IA,oBAAS,qBAAW,0BAGb,IAAM,EAAY,CACvB,eAAgB,CAAC,EAAyB,EAAgB,YAAc,CACtE,MAAO,CAAC,IAA0C,CAChD,EAAU,IAAI,EAAQ,C
AAK,GAGjC,ECPA,eAAS,qBAMF,MAAM,UAAuB,CAAiC,CAC5D,cAAc,EAAW,CAC9B,MAAO,GAGF,iBAAiB,EAA4E,CAClG,MAAO,CAAE,SAAU,SAAU,MAAO,wBAAyB,EAGxD,SAAS,EAA8C,CAC5D,MAAO,CACL,KAAM,IAAI,CACZ,EAEJ",
15
+ "debugId": "FB64C08DA10152BA64756E2164756E21",
16
+ "names": []
17
+ }
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@ooneex/rag",
3
+ "description": "Retrieval-Augmented Generation toolkit with vector database integration, document embedding, and semantic search for AI-powered knowledge retrieval",
4
+ "version": "1.0.0",
5
+ "type": "module",
6
+ "files": [
7
+ "dist",
8
+ "LICENSE",
9
+ "README.md",
10
+ "package.json"
11
+ ],
12
+ "module": "./dist/index.js",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "import": {
17
+ "types": "./dist/index.d.ts",
18
+ "default": "./dist/index.js"
19
+ }
20
+ },
21
+ "./package.json": "./package.json"
22
+ },
23
+ "license": "MIT",
24
+ "scripts": {
25
+ "test": "bun test tests",
26
+ "build": "bunup",
27
+ "lint": "tsgo --noEmit && bunx biome lint",
28
+ "npm:publish": "bun publish --tolerate-republish --access public"
29
+ },
30
+ "keywords": [],
31
+ "dependencies": {
32
+ "@lancedb/lancedb": "^0.27.1",
33
+ "@ooneex/exception": "0.0.18",
34
+ "@ooneex/container": "0.0.19",
35
+ "@ooneex/http-status": "0.0.18",
36
+ "@ooneex/utils": "0.1.1",
37
+ "@opendataloader/pdf": "^2.0.2",
38
+ "apache-arrow": "^21.1.0"
39
+ },
40
+ "devDependencies": {
41
+ "@ooneex/types": "0.0.19"
42
+ }
43
+ }