voctar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +102 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/src/chunking/index.d.ts +48 -0
- package/dist/src/chunking/index.d.ts.map +1 -0
- package/dist/src/chunking/index.js +123 -0
- package/dist/src/chunking/index.js.map +1 -0
- package/dist/src/chunking/strategies/fixed.d.ts +14 -0
- package/dist/src/chunking/strategies/fixed.d.ts.map +1 -0
- package/dist/src/chunking/strategies/fixed.js +111 -0
- package/dist/src/chunking/strategies/fixed.js.map +1 -0
- package/dist/src/chunking/strategies/paragraph.d.ts +6 -0
- package/dist/src/chunking/strategies/paragraph.d.ts.map +1 -0
- package/dist/src/chunking/strategies/paragraph.js +84 -0
- package/dist/src/chunking/strategies/paragraph.js.map +1 -0
- package/dist/src/chunking/strategies/recursive.d.ts +17 -0
- package/dist/src/chunking/strategies/recursive.d.ts.map +1 -0
- package/dist/src/chunking/strategies/recursive.js +192 -0
- package/dist/src/chunking/strategies/recursive.js.map +1 -0
- package/dist/src/chunking/strategies/semantic.d.ts +96 -0
- package/dist/src/chunking/strategies/semantic.d.ts.map +1 -0
- package/dist/src/chunking/strategies/semantic.js +587 -0
- package/dist/src/chunking/strategies/semantic.js.map +1 -0
- package/dist/src/chunking/strategies/sentence.d.ts +7 -0
- package/dist/src/chunking/strategies/sentence.d.ts.map +1 -0
- package/dist/src/chunking/strategies/sentence.js +116 -0
- package/dist/src/chunking/strategies/sentence.js.map +1 -0
- package/dist/src/chunking/types.d.ts +45 -0
- package/dist/src/chunking/types.d.ts.map +1 -0
- package/dist/src/chunking/types.js +4 -0
- package/dist/src/chunking/types.js.map +1 -0
- package/dist/src/chunking/utils/tokenizer.d.ts +10 -0
- package/dist/src/chunking/utils/tokenizer.d.ts.map +1 -0
- package/dist/src/chunking/utils/tokenizer.js +50 -0
- package/dist/src/chunking/utils/tokenizer.js.map +1 -0
- package/dist/src/providers/embeddings/index.d.ts +3 -0
- package/dist/src/providers/embeddings/index.d.ts.map +1 -0
- package/dist/src/providers/embeddings/index.js +7 -0
- package/dist/src/providers/embeddings/index.js.map +1 -0
- package/dist/src/providers/embeddings/openai.d.ts +21 -0
- package/dist/src/providers/embeddings/openai.d.ts.map +1 -0
- package/dist/src/providers/embeddings/openai.js +86 -0
- package/dist/src/providers/embeddings/openai.js.map +1 -0
- package/dist/src/providers/index.d.ts +3 -0
- package/dist/src/providers/index.d.ts.map +1 -0
- package/dist/src/providers/index.js +20 -0
- package/dist/src/providers/index.js.map +1 -0
- package/dist/src/providers/stores/index.d.ts +6 -0
- package/dist/src/providers/stores/index.d.ts.map +1 -0
- package/dist/src/providers/stores/index.js +11 -0
- package/dist/src/providers/stores/index.js.map +1 -0
- package/dist/src/providers/stores/memory.d.ts +18 -0
- package/dist/src/providers/stores/memory.d.ts.map +1 -0
- package/dist/src/providers/stores/memory.js +169 -0
- package/dist/src/providers/stores/memory.js.map +1 -0
- package/dist/src/providers/stores/qdrant.d.ts +28 -0
- package/dist/src/providers/stores/qdrant.d.ts.map +1 -0
- package/dist/src/providers/stores/qdrant.js +223 -0
- package/dist/src/providers/stores/qdrant.js.map +1 -0
- package/dist/src/providers/stores/sqlite.d.ts +38 -0
- package/dist/src/providers/stores/sqlite.d.ts.map +1 -0
- package/dist/src/providers/stores/sqlite.js +306 -0
- package/dist/src/providers/stores/sqlite.js.map +1 -0
- package/dist/src/types.d.ts +111 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +32 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/vector.d.ts +74 -0
- package/dist/src/vector.d.ts.map +1 -0
- package/dist/src/vector.js +505 -0
- package/dist/src/vector.js.map +1 -0
- package/docs/API.md +361 -0
- package/docs/CHUNKING.md +280 -0
- package/docs/CUSTOM_PROVIDERS.md +101 -0
- package/docs/README.md +11 -0
- package/docs/STORAGE_BACKENDS.md +189 -0
- package/docs/assets/vectar.png +0 -0
- package/package.json +46 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Custom Providers
|
|
2
|
+
|
|
3
|
+
Voctar supports custom providers for embeddings and storage.
|
|
4
|
+
|
|
5
|
+
## Use Custom Providers
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import { Voctar } from 'voctar';
|
|
9
|
+
|
|
10
|
+
const vector = new Voctar({
|
|
11
|
+
embedding: {
|
|
12
|
+
type: 'custom',
|
|
13
|
+
provider: myEmbeddingProvider,
|
|
14
|
+
},
|
|
15
|
+
store: {
|
|
16
|
+
type: 'custom',
|
|
17
|
+
provider: myVectorStoreProvider,
|
|
18
|
+
},
|
|
19
|
+
});
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Custom Embedding Provider
|
|
23
|
+
|
|
24
|
+
Implement the `EmbeddingProvider` interface:
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
import type { EmbeddingProvider } from 'voctar';
|
|
28
|
+
|
|
29
|
+
export class MyEmbeddingProvider implements EmbeddingProvider {
|
|
30
|
+
async embed(text: string): Promise<number[]> {
|
|
31
|
+
// Return one embedding vector for one text
|
|
32
|
+
return [/* ... */];
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
async embedBatch(texts: string[]): Promise<number[][]> {
|
|
36
|
+
// Return one vector per input text (same order)
|
|
37
|
+
return texts.map(() => [/* ... */]);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
getDimension(): number {
|
|
41
|
+
return 1536;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
getModelName(): string {
|
|
45
|
+
return 'my-embedding-model';
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
getTokenLimit(): number {
|
|
49
|
+
return 8192;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Custom Store Provider
|
|
55
|
+
|
|
56
|
+
Implement the `VectorStoreProvider` interface:
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
import type {
|
|
60
|
+
VectorStoreProvider,
|
|
61
|
+
VectorPoint,
|
|
62
|
+
SearchOptions,
|
|
63
|
+
SearchResult,
|
|
64
|
+
CollectionConfig,
|
|
65
|
+
} from 'voctar';
|
|
66
|
+
|
|
67
|
+
export class MyVectorStoreProvider implements VectorStoreProvider {
|
|
68
|
+
async ensureCollection(name: string, dimension: number, config?: CollectionConfig): Promise<void> {
|
|
69
|
+
// Create collection/index if missing
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
async upsert(collection: string, points: VectorPoint[]): Promise<void> {
|
|
73
|
+
// Insert or update vectors
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
async search(collection: string, vector: number[], options: SearchOptions): Promise<SearchResult[]> {
|
|
77
|
+
// Return scored results in descending relevance
|
|
78
|
+
return [];
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
async delete(collection: string, ids: string[]): Promise<void> {
|
|
82
|
+
// Delete matching IDs
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
async deleteCollection(collection: string): Promise<void> {
|
|
86
|
+
// Drop collection/index
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async getIdsByFilter(collection: string, filter: Record<string, any>, limit?: number): Promise<string[]> {
|
|
90
|
+
// Return IDs that match filter
|
|
91
|
+
return [];
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Integration Tips
|
|
97
|
+
|
|
98
|
+
- Keep `embedBatch()` order stable with input order.
|
|
99
|
+
- Ensure `getDimension()` matches vectors returned by `embed()`/`embedBatch()`.
|
|
100
|
+
- Normalize errors with useful messages so callers can debug quickly.
|
|
101
|
+
- Implement filter behavior consistently in `search()` and `getIdsByFilter()`.
|
package/docs/README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Documentation Index
|
|
2
|
+
|
|
3
|
+
The canonical getting-started guide now lives in the root [`README.md`](../README.md).
|
|
4
|
+
|
|
5
|
+
Use this folder for focused topics:
|
|
6
|
+
|
|
7
|
+
- [API Reference](./API.md)
|
|
8
|
+
- [Custom Providers](./CUSTOM_PROVIDERS.md)
|
|
9
|
+
- [Storage Backends](./STORAGE_BACKENDS.md)
|
|
10
|
+
- [Chunking](./CHUNKING.md)
|
|
11
|
+
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Voctar Storage Backends
|
|
2
|
+
|
|
3
|
+
This guide covers the available storage backends in Voctar and when to use each one.
|
|
4
|
+
|
|
5
|
+
Voctar is config-first:
|
|
6
|
+
|
|
7
|
+
- your app chooses the backend,
|
|
8
|
+
- your app reads env vars (if any),
|
|
9
|
+
- your app passes explicit config to `new Voctar(...)`.
|
|
10
|
+
|
|
11
|
+
## Available Backends
|
|
12
|
+
|
|
13
|
+
Voctar supports:
|
|
14
|
+
|
|
15
|
+
- `sqlite`
|
|
16
|
+
- `qdrant`
|
|
17
|
+
- `memory`
|
|
18
|
+
- `custom`
|
|
19
|
+
|
|
20
|
+
## Quick Selection Guide
|
|
21
|
+
|
|
22
|
+
- Use `sqlite` for local dev and simple production workloads.
|
|
23
|
+
- Use `qdrant` for larger datasets, higher throughput, or multi-instance deployments.
|
|
24
|
+
- Use `memory` for tests and short-lived demos only.
|
|
25
|
+
- Use `custom` when integrating an internal or third-party vector store.
|
|
26
|
+
|
|
27
|
+
## SQLite Backend
|
|
28
|
+
|
|
29
|
+
Best for:
|
|
30
|
+
|
|
31
|
+
- local development,
|
|
32
|
+
- prototypes,
|
|
33
|
+
- single-node deployments.
|
|
34
|
+
|
|
35
|
+
Pros:
|
|
36
|
+
|
|
37
|
+
- no external service,
|
|
38
|
+
- persistent file-based storage,
|
|
39
|
+
- simplest setup and operations.
|
|
40
|
+
|
|
41
|
+
Trade-offs:
|
|
42
|
+
|
|
43
|
+
- limited horizontal scaling,
|
|
44
|
+
- shared file contention under high concurrency.
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
import { Voctar } from 'voctar';
|
|
50
|
+
|
|
51
|
+
const vector = new Voctar({
|
|
52
|
+
embedding: {
|
|
53
|
+
type: 'openai',
|
|
54
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
55
|
+
},
|
|
56
|
+
store: {
|
|
57
|
+
type: 'sqlite',
|
|
58
|
+
path: './data/vector.db',
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
In-memory SQLite (testing only):
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
store: {
|
|
67
|
+
type: 'sqlite',
|
|
68
|
+
path: ':memory:',
|
|
69
|
+
inMemory: true,
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Qdrant Backend
|
|
74
|
+
|
|
75
|
+
Best for:
|
|
76
|
+
|
|
77
|
+
- medium and large datasets,
|
|
78
|
+
- high query volume,
|
|
79
|
+
- distributed deployments.
|
|
80
|
+
|
|
81
|
+
Pros:
|
|
82
|
+
|
|
83
|
+
- purpose-built vector DB,
|
|
84
|
+
- strong scale characteristics,
|
|
85
|
+
- rich filtering support.
|
|
86
|
+
|
|
87
|
+
Trade-offs:
|
|
88
|
+
|
|
89
|
+
- extra service to operate,
|
|
90
|
+
- network hop adds latency and operational complexity.
|
|
91
|
+
|
|
92
|
+
Example:
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
import { Voctar } from 'voctar';
|
|
96
|
+
|
|
97
|
+
const vector = new Voctar({
|
|
98
|
+
embedding: {
|
|
99
|
+
type: 'openai',
|
|
100
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
101
|
+
},
|
|
102
|
+
store: {
|
|
103
|
+
type: 'qdrant',
|
|
104
|
+
url: process.env.QDRANT_URL!,
|
|
105
|
+
port: process.env.QDRANT_PORT ? Number(process.env.QDRANT_PORT) : 6333,
|
|
106
|
+
apiKey: process.env.QDRANT_API_KEY || undefined,
|
|
107
|
+
timeout: 30000,
|
|
108
|
+
checkCompatibility: false,
|
|
109
|
+
},
|
|
110
|
+
});
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## In-Memory Backend
|
|
114
|
+
|
|
115
|
+
Best for:
|
|
116
|
+
|
|
117
|
+
- unit tests,
|
|
118
|
+
- quick local examples.
|
|
119
|
+
|
|
120
|
+
Trade-offs:
|
|
121
|
+
|
|
122
|
+
- data is lost on restart,
|
|
123
|
+
- unsuitable for production persistence.
|
|
124
|
+
|
|
125
|
+
Example:
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
import { Voctar } from 'voctar';
|
|
129
|
+
|
|
130
|
+
const vector = new Voctar({
|
|
131
|
+
embedding: {
|
|
132
|
+
type: 'openai',
|
|
133
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
134
|
+
},
|
|
135
|
+
store: {
|
|
136
|
+
type: 'memory',
|
|
137
|
+
},
|
|
138
|
+
});
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Custom Backend
|
|
142
|
+
|
|
143
|
+
Use this when you need full control over storage behavior.
|
|
144
|
+
|
|
145
|
+
The provider must implement `VectorStoreProvider`.
|
|
146
|
+
|
|
147
|
+
Example:
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
import { Voctar } from 'voctar';
|
|
151
|
+
|
|
152
|
+
const vector = new Voctar({
|
|
153
|
+
embedding: {
|
|
154
|
+
type: 'openai',
|
|
155
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
156
|
+
},
|
|
157
|
+
store: {
|
|
158
|
+
type: 'custom',
|
|
159
|
+
provider: myVectorStoreProvider,
|
|
160
|
+
},
|
|
161
|
+
});
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
See [`CUSTOM_PROVIDERS.md`](./CUSTOM_PROVIDERS.md) for full interface details.
|
|
165
|
+
|
|
166
|
+
## Environment Variable Pattern (App-Owned)
|
|
167
|
+
|
|
168
|
+
Voctar does not auto-load env vars, but many apps use a selector like this:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
VECTOR_STORE=sqlite # sqlite | qdrant | memory
|
|
172
|
+
SQLITE_PATH=./data/vector.db
|
|
173
|
+
QDRANT_URL=http://localhost
|
|
174
|
+
QDRANT_PORT=6333
|
|
175
|
+
QDRANT_API_KEY=your_api_key
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Then in app bootstrap:
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
const storeType = process.env.VECTOR_STORE ?? 'sqlite';
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Migration and Operations Notes
|
|
185
|
+
|
|
186
|
+
- Start with `sqlite` if you are early-stage.
|
|
187
|
+
- Move to `qdrant` when dataset size, traffic, or deployment topology requires it.
|
|
188
|
+
- Back up SQLite database files regularly.
|
|
189
|
+
- For Qdrant, use snapshots/backups supported by your Qdrant setup.
|
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "voctar",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "TypeScript library with RAG primitives for vector embeddings, chunking, storing and retrieval.",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist",
|
|
9
|
+
"README.md",
|
|
10
|
+
"docs"
|
|
11
|
+
],
|
|
12
|
+
"scripts": {
|
|
13
|
+
"build": "tsc -p tsconfig.json",
|
|
14
|
+
"clean": "rm -rf dist",
|
|
15
|
+
"prepublishOnly": "yarn clean && yarn build"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"rag",
|
|
19
|
+
"vector",
|
|
20
|
+
"vector-database",
|
|
21
|
+
"embeddings",
|
|
22
|
+
"chunking",
|
|
23
|
+
"semantic-search",
|
|
24
|
+
"semantic-retrieval",
|
|
25
|
+
"typescript",
|
|
26
|
+
"qdrant",
|
|
27
|
+
"sqlite"
|
|
28
|
+
],
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@qdrant/js-client-rest": "^1.13.0",
|
|
35
|
+
"better-sqlite3": "^12.4.1",
|
|
36
|
+
"openai": "^6.3.0",
|
|
37
|
+
"tiktoken": "^1.0.22",
|
|
38
|
+
"uuid": "^9.0.1"
|
|
39
|
+
},
|
|
40
|
+
"devDependencies": {
|
|
41
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
42
|
+
"@types/node": "^24.0.0",
|
|
43
|
+
"@types/uuid": "^9.0.8",
|
|
44
|
+
"typescript": "^5.8.3"
|
|
45
|
+
}
|
|
46
|
+
}
|