voctar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +102 -0
  3. package/dist/index.d.ts +6 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +29 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/src/chunking/index.d.ts +48 -0
  8. package/dist/src/chunking/index.d.ts.map +1 -0
  9. package/dist/src/chunking/index.js +123 -0
  10. package/dist/src/chunking/index.js.map +1 -0
  11. package/dist/src/chunking/strategies/fixed.d.ts +14 -0
  12. package/dist/src/chunking/strategies/fixed.d.ts.map +1 -0
  13. package/dist/src/chunking/strategies/fixed.js +111 -0
  14. package/dist/src/chunking/strategies/fixed.js.map +1 -0
  15. package/dist/src/chunking/strategies/paragraph.d.ts +6 -0
  16. package/dist/src/chunking/strategies/paragraph.d.ts.map +1 -0
  17. package/dist/src/chunking/strategies/paragraph.js +84 -0
  18. package/dist/src/chunking/strategies/paragraph.js.map +1 -0
  19. package/dist/src/chunking/strategies/recursive.d.ts +17 -0
  20. package/dist/src/chunking/strategies/recursive.d.ts.map +1 -0
  21. package/dist/src/chunking/strategies/recursive.js +192 -0
  22. package/dist/src/chunking/strategies/recursive.js.map +1 -0
  23. package/dist/src/chunking/strategies/semantic.d.ts +96 -0
  24. package/dist/src/chunking/strategies/semantic.d.ts.map +1 -0
  25. package/dist/src/chunking/strategies/semantic.js +587 -0
  26. package/dist/src/chunking/strategies/semantic.js.map +1 -0
  27. package/dist/src/chunking/strategies/sentence.d.ts +7 -0
  28. package/dist/src/chunking/strategies/sentence.d.ts.map +1 -0
  29. package/dist/src/chunking/strategies/sentence.js +116 -0
  30. package/dist/src/chunking/strategies/sentence.js.map +1 -0
  31. package/dist/src/chunking/types.d.ts +45 -0
  32. package/dist/src/chunking/types.d.ts.map +1 -0
  33. package/dist/src/chunking/types.js +4 -0
  34. package/dist/src/chunking/types.js.map +1 -0
  35. package/dist/src/chunking/utils/tokenizer.d.ts +10 -0
  36. package/dist/src/chunking/utils/tokenizer.d.ts.map +1 -0
  37. package/dist/src/chunking/utils/tokenizer.js +50 -0
  38. package/dist/src/chunking/utils/tokenizer.js.map +1 -0
  39. package/dist/src/providers/embeddings/index.d.ts +3 -0
  40. package/dist/src/providers/embeddings/index.d.ts.map +1 -0
  41. package/dist/src/providers/embeddings/index.js +7 -0
  42. package/dist/src/providers/embeddings/index.js.map +1 -0
  43. package/dist/src/providers/embeddings/openai.d.ts +21 -0
  44. package/dist/src/providers/embeddings/openai.d.ts.map +1 -0
  45. package/dist/src/providers/embeddings/openai.js +86 -0
  46. package/dist/src/providers/embeddings/openai.js.map +1 -0
  47. package/dist/src/providers/index.d.ts +3 -0
  48. package/dist/src/providers/index.d.ts.map +1 -0
  49. package/dist/src/providers/index.js +20 -0
  50. package/dist/src/providers/index.js.map +1 -0
  51. package/dist/src/providers/stores/index.d.ts +6 -0
  52. package/dist/src/providers/stores/index.d.ts.map +1 -0
  53. package/dist/src/providers/stores/index.js +11 -0
  54. package/dist/src/providers/stores/index.js.map +1 -0
  55. package/dist/src/providers/stores/memory.d.ts +18 -0
  56. package/dist/src/providers/stores/memory.d.ts.map +1 -0
  57. package/dist/src/providers/stores/memory.js +169 -0
  58. package/dist/src/providers/stores/memory.js.map +1 -0
  59. package/dist/src/providers/stores/qdrant.d.ts +28 -0
  60. package/dist/src/providers/stores/qdrant.d.ts.map +1 -0
  61. package/dist/src/providers/stores/qdrant.js +223 -0
  62. package/dist/src/providers/stores/qdrant.js.map +1 -0
  63. package/dist/src/providers/stores/sqlite.d.ts +38 -0
  64. package/dist/src/providers/stores/sqlite.d.ts.map +1 -0
  65. package/dist/src/providers/stores/sqlite.js +306 -0
  66. package/dist/src/providers/stores/sqlite.js.map +1 -0
  67. package/dist/src/types.d.ts +111 -0
  68. package/dist/src/types.d.ts.map +1 -0
  69. package/dist/src/types.js +32 -0
  70. package/dist/src/types.js.map +1 -0
  71. package/dist/src/vector.d.ts +74 -0
  72. package/dist/src/vector.d.ts.map +1 -0
  73. package/dist/src/vector.js +505 -0
  74. package/dist/src/vector.js.map +1 -0
  75. package/docs/API.md +361 -0
  76. package/docs/CHUNKING.md +280 -0
  77. package/docs/CUSTOM_PROVIDERS.md +101 -0
  78. package/docs/README.md +11 -0
  79. package/docs/STORAGE_BACKENDS.md +189 -0
  80. package/docs/assets/vectar.png +0 -0
  81. package/package.json +46 -0
@@ -0,0 +1,101 @@
1
+ # Custom Providers
2
+
3
+ Voctar supports custom providers for embeddings and storage.
4
+
5
+ ## Use Custom Providers
6
+
7
+ ```typescript
8
+ import { Voctar } from 'voctar';
9
+
10
+ const vector = new Voctar({
11
+ embedding: {
12
+ type: 'custom',
13
+ provider: myEmbeddingProvider,
14
+ },
15
+ store: {
16
+ type: 'custom',
17
+ provider: myVectorStoreProvider,
18
+ },
19
+ });
20
+ ```
21
+
22
+ ## Custom Embedding Provider
23
+
24
+ Implement the `EmbeddingProvider` interface:
25
+
26
+ ```typescript
27
+ import type { EmbeddingProvider } from 'voctar';
28
+
29
+ export class MyEmbeddingProvider implements EmbeddingProvider {
30
+ async embed(text: string): Promise<number[]> {
31
+ // Return one embedding vector for one text
32
+ return [/* ... */];
33
+ }
34
+
35
+ async embedBatch(texts: string[]): Promise<number[][]> {
36
+ // Return one vector per input text (same order)
37
+ return texts.map(() => [/* ... */]);
38
+ }
39
+
40
+ getDimension(): number {
41
+ return 1536;
42
+ }
43
+
44
+ getModelName(): string {
45
+ return 'my-embedding-model';
46
+ }
47
+
48
+ getTokenLimit(): number {
49
+ return 8192;
50
+ }
51
+ }
52
+ ```
53
+
54
+ ## Custom Store Provider
55
+
56
+ Implement the `VectorStoreProvider` interface:
57
+
58
+ ```typescript
59
+ import type {
60
+ VectorStoreProvider,
61
+ VectorPoint,
62
+ SearchOptions,
63
+ SearchResult,
64
+ CollectionConfig,
65
+ } from 'voctar';
66
+
67
+ export class MyVectorStoreProvider implements VectorStoreProvider {
68
+ async ensureCollection(name: string, dimension: number, config?: CollectionConfig): Promise<void> {
69
+ // Create collection/index if missing
70
+ }
71
+
72
+ async upsert(collection: string, points: VectorPoint[]): Promise<void> {
73
+ // Insert or update vectors
74
+ }
75
+
76
+ async search(collection: string, vector: number[], options: SearchOptions): Promise<SearchResult[]> {
77
+ // Return scored results in descending order of relevance
78
+ return [];
79
+ }
80
+
81
+ async delete(collection: string, ids: string[]): Promise<void> {
82
+ // Delete matching IDs
83
+ }
84
+
85
+ async deleteCollection(collection: string): Promise<void> {
86
+ // Drop collection/index
87
+ }
88
+
89
+ async getIdsByFilter(collection: string, filter: Record<string, any>, limit?: number): Promise<string[]> {
90
+ // Return IDs that match filter
91
+ return [];
92
+ }
93
+ }
94
+ ```
95
+
96
+ ## Integration Tips
97
+
98
+ - Keep the output order of `embedBatch()` identical to the input order (one vector per input text).
99
+ - Ensure `getDimension()` matches vectors returned by `embed()`/`embedBatch()`.
100
+ - Normalize errors with useful messages so callers can debug quickly.
101
+ - Implement filter behavior consistently in `search()` and `getIdsByFilter()`.
package/docs/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # Documentation Index
2
+
3
+ The canonical getting-started guide now lives in the root [`README.md`](../README.md).
4
+
5
+ Use this folder for focused topics:
6
+
7
+ - [API Reference](./API.md)
8
+ - [Custom Providers](./CUSTOM_PROVIDERS.md)
9
+ - [Storage Backends](./STORAGE_BACKENDS.md)
10
+ - [Chunking](./CHUNKING.md)
11
+
@@ -0,0 +1,189 @@
1
+ # Voctar Storage Backends
2
+
3
+ This guide covers the available storage backends in Voctar and when to use each one.
4
+
5
+ Voctar is config-first:
6
+
7
+ - your app chooses the backend,
8
+ - your app reads env vars (if any),
9
+ - your app passes explicit config to `new Voctar(...)`.
10
+
11
+ ## Available Backends
12
+
13
+ Voctar supports:
14
+
15
+ - `sqlite`
16
+ - `qdrant`
17
+ - `memory`
18
+ - `custom`
19
+
20
+ ## Quick Selection Guide
21
+
22
+ - Use `sqlite` for local dev and simple production workloads.
23
+ - Use `qdrant` for larger datasets, higher throughput, or multi-instance deployments.
24
+ - Use `memory` for tests and short-lived demos only.
25
+ - Use `custom` when integrating an internal or third-party vector store.
26
+
27
+ ## SQLite Backend
28
+
29
+ Best for:
30
+
31
+ - local development,
32
+ - prototypes,
33
+ - single-node deployments.
34
+
35
+ Pros:
36
+
37
+ - no external service,
38
+ - persistent file-based storage,
39
+ - simplest setup and operations.
40
+
41
+ Trade-offs:
42
+
43
+ - limited horizontal scaling,
44
+ - shared file contention under high concurrency.
45
+
46
+ Example:
47
+
48
+ ```typescript
49
+ import { Voctar } from 'voctar';
50
+
51
+ const vector = new Voctar({
52
+ embedding: {
53
+ type: 'openai',
54
+ apiKey: process.env.OPENAI_API_KEY!,
55
+ },
56
+ store: {
57
+ type: 'sqlite',
58
+ path: './data/vector.db',
59
+ },
60
+ });
61
+ ```
62
+
63
+ In-memory SQLite (testing only):
64
+
65
+ ```typescript
66
+ store: {
67
+ type: 'sqlite',
68
+ path: ':memory:',
69
+ inMemory: true,
70
+ }
71
+ ```
72
+
73
+ ## Qdrant Backend
74
+
75
+ Best for:
76
+
77
+ - medium and large datasets,
78
+ - high query volume,
79
+ - distributed deployments.
80
+
81
+ Pros:
82
+
83
+ - purpose-built vector DB,
84
+ - strong scale characteristics,
85
+ - rich filtering support.
86
+
87
+ Trade-offs:
88
+
89
+ - extra service to operate,
90
+ - network hop adds latency and operational complexity.
91
+
92
+ Example:
93
+
94
+ ```typescript
95
+ import { Voctar } from 'voctar';
96
+
97
+ const vector = new Voctar({
98
+ embedding: {
99
+ type: 'openai',
100
+ apiKey: process.env.OPENAI_API_KEY!,
101
+ },
102
+ store: {
103
+ type: 'qdrant',
104
+ url: process.env.QDRANT_URL!,
105
+ port: process.env.QDRANT_PORT ? Number(process.env.QDRANT_PORT) : 6333,
106
+ apiKey: process.env.QDRANT_API_KEY || undefined,
107
+ timeout: 30000,
108
+ checkCompatibility: false,
109
+ },
110
+ });
111
+ ```
112
+
113
+ ## In-Memory Backend
114
+
115
+ Best for:
116
+
117
+ - unit tests,
118
+ - quick local examples.
119
+
120
+ Trade-offs:
121
+
122
+ - data is lost on restart,
123
+ - unsuitable for production persistence.
124
+
125
+ Example:
126
+
127
+ ```typescript
128
+ import { Voctar } from 'voctar';
129
+
130
+ const vector = new Voctar({
131
+ embedding: {
132
+ type: 'openai',
133
+ apiKey: process.env.OPENAI_API_KEY!,
134
+ },
135
+ store: {
136
+ type: 'memory',
137
+ },
138
+ });
139
+ ```
140
+
141
+ ## Custom Backend
142
+
143
+ Use this when you need full control over storage behavior.
144
+
145
+ The provider must implement `VectorStoreProvider`.
146
+
147
+ Example:
148
+
149
+ ```typescript
150
+ import { Voctar } from 'voctar';
151
+
152
+ const vector = new Voctar({
153
+ embedding: {
154
+ type: 'openai',
155
+ apiKey: process.env.OPENAI_API_KEY!,
156
+ },
157
+ store: {
158
+ type: 'custom',
159
+ provider: myVectorStoreProvider,
160
+ },
161
+ });
162
+ ```
163
+
164
+ See [`CUSTOM_PROVIDERS.md`](./CUSTOM_PROVIDERS.md) for full interface details.
165
+
166
+ ## Environment Variable Pattern (App-Owned)
167
+
168
+ Voctar does not auto-load env vars, but many apps use a selector like this:
169
+
170
+ ```bash
171
+ VECTOR_STORE=sqlite # sqlite | qdrant | memory
172
+ SQLITE_PATH=./data/vector.db
173
+ QDRANT_URL=http://localhost
174
+ QDRANT_PORT=6333
175
+ QDRANT_API_KEY=your_api_key
176
+ ```
177
+
178
+ Then in app bootstrap:
179
+
180
+ ```typescript
181
+ const storeType = process.env.VECTOR_STORE ?? 'sqlite';
182
+ ```
183
+
184
+ ## Migration and Operations Notes
185
+
186
+ - Start with `sqlite` if you are early-stage.
187
+ - Move to `qdrant` when dataset size, traffic, or deployment topology requires it.
188
+ - Back up SQLite database files regularly.
189
+ - For Qdrant, use snapshots/backups supported by your Qdrant setup.
Binary file
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "voctar",
3
+ "version": "0.1.0",
4
+ "description": "TypeScript library with RAG primitives for vector embeddings, chunking, storing and retrieval.",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist",
9
+ "README.md",
10
+ "docs"
11
+ ],
12
+ "scripts": {
13
+ "build": "tsc -p tsconfig.json",
14
+ "clean": "rm -rf dist",
15
+ "prepublishOnly": "yarn clean && yarn build"
16
+ },
17
+ "keywords": [
18
+ "rag",
19
+ "vector",
20
+ "vector-database",
21
+ "embeddings",
22
+ "chunking",
23
+ "semantic-search",
24
+ "semantic-retrieval",
25
+ "typescript",
26
+ "qdrant",
27
+ "sqlite"
28
+ ],
29
+ "license": "MIT",
30
+ "engines": {
31
+ "node": ">=18"
32
+ },
33
+ "dependencies": {
34
+ "@qdrant/js-client-rest": "^1.13.0",
35
+ "better-sqlite3": "^12.4.1",
36
+ "openai": "^6.3.0",
37
+ "tiktoken": "^1.0.22",
38
+ "uuid": "^9.0.1"
39
+ },
40
+ "devDependencies": {
41
+ "@types/better-sqlite3": "^7.6.13",
42
+ "@types/node": "^24.0.0",
43
+ "@types/uuid": "^9.0.8",
44
+ "typescript": "^5.8.3"
45
+ }
46
+ }