@ixo/data-store 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.eslintrc.js +9 -0
  2. package/.prettierignore +3 -0
  3. package/.prettierrc.js +4 -0
  4. package/.turbo/turbo-build.log +4 -0
  5. package/CHANGELOG.md +7 -0
  6. package/README.md +276 -0
  7. package/dist/airtable-store/index.d.ts +42 -0
  8. package/dist/airtable-store/index.d.ts.map +1 -0
  9. package/dist/airtable-store/index.js +79 -0
  10. package/dist/chroma/chroma-data-store.d.ts +74 -0
  11. package/dist/chroma/chroma-data-store.d.ts.map +1 -0
  12. package/dist/chroma/chroma-data-store.js +196 -0
  13. package/dist/chroma/chroma-data-store.test.d.ts +2 -0
  14. package/dist/chroma/chroma-data-store.test.d.ts.map +1 -0
  15. package/dist/chroma/chroma-data-store.test.js +176 -0
  16. package/dist/chroma/chroma.types.d.ts +24 -0
  17. package/dist/chroma/chroma.types.d.ts.map +1 -0
  18. package/dist/chroma/chroma.types.js +2 -0
  19. package/dist/chroma/embedding-function.d.ts +14 -0
  20. package/dist/chroma/embedding-function.d.ts.map +1 -0
  21. package/dist/chroma/embedding-function.js +26 -0
  22. package/dist/chroma/index.d.ts +3 -0
  23. package/dist/chroma/index.d.ts.map +1 -0
  24. package/dist/chroma/index.js +18 -0
  25. package/dist/chroma/utils.d.ts +6 -0
  26. package/dist/chroma/utils.d.ts.map +1 -0
  27. package/dist/chroma/utils.js +31 -0
  28. package/dist/index.d.ts +4 -0
  29. package/dist/index.d.ts.map +1 -0
  30. package/dist/index.js +19 -0
  31. package/dist/types/index.d.ts +3 -0
  32. package/dist/types/index.d.ts.map +1 -0
  33. package/dist/types/index.js +18 -0
  34. package/dist/types/structured-data-store.d.ts +24 -0
  35. package/dist/types/structured-data-store.d.ts.map +1 -0
  36. package/dist/types/structured-data-store.js +2 -0
  37. package/dist/types/vector-db-data-store.d.ts +58 -0
  38. package/dist/types/vector-db-data-store.d.ts.map +1 -0
  39. package/dist/types/vector-db-data-store.js +10 -0
  40. package/dist/utils/index.d.ts +2 -0
  41. package/dist/utils/index.d.ts.map +1 -0
  42. package/dist/utils/index.js +17 -0
  43. package/dist/utils/with-report-error.d.ts +5 -0
  44. package/dist/utils/with-report-error.d.ts.map +1 -0
  45. package/dist/utils/with-report-error.js +21 -0
  46. package/jest.config.js +6 -0
  47. package/package.json +49 -0
  48. package/src/airtable-store/index.ts +141 -0
  49. package/src/chroma/chroma-data-store.test.ts +210 -0
  50. package/src/chroma/chroma-data-store.ts +254 -0
  51. package/src/chroma/chroma.types.ts +28 -0
  52. package/src/chroma/embedding-function.ts +26 -0
  53. package/src/chroma/index.ts +2 -0
  54. package/src/chroma/utils.ts +40 -0
  55. package/src/index.ts +4 -0
  56. package/src/types/index.ts +2 -0
  57. package/src/types/structured-data-store.ts +34 -0
  58. package/src/types/vector-db-data-store.ts +78 -0
  59. package/src/utils/index.ts +1 -0
  60. package/src/utils/with-report-error.ts +18 -0
  61. package/tsconfig.json +7 -0
@@ -0,0 +1,254 @@
1
+ import {
2
+ ChromaClient,
3
+ type Collection,
4
+ type Embeddings,
5
+ type Metadata,
6
+ type QueryRecordsParams,
7
+ } from 'chromadb';
8
+ import {
9
+ VectorDBDataStore,
10
+ type IVectorStoreDocument,
11
+ type IVectorStoreDocumentWithEmbeddings,
12
+ type IVectorStoreOptions,
13
+ type IVectorStoreQueryOptions,
14
+ } from '../types/vector-db-data-store';
15
+ import { type IChromaMetadataFilter } from './chroma.types';
16
+ import { OpenAIEmbeddingFunction } from './embedding-function';
17
+ import { createVectorStoreSearchResult } from './utils';
18
+
19
/**
 * Vector store implementation backed by a ChromaDB collection.
 *
 * `init()` must be awaited before any read or write method is used; until
 * then those methods fail with 'ChromaDataStore is not initialized'.
 */
class ChromaDataStore extends VectorDBDataStore {
  private readonly client: ChromaClient;

  // Stays undefined until init() has resolved.
  private collection: Collection | undefined;

  /**
   * Creates a new ChromaDataStore instance.
   *
   * Missing/empty option values are defaulted: an OpenAI embedding function
   * ('text-embedding-3-small'), url 'http://localhost:8000' and collection
   * name 'default-vector-store'. The caller's `options` object is not mutated.
   *
   * @param options - Configuration options for the vector store
   * @throws Error if OPENAI_API_KEY is not set and no embedding function is provided
   */
  constructor(options: IVectorStoreOptions) {
    if (
      typeof process.env.OPENAI_API_KEY !== 'string' &&
      !options.embeddingFunction
    ) {
      throw new Error(
        'OPENAI_API_KEY is not set and no embedding function is provided',
      );
    }

    // Defaults are resolved on a copy so the caller's object stays untouched.
    // `||` (not `??`) is deliberate: an empty url / collection name also
    // falls back to the default.
    const resolvedOptions: IVectorStoreOptions = {
      ...options,
      embeddingFunction:
        options.embeddingFunction ||
        new OpenAIEmbeddingFunction({
          openai_api_key: process.env.OPENAI_API_KEY ?? '',
          openai_model: 'text-embedding-3-small',
        }),
      url: options.url || 'http://localhost:8000',
      collectionName: options.collectionName || 'default-vector-store',
    };

    super(resolvedOptions);
    this.client = new ChromaClient({
      path: resolvedOptions.url,
    });
    this.collection = undefined;
  }

  /**
   * Returns the initialized collection.
   * @throws Error if init() has not completed yet
   */
  private getCollection(): Collection {
    if (!this.collection) {
      throw new Error('ChromaDataStore is not initialized');
    }
    return this.collection;
  }

  /**
   * Initializes the ChromaDB collection (gets it, or creates it when absent).
   * Calling it again after a successful initialization is a no-op.
   */
  async init(): Promise<void> {
    if (this.collection) {
      return;
    }
    // NOTE(review): no distance metric is configured here, while query results
    // are converted to scores as if distances were cosine distances — confirm
    // the collection's configured distance space matches that assumption.
    this.collection = await this.client.getOrCreateCollection({
      name: this.options.collectionName,
      embeddingFunction: this.options.embeddingFunction,
    });
  }

  /**
   * Queries the vector store using text
   * @param query - Text query to search for
   * @param options - Query options including filters and top-k results (default 10)
   * @returns Array of matching documents
   * @throws Error if the store is not initialized
   */
  async query(
    query: string,
    options?: IVectorStoreQueryOptions<IChromaMetadataFilter>,
  ): Promise<IVectorStoreDocument[]> {
    const collection = this.getCollection();
    const params: QueryRecordsParams = {
      queryTexts: [query],
      // `||` keeps the original behavior: a topK of 0 falls back to 10.
      nResults: options?.topK || 10,
    };
    if (options?.filters) {
      params.where = options.filters;
    }
    const result = await collection.query(params);
    return createVectorStoreSearchResult(result);
  }

  /**
   * Queries the vector store and filters results by similarity threshold
   * @param query - Text query to search for
   * @param options - Query options including similarity threshold (default 0.5)
   * @returns Array of documents whose score is defined and >= the threshold
   * @throws Error if the store is not initialized
   */
  async queryWithSimilarity(
    query: string,
    options?: IVectorStoreQueryOptions<IChromaMetadataFilter> & {
      similarityThreshold: number;
    },
  ): Promise<IVectorStoreDocument[]> {
    const threshold = options?.similarityThreshold ?? 0.5;
    // query() performs the initialization check.
    const documents = await this.query(query, options);
    // Compare against undefined explicitly: a score of exactly 0 is a valid
    // number and must not be discarded by a truthiness test.
    return documents.filter(
      (doc) => doc.score !== undefined && doc.score >= threshold,
    );
  }

  /**
   * Upserts (inserts or updates) documents into the vector store
   * @param documents - Array of documents to upsert
   * @throws Error if any document is missing an ID (nothing is sent in that case)
   * @throws Error if the store is not initialized
   */
  async upsert(documents: IVectorStoreDocument[]): Promise<void> {
    const collection = this.getCollection();
    const ids: string[] = [];
    const contents: string[] = [];
    const metadatas: Metadata[] = [];

    for (const doc of documents) {
      if (!doc.id) {
        throw new Error('Document ID is required');
      }
      ids.push(doc.id);
      contents.push(doc.content);
      metadatas.push(doc.metadata ?? {});
    }

    await collection.upsert({
      ids,
      documents: contents,
      metadatas,
    });
  }

  /**
   * Adds documents together with their pre-computed embeddings
   * @param documents - Array of documents, each carrying its embedding
   * @throws Error if any document is missing an ID
   * @throws Error if any embedding has fewer than 1536 entries
   * @throws Error if the store is not initialized
   */
  async addDocumentsWithEmbeddings(
    documents: IVectorStoreDocumentWithEmbeddings[],
  ): Promise<void> {
    const collection = this.getCollection();
    // NOTE(review): 1536 presumably matches the default
    // 'text-embedding-3-small' model — confirm before relying on a custom
    // embedding function that produces shorter vectors.
    const minEmbeddingLength = 1536;
    const ids: string[] = [];
    const embeddings: Embeddings = [];
    const metadatas: Metadata[] = [];
    const contents: string[] = [];

    for (const doc of documents) {
      if (!doc.id) {
        throw new Error('Document ID is required');
      }
      if (doc.embedding.length < minEmbeddingLength) {
        // Message still starts with the original text so existing matchers
        // keep working, but now states the actual requirement.
        throw new Error(
          `Embedding is required and must have at least ${minEmbeddingLength} dimensions (received ${doc.embedding.length})`,
        );
      }
      ids.push(doc.id);
      embeddings.push(doc.embedding);
      metadatas.push(doc.metadata ?? {});
      contents.push(doc.content);
    }

    await collection.add({
      ids,
      embeddings,
      metadatas,
      documents: contents,
    });
  }

  /**
   * Deletes documents from the vector store by their IDs
   * @param ids - Array of document IDs to delete
   * @throws Error if the store is not initialized
   */
  async delete(ids: string[]): Promise<void> {
    const collection = this.getCollection();
    await collection.delete({
      ids,
    });
  }

  /**
   * Retrieves a document by its ID
   * @param id - Document ID to retrieve
   * @returns The document if found, null otherwise
   * @throws Error if the store is not initialized
   */
  async getById(id: string): Promise<IVectorStoreDocument | null> {
    const collection = this.getCollection();
    const result = await collection.get({
      ids: [id],
    });

    // Presence is decided by the returned id, not by the document text, so a
    // stored record with empty content is still reported as found.
    const foundId = result.ids.at(0);
    if (foundId === undefined) {
      return null;
    }
    return {
      id: foundId.toString(),
      content: result.documents.at(0) ?? '',
      metadata: result.metadatas.at(0) ?? {},
      score: undefined,
    };
  }

  /**
   * Queries the vector store using a pre-computed embedding vector
   * @param vector - The embedding vector to search with
   * @param options - Query options including filters and top-k results (default 10)
   * @returns Array of matching documents
   * @throws Error if the store is not initialized
   */
  async queryByVector(
    vector: number[],
    options?: IVectorStoreQueryOptions,
  ): Promise<IVectorStoreDocument[]> {
    const collection = this.getCollection();
    const params: QueryRecordsParams = {
      queryEmbeddings: [vector],
      nResults: options?.topK || 10,
    };
    if (options?.filters) {
      // Honor metadata filters the same way query() does instead of silently
      // ignoring them. The base-class signature only guarantees a generic
      // record, hence the assertion to the Chroma filter shape.
      params.where = options.filters as IChromaMetadataFilter;
    }
    const result = await collection.query(params);
    return createVectorStoreSearchResult(result);
  }

  /**
   * Updates the metadata of existing documents
   * @param ids - Array of document IDs to update
   * @param metadatas - Metadata entries sent with `ids` (presumably matched by position — confirm against the Chroma client)
   * @throws Error if the store is not initialized
   */
  async updateMetadata(ids: string[], metadatas: Metadata[]): Promise<void> {
    const collection = this.getCollection();
    await collection.update({
      ids,
      metadatas,
    });
  }
}
253
+
254
+ export { ChromaDataStore };
@@ -0,0 +1,28 @@
1
/**
 * Operators for filtering on a document's text content in Chroma.
 * Both operators are optional.
 */
export interface IChromaDocumentFilter {
  /** Text for the `$not_contains` operator. */
  $not_contains?: string;

  /** Text for the `$contains` operator. */
  $contains?: string;
}
8
+
9
/**
 * Operator object that may be used as the value of a metadata field inside
 * an {@link IChromaMetadataFilter}. Every operator is optional.
 */
export interface IChromaMetadataOperators {
  // Equality
  $eq?: unknown;
  $ne?: unknown;

  // Numeric comparison
  $gt?: number;
  $gte?: number;
  $lt?: number;
  $lte?: number;

  // Set membership
  $in?: unknown[];
  $nin?: unknown[];

  // Combination of nested filters
  $and?: IChromaMetadataFilter[];
  $or?: IChromaMetadataFilter[];
}

/**
 * Chroma-specific metadata filter: maps a key to either a literal value
 * (string, number, boolean, null) or an operator object.
 */
export type IChromaMetadataFilter = {
  [field: string]: string | number | boolean | null | IChromaMetadataOperators;
};
@@ -0,0 +1,26 @@
1
+ import { type IEmbeddingFunction } from 'chromadb';
2
+ import OpenAI from 'openai';
3
+
4
/**
 * Settings for {@link OpenAIEmbeddingFunction}.
 */
interface IOpenAIEmbeddingFunctionOptions {
  /** API key handed to the OpenAI client. */
  openai_api_key: string;
  /** Embedding model name sent with every embeddings request. */
  openai_model: string;
  /** Optional organization handed to the OpenAI client. */
  openai_organization_id?: string;
}

/**
 * Chroma embedding function that delegates to the OpenAI embeddings endpoint.
 */
export class OpenAIEmbeddingFunction implements IEmbeddingFunction {
  private readonly openai: OpenAI;

  /**
   * @param options - API key, model name and optional organization id
   */
  constructor(private readonly options: IOpenAIEmbeddingFunctionOptions) {
    this.openai = new OpenAI({
      apiKey: options.openai_api_key,
      organization: options.openai_organization_id,
    });
  }

  /**
   * Computes one embedding per input text.
   * @param texts - Texts to embed
   * @returns Embedding vectors, ordered like `texts`; an empty array for empty input
   */
  async generate(texts: string[]): Promise<number[][]> {
    // Nothing to embed: skip the network round-trip (and the API error an
    // empty input would trigger).
    if (texts.length === 0) {
      return [];
    }

    const response = await this.openai.embeddings.create({
      model: this.options.openai_model,
      input: texts,
    });

    // Each item reports the position of its input via `index`; order by it
    // rather than relying on the response array order. Copy first so the
    // response object is not mutated.
    return [...response.data]
      .sort((left, right) => left.index - right.index)
      .map((item) => item.embedding);
  }
}
@@ -0,0 +1,2 @@
1
+ export * from './chroma-data-store';
2
+ export * from './chroma.types';
@@ -0,0 +1,40 @@
1
+ import { type Collection } from 'chromadb';
2
+ import { type IVectorStoreDocument } from 'types/vector-db-data-store';
3
+
4
/** Resolved result type of `Collection.query`. */
type MultiQueryResponse = Awaited<
  ReturnType<typeof Collection.prototype.query>
>;

/**
 * Flattens a Chroma query response into a list of vector-store documents.
 *
 * Ids, documents, metadatas and (when present) distances are flattened and
 * then paired strictly by position. A missing id or document becomes '',
 * missing metadata becomes {}, and `score` is undefined when the response
 * carries no distances.
 *
 * @param result - Response returned by `Collection.query`
 * @returns One document per entry of the flattened `documents` array
 * @throws Error if a distance is outside the range accepted by the score conversion
 */
export const createVectorStoreSearchResult = (
  result: MultiQueryResponse,
): IVectorStoreDocument[] => {
  const scores = result.distances
    ? convertCosineDistancesToScores(result.distances)
    : undefined;
  // Flatten once up front instead of once per document.
  const ids = result.ids.flat();
  // Null entries must stay in place: dropping them would shift every later
  // metadata onto the wrong document.
  const metadatas = result.metadatas.flat();

  return result.documents.flat().map((document, idx) => ({
    id: ids.at(idx)?.toString() ?? '',
    content: document ?? '',
    metadata: metadatas.at(idx) ?? {},
    score: scores?.at(idx),
  }));
};
22
+
23
/**
 * Converts a 2D array of cosine distances into a flattened array of similarity scores.
 *
 * Each distance d in [0, 2] maps to the score s = 1 - d / 2, so 0 -> 1 and 2 -> 0.
 * Values that leave the range only by floating-point noise (at most 1e-6) are
 * clamped to the nearest bound instead of being rejected.
 *
 * @param distances - A 2D array of cosine distances (e.g., [[1.04, 1.24], [0.8, 1.5]]).
 * @returns A flattened array of similarity scores (e.g., [0.48, 0.38, 0.6, 0.25]).
 * @throws Error if a distance is NaN or lies outside [0, 2] by more than the tolerance
 */
function convertCosineDistancesToScores(distances: number[][]): number[] {
  const minDistance = 0;
  const maxDistance = 2;
  // Tolerance for rounding error in distances computed with limited precision.
  const tolerance = 1e-6;

  return distances.flat().map((distance) => {
    // Written as a negated "in range" test so that NaN is rejected as well
    // (every comparison with NaN is false).
    const isWithinRange =
      distance >= minDistance - tolerance &&
      distance <= maxDistance + tolerance;
    if (!isWithinRange) {
      throw new Error(
        `Cosine distance must be between 0 and 2. Received: ${distance}`,
      );
    }

    // Clamp away the tolerated noise so scores always stay within [0, 1].
    const clamped = Math.min(maxDistance, Math.max(minDistance, distance));
    return 1 - clamped / 2;
  });
}
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from './airtable-store';
2
+ export * from './chroma';
3
+
4
+ export * from './types';
@@ -0,0 +1,2 @@
1
+ export * from './structured-data-store';
2
+ export * from './vector-db-data-store';
@@ -0,0 +1,34 @@
1
/**
 * Optional selection parameters accepted by `IDataStore.getAllRecords`.
 * `fields` is restricted to keys of the record type `T`.
 */
interface IQueryParams<T> {
  view?: string;
  fields?: (keyof T)[];
  filterByFormula?: string;
  pageSize?: number;
  maxRecords?: number;
}
8
+
9
/**
 * Contract for a table-oriented store of records of type `T`.
 * Every operation is asynchronous and is addressed by table name.
 */
export interface IDataStore<T> {
  /** Resolves with the records of `tableName` selected by `selectOptions`. */
  getAllRecords: (
    tableName: string,
    selectOptions: IQueryParams<T>,
  ) => Promise<T[]>;

  /**
   * Get a record by its ID or throw an error if not found
   */
  getRecord: (tableName: string, recordId: string) => Promise<T>;

  /** Resolves with the records selected by a field name / field value pair. */
  getRecordByField: (
    tableName: string,
    fieldName: string,
    fieldValue: string,
  ) => Promise<T[]>;

  /** Creates a record from `recordData`; resolves with a record of type `T`. */
  createRecord: (tableName: string, recordData: T) => Promise<T>;

  /** Updates the record identified by `recordId` with `recordData`. */
  updateRecord: (
    tableName: string,
    recordId: string,
    recordData: T,
  ) => Promise<T>;

  /** Updates several records at once, each given as an `{ id, fields }` pair. */
  batchUpdateRecords: (
    tableName: string,
    records: { id: string; fields: T }[],
  ) => Promise<T[]>;

  /** Deletes the record identified by `recordId`; resolves with a record of type `T`. */
  deleteRecord: (tableName: string, recordId: string) => Promise<T>;
}
@@ -0,0 +1,78 @@
1
+ import { type IEmbeddingFunction, type Metadata } from 'chromadb';
2
+
3
/**
 * A single document held in (or returned from) the vector store.
 */
export interface IVectorStoreDocument {
  /** Unique identifier for the document. */
  id: string;

  /** The content or data of the document. */
  content: string;

  /** Optional metadata associated with the document. */
  metadata?: Metadata;

  /** Optional score of the document. */
  score?: number;
}
27
+
28
/**
 * A vector-store document that also carries its embedding vector.
 */
export interface IVectorStoreDocumentWithEmbeddings
  extends IVectorStoreDocument {
  /** Embedding vector of the document. */
  embedding: number[];
}
32
+
33
/**
 * Options accepted by the vector-store query methods.
 *
 * `Filters` is the shape of the metadata filter object; it defaults to a
 * generic string-keyed record.
 */
export interface IVectorStoreQueryOptions<
  Filters extends Record<string, unknown> = Record<string, unknown>,
> {
  /** Filters to apply based on document metadata. */
  filters?: Filters;

  /** The number of top results to return. */
  topK?: number;
}
49
+
50
/**
 * Construction options for a vector store.
 */
export interface IVectorStoreOptions {
  /** Address of the vector database. */
  url: string;

  /** Name of the collection the store operates on. */
  collectionName: string;

  /** Optional function used to compute embeddings. */
  embeddingFunction?: IEmbeddingFunction;
}
55
+
56
/**
 * Abstract base class for vector-database-backed stores.
 * It keeps the construction options in `this.options` and declares the
 * operations every concrete store must implement.
 */
export abstract class VectorDBDataStore {
  /**
   * @param options - Store configuration, exposed to subclasses as `this.options`
   */
  constructor(protected readonly options: IVectorStoreOptions) {}

  // --- lifecycle ---

  abstract init(): Promise<void>;

  // --- writes ---

  abstract upsert(documents: IVectorStoreDocument[]): Promise<void>;

  abstract addDocumentsWithEmbeddings(
    documents: IVectorStoreDocumentWithEmbeddings[],
  ): Promise<void>;

  abstract delete(ids: string[]): Promise<void>;

  // --- reads ---

  abstract getById(id: string): Promise<IVectorStoreDocument | null>;

  abstract query(
    query: string,
    options?: IVectorStoreQueryOptions,
  ): Promise<IVectorStoreDocument[]>;

  abstract queryByVector(
    vector: number[],
    options?: IVectorStoreQueryOptions,
  ): Promise<IVectorStoreDocument[]>;

  abstract queryWithSimilarity(
    query: string,
    options?: IVectorStoreQueryOptions & { similarityThreshold: number },
  ): Promise<IVectorStoreDocument[]>;
}
@@ -0,0 +1 @@
1
+ export * from './with-report-error';
@@ -0,0 +1,18 @@
1
+ import { Logger } from '@ixo/logger';
2
+
3
/**
 * Awaits `promise` and passes its value through unchanged.
 * If the promise rejects, the failure is reported through `Logger.error`
 * and the very same error is thrown again.
 *
 * @param promise - The promise to observe
 * @returns The resolved value of `promise`
 * @throws Whatever `promise` rejected with
 */
export const withReportError = async <T>(promise: Promise<T>): Promise<T> => {
  try {
    return await promise;
  } catch (error) {
    // Non-Error rejections carry no message of their own, so use a fixed one.
    let message = 'Something went wrong in AirtableDataStore';
    if (error instanceof Error) {
      message = error.message;
    }
    Logger.error(message, error);
    throw error;
  }
};
package/tsconfig.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "extends": "@ixo/typescript-config/base.json",
3
+ "compilerOptions": {
4
+ "baseUrl": "./src",
5
+ "outDir": "./dist"
6
+ }
7
+ }