@superlinked/sie-chroma 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -25
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +0 -8
- package/dist/index.d.ts +0 -8
- package/dist/index.js +4 -26
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -72,17 +72,7 @@ var SIEEmbeddingFunction = class {
|
|
|
72
72
|
outputTypes: ["dense"]
|
|
73
73
|
};
|
|
74
74
|
const results = await this.client.encode(this.model, items, options);
|
|
75
|
-
return results.map((result) => this.extractDense(result));
|
|
76
|
-
}
|
|
77
|
-
/**
|
|
78
|
-
* Extract dense embedding from encode result.
|
|
79
|
-
*/
|
|
80
|
-
extractDense(result) {
|
|
81
|
-
const dense = result.dense;
|
|
82
|
-
if (!dense) {
|
|
83
|
-
throw new Error("Encode result missing dense embedding");
|
|
84
|
-
}
|
|
85
|
-
return (0, import_sie_sdk.toNumberArray)(dense);
|
|
75
|
+
return results.map((result) => (0, import_sie_sdk.denseEmbedding)(result));
|
|
86
76
|
}
|
|
87
77
|
/**
|
|
88
78
|
* Close the underlying client connection.
|
|
@@ -136,7 +126,7 @@ var SIESparseEmbeddingFunction = class {
|
|
|
136
126
|
outputTypes: ["sparse"]
|
|
137
127
|
};
|
|
138
128
|
const results = await this.client.encode(this.model, items, options);
|
|
139
|
-
return results.map((result) => this.extractSparse(result));
|
|
129
|
+
return results.map((result) => (0, import_sie_sdk.sparseEmbedding)(result));
|
|
140
130
|
}
|
|
141
131
|
/**
|
|
142
132
|
* Generate sparse embeddings as dict format (token_id -> weight).
|
|
@@ -160,19 +150,6 @@ var SIESparseEmbeddingFunction = class {
|
|
|
160
150
|
return dict;
|
|
161
151
|
});
|
|
162
152
|
}
|
|
163
|
-
/**
|
|
164
|
-
* Extract sparse embedding from encode result.
|
|
165
|
-
*/
|
|
166
|
-
extractSparse(result) {
|
|
167
|
-
const sparse = result.sparse;
|
|
168
|
-
if (!sparse) {
|
|
169
|
-
return { indices: [], values: [] };
|
|
170
|
-
}
|
|
171
|
-
return {
|
|
172
|
-
indices: (0, import_sie_sdk.toNumberArray)(sparse.indices),
|
|
173
|
-
values: (0, import_sie_sdk.toNumberArray)(sparse.values)
|
|
174
|
-
};
|
|
175
|
-
}
|
|
176
153
|
/**
|
|
177
154
|
* Close the underlying client connection.
|
|
178
155
|
*/
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embedding functions for ChromaDB.\n *\n * Provides custom embedding functions that use SIE for generating embeddings:\n * - SIEEmbeddingFunction: Dense embeddings for standard Chroma collections\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const client = new ChromaClient();\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"Hello world\", \"Goodbye world\"],\n * });\n * ```\n */\n\nimport {\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\nimport type { IEmbeddingFunction } from \"chromadb\";\n\n/**\n * Configuration options for SIEEmbeddingFunction.\n */\nexport interface SIEEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n */\n client?: SIEClient;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Embedding function using SIE for ChromaDB collections.\n *\n * This class implements ChromaDB's IEmbeddingFunction interface,\n * allowing SIE to generate embeddings for document storage and retrieval.\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from 
\"@superlinked/sie-chroma\";\n *\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const client = new ChromaClient();\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * // Add documents (embeddings generated automatically)\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"First document\", \"Second document\"],\n * });\n *\n * // Query (query embedding generated automatically)\n * const results = await collection.query({\n * queryTexts: [\"search query\"],\n * nResults: 5,\n * });\n * ```\n */\nexport class SIEEmbeddingFunction implements IEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIEEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this._client = client;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate embeddings for documents.\n *\n * This method is called by ChromaDB when adding documents or querying.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of embedding vectors.\n */\n async generate(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as 
EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Sparse embedding representation for ChromaDB hybrid search.\n */\nexport interface SparseEmbedding {\n indices: number[];\n values: number[];\n}\n\n/**\n * Configuration options for SIESparseEmbeddingFunction.\n */\nexport interface SIESparseEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse embedding function using SIE for ChromaDB hybrid search.\n *\n * Generates sparse embeddings that can be used with ChromaDB's hybrid\n * search capabilities. 
Returns embeddings as {indices, values} pairs.\n *\n * @example\n * ```typescript\n * import { SIESparseEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const sparseEf = new SIESparseEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const embeddings = await sparseEf.generate([\"Hello world\"]);\n * console.log(embeddings[0].indices); // [1, 5, 10, ...]\n * console.log(embeddings[0].values); // [0.5, 0.3, 0.2, ...]\n * ```\n */\nexport class SIESparseEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate sparse embeddings for documents.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of sparse embeddings with indices and values.\n */\n async generate(texts: string[]): Promise<SparseEmbedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Generate sparse embeddings as dict format (token_id -> weight).\n *\n * This format is compatible with some ChromaDB hybrid search configurations.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array 
of sparse embeddings as {[tokenId]: weight} dicts.\n */\n async generateAsDict(texts: string[]): Promise<Record<number, number>[]> {\n const embeddings = await this.generate(texts);\n return embeddings.map((emb) => {\n const dict: Record<number, number> = {};\n for (let i = 0; i < emb.indices.length; i++) {\n const idx = emb.indices[i];\n const val = emb.values[i];\n if (idx !== undefined && val !== undefined) {\n dict[idx] = val;\n }\n }\n return dict;\n });\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): SparseEmbedding {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA6BA,qBAMO;AAuEA,IAAM,uBAAN,MAAyD;AAAA,EAC7C;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAuC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,UAAU;AACf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,SAAS,OAAsC;AACnD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,IACvB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,eAAO,8BAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS
;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA0DO,IAAM,6BAAN,MAAiC;AAAA,EACrB;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAA6C,CAAC,GAAG;AAC3D,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,SAAS,OAA6C;AAC1D,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,IACxB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,eAAe,OAAoD;AACvE,UAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,WAAO,WAAW,IAAI,CAAC,QAAQ;AAC7B,YAAM,OAA+B,CAAC;AACtC,eAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AAC3C,cAAM,MAAM,IAAI,QAAQ,CAAC;AACzB,cAAM,MAAM,IAAI,OAAO,CAAC;AACxB,YAAI,QAAQ,UAAa,QAAQ,QAAW;AAC1C,eAAK,GAAG,IAAI;AAAA,QACd;AAAA,MACF;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAAuC;AAC3D,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,aAAS,8BAAc,OAAO,OAAO;AAAA,MACrC,YAAQ,8BAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embedding functions for ChromaDB.\n *\n * Provides custom embedding functions that use SIE for generating embeddings:\n * - SIEEmbeddingFunction: Dense embeddings for standard Chroma collections\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const client = new ChromaClient();\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"Hello world\", \"Goodbye world\"],\n * });\n * ```\n */\n\nimport {\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\nimport type { IEmbeddingFunction } from \"chromadb\";\n\n/**\n * Configuration options for SIEEmbeddingFunction.\n */\nexport interface SIEEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n */\n client?: SIEClient;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Embedding function using SIE for ChromaDB collections.\n *\n * This class implements ChromaDB's IEmbeddingFunction interface,\n * allowing SIE to generate embeddings for document storage and retrieval.\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from 
\"@superlinked/sie-chroma\";\n *\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const client = new ChromaClient();\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * // Add documents (embeddings generated automatically)\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"First document\", \"Second document\"],\n * });\n *\n * // Query (query embedding generated automatically)\n * const results = await collection.query({\n * queryTexts: [\"search query\"],\n * nResults: 5,\n * });\n * ```\n */\nexport class SIEEmbeddingFunction implements IEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIEEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this._client = client;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate embeddings for documents.\n *\n * This method is called by ChromaDB when adding documents or querying.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of embedding vectors.\n */\n async generate(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as 
EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Sparse embedding representation for ChromaDB hybrid search.\n */\nexport interface SparseEmbedding {\n indices: number[];\n values: number[];\n}\n\n/**\n * Configuration options for SIESparseEmbeddingFunction.\n */\nexport interface SIESparseEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse embedding function using SIE for ChromaDB hybrid search.\n *\n * Generates sparse embeddings that can be used with ChromaDB's hybrid\n * search capabilities. 
Returns embeddings as {indices, values} pairs.\n *\n * @example\n * ```typescript\n * import { SIESparseEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const sparseEf = new SIESparseEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const embeddings = await sparseEf.generate([\"Hello world\"]);\n * console.log(embeddings[0].indices); // [1, 5, 10, ...]\n * console.log(embeddings[0].values); // [0.5, 0.3, 0.2, ...]\n * ```\n */\nexport class SIESparseEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate sparse embeddings for documents.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of sparse embeddings with indices and values.\n */\n async generate(texts: string[]): Promise<SparseEmbedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Generate sparse embeddings as dict format (token_id -> weight).\n *\n * This format is compatible with some ChromaDB hybrid search configurations.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of 
sparse embeddings as {[tokenId]: weight} dicts.\n */\n async generateAsDict(texts: string[]): Promise<Record<number, number>[]> {\n const embeddings = await this.generate(texts);\n return embeddings.map((emb) => {\n const dict: Record<number, number> = {};\n for (let i = 0; i < emb.indices.length; i++) {\n const idx = emb.indices[i];\n const val = emb.values[i];\n if (idx !== undefined && val !== undefined) {\n dict[idx] = val;\n }\n }\n return dict;\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA6BA,qBAOO;AAuEA,IAAM,uBAAN,MAAyD;AAAA,EAC7C;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAuC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,UAAU;AACf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,SAAS,OAAsC;AACnD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,IACvB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,+BAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA0DO,IAAM,6BAAN,MAAiC;AAAA,EACrB;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAA6C,CAAC,GAAG;AAC3D,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA
;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,SAAS,OAA6C;AAC1D,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,IACxB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,gCAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,eAAe,OAAoD;AACvE,UAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,WAAO,WAAW,IAAI,CAAC,QAAQ;AAC7B,YAAM,OAA+B,CAAC;AACtC,eAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AAC3C,cAAM,MAAM,IAAI,QAAQ,CAAC;AACzB,cAAM,MAAM,IAAI,OAAO,CAAC;AACxB,YAAI,QAAQ,UAAa,QAAQ,QAAW;AAC1C,eAAK,GAAG,IAAI;AAAA,QACd;AAAA,MACF;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
package/dist/index.d.cts
CHANGED
|
@@ -112,10 +112,6 @@ declare class SIEEmbeddingFunction implements IEmbeddingFunction {
|
|
|
112
112
|
* @returns Array of embedding vectors.
|
|
113
113
|
*/
|
|
114
114
|
generate(texts: string[]): Promise<number[][]>;
|
|
115
|
-
/**
|
|
116
|
-
* Extract dense embedding from encode result.
|
|
117
|
-
*/
|
|
118
|
-
private extractDense;
|
|
119
115
|
/**
|
|
120
116
|
* Close the underlying client connection.
|
|
121
117
|
*/
|
|
@@ -198,10 +194,6 @@ declare class SIESparseEmbeddingFunction {
|
|
|
198
194
|
* @returns Array of sparse embeddings as {[tokenId]: weight} dicts.
|
|
199
195
|
*/
|
|
200
196
|
generateAsDict(texts: string[]): Promise<Record<number, number>[]>;
|
|
201
|
-
/**
|
|
202
|
-
* Extract sparse embedding from encode result.
|
|
203
|
-
*/
|
|
204
|
-
private extractSparse;
|
|
205
197
|
/**
|
|
206
198
|
* Close the underlying client connection.
|
|
207
199
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -112,10 +112,6 @@ declare class SIEEmbeddingFunction implements IEmbeddingFunction {
|
|
|
112
112
|
* @returns Array of embedding vectors.
|
|
113
113
|
*/
|
|
114
114
|
generate(texts: string[]): Promise<number[][]>;
|
|
115
|
-
/**
|
|
116
|
-
* Extract dense embedding from encode result.
|
|
117
|
-
*/
|
|
118
|
-
private extractDense;
|
|
119
115
|
/**
|
|
120
116
|
* Close the underlying client connection.
|
|
121
117
|
*/
|
|
@@ -198,10 +194,6 @@ declare class SIESparseEmbeddingFunction {
|
|
|
198
194
|
* @returns Array of sparse embeddings as {[tokenId]: weight} dicts.
|
|
199
195
|
*/
|
|
200
196
|
generateAsDict(texts: string[]): Promise<Record<number, number>[]>;
|
|
201
|
-
/**
|
|
202
|
-
* Extract sparse embedding from encode result.
|
|
203
|
-
*/
|
|
204
|
-
private extractSparse;
|
|
205
197
|
/**
|
|
206
198
|
* Close the underlying client connection.
|
|
207
199
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
2
|
import {
|
|
3
3
|
SIEClient,
|
|
4
|
-
toNumberArray
|
|
4
|
+
denseEmbedding,
|
|
5
|
+
sparseEmbedding
|
|
5
6
|
} from "@superlinked/sie-sdk";
|
|
6
7
|
var SIEEmbeddingFunction = class {
|
|
7
8
|
model;
|
|
@@ -50,17 +51,7 @@ var SIEEmbeddingFunction = class {
|
|
|
50
51
|
outputTypes: ["dense"]
|
|
51
52
|
};
|
|
52
53
|
const results = await this.client.encode(this.model, items, options);
|
|
53
|
-
return results.map((result) => this.extractDense(result));
|
|
54
|
-
}
|
|
55
|
-
/**
|
|
56
|
-
* Extract dense embedding from encode result.
|
|
57
|
-
*/
|
|
58
|
-
extractDense(result) {
|
|
59
|
-
const dense = result.dense;
|
|
60
|
-
if (!dense) {
|
|
61
|
-
throw new Error("Encode result missing dense embedding");
|
|
62
|
-
}
|
|
63
|
-
return toNumberArray(dense);
|
|
54
|
+
return results.map((result) => denseEmbedding(result));
|
|
64
55
|
}
|
|
65
56
|
/**
|
|
66
57
|
* Close the underlying client connection.
|
|
@@ -114,7 +105,7 @@ var SIESparseEmbeddingFunction = class {
|
|
|
114
105
|
outputTypes: ["sparse"]
|
|
115
106
|
};
|
|
116
107
|
const results = await this.client.encode(this.model, items, options);
|
|
117
|
-
return results.map((result) => this.extractSparse(result));
|
|
108
|
+
return results.map((result) => sparseEmbedding(result));
|
|
118
109
|
}
|
|
119
110
|
/**
|
|
120
111
|
* Generate sparse embeddings as dict format (token_id -> weight).
|
|
@@ -138,19 +129,6 @@ var SIESparseEmbeddingFunction = class {
|
|
|
138
129
|
return dict;
|
|
139
130
|
});
|
|
140
131
|
}
|
|
141
|
-
/**
|
|
142
|
-
* Extract sparse embedding from encode result.
|
|
143
|
-
*/
|
|
144
|
-
extractSparse(result) {
|
|
145
|
-
const sparse = result.sparse;
|
|
146
|
-
if (!sparse) {
|
|
147
|
-
return { indices: [], values: [] };
|
|
148
|
-
}
|
|
149
|
-
return {
|
|
150
|
-
indices: toNumberArray(sparse.indices),
|
|
151
|
-
values: toNumberArray(sparse.values)
|
|
152
|
-
};
|
|
153
|
-
}
|
|
154
132
|
/**
|
|
155
133
|
* Close the underlying client connection.
|
|
156
134
|
*/
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embedding functions for ChromaDB.\n *\n * Provides custom embedding functions that use SIE for generating embeddings:\n * - SIEEmbeddingFunction: Dense embeddings for standard Chroma collections\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const client = new ChromaClient();\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"Hello world\", \"Goodbye world\"],\n * });\n * ```\n */\n\nimport {\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\nimport type { IEmbeddingFunction } from \"chromadb\";\n\n/**\n * Configuration options for SIEEmbeddingFunction.\n */\nexport interface SIEEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n */\n client?: SIEClient;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Embedding function using SIE for ChromaDB collections.\n *\n * This class implements ChromaDB's IEmbeddingFunction interface,\n * allowing SIE to generate embeddings for document storage and retrieval.\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from 
\"@superlinked/sie-chroma\";\n *\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const client = new ChromaClient();\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * // Add documents (embeddings generated automatically)\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"First document\", \"Second document\"],\n * });\n *\n * // Query (query embedding generated automatically)\n * const results = await collection.query({\n * queryTexts: [\"search query\"],\n * nResults: 5,\n * });\n * ```\n */\nexport class SIEEmbeddingFunction implements IEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIEEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this._client = client;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate embeddings for documents.\n *\n * This method is called by ChromaDB when adding documents or querying.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of embedding vectors.\n */\n async generate(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as 
EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Sparse embedding representation for ChromaDB hybrid search.\n */\nexport interface SparseEmbedding {\n indices: number[];\n values: number[];\n}\n\n/**\n * Configuration options for SIESparseEmbeddingFunction.\n */\nexport interface SIESparseEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse embedding function using SIE for ChromaDB hybrid search.\n *\n * Generates sparse embeddings that can be used with ChromaDB's hybrid\n * search capabilities. 
Returns embeddings as {indices, values} pairs.\n *\n * @example\n * ```typescript\n * import { SIESparseEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const sparseEf = new SIESparseEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const embeddings = await sparseEf.generate([\"Hello world\"]);\n * console.log(embeddings[0].indices); // [1, 5, 10, ...]\n * console.log(embeddings[0].values); // [0.5, 0.3, 0.2, ...]\n * ```\n */\nexport class SIESparseEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate sparse embeddings for documents.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of sparse embeddings with indices and values.\n */\n async generate(texts: string[]): Promise<SparseEmbedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Generate sparse embeddings as dict format (token_id -> weight).\n *\n * This format is compatible with some ChromaDB hybrid search configurations.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array 
of sparse embeddings as {[tokenId]: weight} dicts.\n */\n async generateAsDict(texts: string[]): Promise<Record<number, number>[]> {\n const embeddings = await this.generate(texts);\n return embeddings.map((emb) => {\n const dict: Record<number, number> = {};\n for (let i = 0; i < emb.indices.length; i++) {\n const idx = emb.indices[i];\n const val = emb.values[i];\n if (idx !== undefined && val !== undefined) {\n dict[idx] = val;\n }\n }\n return dict;\n });\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): SparseEmbedding {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n"],"mappings":";AA6BA;AAAA,EAGE;AAAA,EAEA;AAAA,OACK;AAuEA,IAAM,uBAAN,MAAyD;AAAA,EAC7C;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAuC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,UAAU;AACf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,SAAS,OAAsC;AACnD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,IACvB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,WAAO,cAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;
AAAA,IAC3B;AAAA,EACF;AACF;AA0DO,IAAM,6BAAN,MAAiC;AAAA,EACrB;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAA6C,CAAC,GAAG;AAC3D,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,SAAS,OAA6C;AAC1D,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,IACxB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,eAAe,OAAoD;AACvE,UAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,WAAO,WAAW,IAAI,CAAC,QAAQ;AAC7B,YAAM,OAA+B,CAAC;AACtC,eAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AAC3C,cAAM,MAAM,IAAI,QAAQ,CAAC;AACzB,cAAM,MAAM,IAAI,OAAO,CAAC;AACxB,YAAI,QAAQ,UAAa,QAAQ,QAAW;AAC1C,eAAK,GAAG,IAAI;AAAA,QACd;AAAA,MACF;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAAuC;AAC3D,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,SAAS,cAAc,OAAO,OAAO;AAAA,MACrC,QAAQ,cAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embedding functions for ChromaDB.\n *\n * Provides custom embedding functions that use SIE for generating embeddings:\n * - SIEEmbeddingFunction: Dense embeddings for standard Chroma collections\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const client = new ChromaClient();\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"Hello world\", \"Goodbye world\"],\n * });\n * ```\n */\n\nimport {\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\nimport type { IEmbeddingFunction } from \"chromadb\";\n\n/**\n * Configuration options for SIEEmbeddingFunction.\n */\nexport interface SIEEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n */\n client?: SIEClient;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Embedding function using SIE for ChromaDB collections.\n *\n * This class implements ChromaDB's IEmbeddingFunction interface,\n * allowing SIE to generate embeddings for document storage and retrieval.\n *\n * @example\n * ```typescript\n * import { ChromaClient } from \"chromadb\";\n * import { SIEEmbeddingFunction } from 
\"@superlinked/sie-chroma\";\n *\n * const embeddingFunction = new SIEEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const client = new ChromaClient();\n * const collection = await client.createCollection({\n * name: \"my_collection\",\n * embeddingFunction,\n * });\n *\n * // Add documents (embeddings generated automatically)\n * await collection.add({\n * ids: [\"doc1\", \"doc2\"],\n * documents: [\"First document\", \"Second document\"],\n * });\n *\n * // Query (query embedding generated automatically)\n * const results = await collection.query({\n * queryTexts: [\"search query\"],\n * nResults: 5,\n * });\n * ```\n */\nexport class SIEEmbeddingFunction implements IEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIEEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this._client = client;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate embeddings for documents.\n *\n * This method is called by ChromaDB when adding documents or querying.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of embedding vectors.\n */\n async generate(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as 
EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Sparse embedding representation for ChromaDB hybrid search.\n */\nexport interface SparseEmbedding {\n indices: number[];\n values: number[];\n}\n\n/**\n * Configuration options for SIESparseEmbeddingFunction.\n */\nexport interface SIESparseEmbeddingFunctionOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse embedding function using SIE for ChromaDB hybrid search.\n *\n * Generates sparse embeddings that can be used with ChromaDB's hybrid\n * search capabilities. 
Returns embeddings as {indices, values} pairs.\n *\n * @example\n * ```typescript\n * import { SIESparseEmbeddingFunction } from \"@superlinked/sie-chroma\";\n *\n * const sparseEf = new SIESparseEmbeddingFunction({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const embeddings = await sparseEf.generate([\"Hello world\"]);\n * console.log(embeddings[0].indices); // [1, 5, 10, ...]\n * console.log(embeddings[0].values); // [0.5, 0.3, 0.2, ...]\n * ```\n */\nexport class SIESparseEmbeddingFunction {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEmbeddingFunctionOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Generate sparse embeddings for documents.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of sparse embeddings with indices and values.\n */\n async generate(texts: string[]): Promise<SparseEmbedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Generate sparse embeddings as dict format (token_id -> weight).\n *\n * This format is compatible with some ChromaDB hybrid search configurations.\n *\n * @param texts - Array of document texts to embed.\n * @returns Array of 
sparse embeddings as {[tokenId]: weight} dicts.\n */\n async generateAsDict(texts: string[]): Promise<Record<number, number>[]> {\n const embeddings = await this.generate(texts);\n return embeddings.map((emb) => {\n const dict: Record<number, number> = {};\n for (let i = 0; i < emb.indices.length; i++) {\n const idx = emb.indices[i];\n const val = emb.values[i];\n if (idx !== undefined && val !== undefined) {\n dict[idx] = val;\n }\n }\n return dict;\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n"],"mappings":";AA6BA;AAAA,EAGE;AAAA,EAEA;AAAA,EACA;AAAA,OACK;AAuEA,IAAM,uBAAN,MAAyD;AAAA,EAC7C;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAuC,CAAC,GAAG;AACrD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,UAAU;AACf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,SAAS,OAAsC;AACnD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,IACvB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,eAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA0DO,IAAM,6BAAN,MAAiC;AAAA,EACrB;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAA6C,CAAC,GAAG;AAC3D,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA
AA,EAQA,MAAM,SAAS,OAA6C;AAC1D,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,IACxB;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,gBAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,eAAe,OAAoD;AACvE,UAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,WAAO,WAAW,IAAI,CAAC,QAAQ;AAC7B,YAAM,OAA+B,CAAC;AACtC,eAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,QAAQ,KAAK;AAC3C,cAAM,MAAM,IAAI,QAAQ,CAAC;AACzB,cAAM,MAAM,IAAI,OAAO,CAAC;AACxB,YAAI,QAAQ,UAAa,QAAQ,QAAW;AAC1C,eAAK,GAAG,IAAI;AAAA,QACd;AAAA,MACF;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@superlinked/sie-chroma",
|
|
3
|
-
"version": "0.1.9",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "SIE embedding function for ChromaDB",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"dist"
|
|
18
18
|
],
|
|
19
19
|
"dependencies": {
|
|
20
|
-
"@superlinked/sie-sdk": "0.
|
|
20
|
+
"@superlinked/sie-sdk": "0.2.0"
|
|
21
21
|
},
|
|
22
22
|
"peerDependencies": {
|
|
23
23
|
"chromadb": ">=1.9.0"
|