@superlinked/sie-langchain 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +189 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +241 -9
- package/dist/index.d.ts +241 -9
- package/dist/index.js +192 -31
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -21,11 +21,192 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
SIEEmbeddings: () => SIEEmbeddings,
|
|
24
|
+
SIEExtractor: () => SIEExtractor,
|
|
25
|
+
SIEReranker: () => SIEReranker,
|
|
24
26
|
SIESparseEncoder: () => SIESparseEncoder
|
|
25
27
|
});
|
|
26
28
|
module.exports = __toCommonJS(index_exports);
|
|
27
29
|
var import_embeddings = require("@langchain/core/embeddings");
|
|
30
|
+
var import_sie_sdk3 = require("@superlinked/sie-sdk");
|
|
31
|
+
|
|
32
|
+
// src/rerankers.ts
|
|
33
|
+
var import_document_compressors = require("@langchain/core/retrievers/document_compressors");
|
|
28
34
|
var import_sie_sdk = require("@superlinked/sie-sdk");
|
|
35
|
+
var SIEReranker = class extends import_document_compressors.BaseDocumentCompressor {
|
|
36
|
+
model;
|
|
37
|
+
topK;
|
|
38
|
+
_client;
|
|
39
|
+
_ownsClient;
|
|
40
|
+
baseUrl;
|
|
41
|
+
clientOptions;
|
|
42
|
+
constructor(params = {}) {
|
|
43
|
+
super();
|
|
44
|
+
const {
|
|
45
|
+
baseUrl = "http://localhost:8080",
|
|
46
|
+
model = "jinaai/jina-reranker-v2-base-multilingual",
|
|
47
|
+
client,
|
|
48
|
+
topK,
|
|
49
|
+
gpu,
|
|
50
|
+
timeout = 18e4
|
|
51
|
+
} = params;
|
|
52
|
+
this.baseUrl = baseUrl;
|
|
53
|
+
this.model = model;
|
|
54
|
+
this.topK = topK;
|
|
55
|
+
this._client = client;
|
|
56
|
+
this._ownsClient = !client;
|
|
57
|
+
this.clientOptions = {
|
|
58
|
+
timeout,
|
|
59
|
+
gpu
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Get or create the SIEClient.
|
|
64
|
+
*/
|
|
65
|
+
get client() {
|
|
66
|
+
if (!this._client) {
|
|
67
|
+
this._client = new import_sie_sdk.SIEClient(this.baseUrl, this.clientOptions);
|
|
68
|
+
}
|
|
69
|
+
return this._client;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Rerank documents by relevance to query.
|
|
73
|
+
*
|
|
74
|
+
* @param documents - Documents to rerank.
|
|
75
|
+
* @param query - Query to rank documents against.
|
|
76
|
+
* @returns Reranked documents with relevance_score in metadata, sorted by score descending.
|
|
77
|
+
*/
|
|
78
|
+
async compressDocuments(documents, query) {
|
|
79
|
+
if (documents.length === 0) {
|
|
80
|
+
return [];
|
|
81
|
+
}
|
|
82
|
+
const queryItem = { text: query };
|
|
83
|
+
const docItems = documents.map((doc) => ({ text: doc.pageContent }));
|
|
84
|
+
const result = await this.client.score(this.model, queryItem, docItems);
|
|
85
|
+
const reranked = [];
|
|
86
|
+
for (const entry of result.scores) {
|
|
87
|
+
const idx = Number.parseInt(entry.itemId, 10);
|
|
88
|
+
const doc = documents[idx];
|
|
89
|
+
if (doc) {
|
|
90
|
+
reranked.push({
|
|
91
|
+
pageContent: doc.pageContent,
|
|
92
|
+
metadata: { ...doc.metadata, relevance_score: entry.score },
|
|
93
|
+
id: doc.id
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (this.topK !== void 0) {
|
|
98
|
+
return reranked.slice(0, this.topK);
|
|
99
|
+
}
|
|
100
|
+
return reranked;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Close the underlying client connection.
|
|
104
|
+
*/
|
|
105
|
+
async close() {
|
|
106
|
+
if (this._client && this._ownsClient) {
|
|
107
|
+
await this._client.close();
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
};
|
|
111
|
+
|
|
112
|
+
// src/extractors.ts
|
|
113
|
+
var import_tools = require("@langchain/core/tools");
|
|
114
|
+
var import_sie_sdk2 = require("@superlinked/sie-sdk");
|
|
115
|
+
var SIEExtractor = class extends import_tools.Tool {
|
|
116
|
+
name;
|
|
117
|
+
description;
|
|
118
|
+
model;
|
|
119
|
+
labels;
|
|
120
|
+
threshold;
|
|
121
|
+
_client;
|
|
122
|
+
_ownsClient;
|
|
123
|
+
baseUrl;
|
|
124
|
+
clientOptions;
|
|
125
|
+
constructor(params = {}) {
|
|
126
|
+
const toolName = params.name ?? "sie_extract";
|
|
127
|
+
const toolDescription = params.description ?? "Extract structured information from text. Input should be text to analyze. Returns JSON with entities, relations, classifications, and detected objects.";
|
|
128
|
+
super({});
|
|
129
|
+
this.name = toolName;
|
|
130
|
+
this.description = toolDescription;
|
|
131
|
+
const {
|
|
132
|
+
baseUrl = "http://localhost:8080",
|
|
133
|
+
model = "urchade/gliner_multi-v2.1",
|
|
134
|
+
client,
|
|
135
|
+
labels = ["person", "organization", "location"],
|
|
136
|
+
threshold,
|
|
137
|
+
gpu,
|
|
138
|
+
timeout = 18e4
|
|
139
|
+
} = params;
|
|
140
|
+
this.baseUrl = baseUrl;
|
|
141
|
+
this.model = model;
|
|
142
|
+
this.labels = labels;
|
|
143
|
+
this.threshold = threshold;
|
|
144
|
+
this._client = client;
|
|
145
|
+
this._ownsClient = !client;
|
|
146
|
+
this.clientOptions = {
|
|
147
|
+
timeout,
|
|
148
|
+
gpu
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Get or create the SIEClient.
|
|
153
|
+
*/
|
|
154
|
+
get client() {
|
|
155
|
+
if (!this._client) {
|
|
156
|
+
this._client = new import_sie_sdk2.SIEClient(this.baseUrl, this.clientOptions);
|
|
157
|
+
}
|
|
158
|
+
return this._client;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Extract structured information from text.
|
|
162
|
+
*
|
|
163
|
+
* @param text - Text to extract from.
|
|
164
|
+
* @returns JSON string with entities, relations, classifications, and objects.
|
|
165
|
+
*/
|
|
166
|
+
async _call(text) {
|
|
167
|
+
const extractOptions = {
|
|
168
|
+
labels: this.labels
|
|
169
|
+
};
|
|
170
|
+
if (this.threshold !== void 0) {
|
|
171
|
+
extractOptions.threshold = this.threshold;
|
|
172
|
+
}
|
|
173
|
+
const result = await this.client.extract(this.model, { text }, extractOptions);
|
|
174
|
+
return JSON.stringify({
|
|
175
|
+
entities: result.entities.map((e) => ({
|
|
176
|
+
text: e.text,
|
|
177
|
+
label: e.label,
|
|
178
|
+
score: e.score,
|
|
179
|
+
...e.start !== void 0 && { start: e.start },
|
|
180
|
+
...e.end !== void 0 && { end: e.end }
|
|
181
|
+
})),
|
|
182
|
+
relations: result.relations.map((r) => ({
|
|
183
|
+
head: r.head,
|
|
184
|
+
tail: r.tail,
|
|
185
|
+
relation: r.relation,
|
|
186
|
+
score: r.score
|
|
187
|
+
})),
|
|
188
|
+
classifications: result.classifications.map((c) => ({
|
|
189
|
+
label: c.label,
|
|
190
|
+
score: c.score
|
|
191
|
+
})),
|
|
192
|
+
objects: result.objects.map((o) => ({
|
|
193
|
+
label: o.label,
|
|
194
|
+
score: o.score,
|
|
195
|
+
bbox: o.bbox
|
|
196
|
+
}))
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Close the underlying client connection.
|
|
201
|
+
*/
|
|
202
|
+
async close() {
|
|
203
|
+
if (this._client && this._ownsClient) {
|
|
204
|
+
await this._client.close();
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
};
|
|
208
|
+
|
|
209
|
+
// src/index.ts
|
|
29
210
|
var SIEEmbeddings = class extends import_embeddings.Embeddings {
|
|
30
211
|
model;
|
|
31
212
|
instruction;
|
|
@@ -63,7 +244,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
|
|
|
63
244
|
get client() {
|
|
64
245
|
if (!this._client) {
|
|
65
246
|
const baseUrl = this.baseUrl ?? "http://localhost:8080";
|
|
66
|
-
this._client = new import_sie_sdk.SIEClient(baseUrl, this.clientOptions);
|
|
247
|
+
this._client = new import_sie_sdk3.SIEClient(baseUrl, this.clientOptions);
|
|
67
248
|
}
|
|
68
249
|
return this._client;
|
|
69
250
|
}
|
|
@@ -85,7 +266,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
|
|
|
85
266
|
isQuery: false
|
|
86
267
|
};
|
|
87
268
|
const results = await this.client.encode(this.model, items, options);
|
|
88
|
-
return results.map((result) => this.extractDense(result));
|
|
269
|
+
return results.map((result) => (0, import_sie_sdk3.denseEmbedding)(result));
|
|
89
270
|
}
|
|
90
271
|
/**
|
|
91
272
|
* Embed a single query text.
|
|
@@ -103,17 +284,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
|
|
|
103
284
|
isQuery: true
|
|
104
285
|
};
|
|
105
286
|
const result = await this.client.encode(this.model, { text }, options);
|
|
106
|
-
return this.extractDense(result);
|
|
107
|
-
}
|
|
108
|
-
/**
|
|
109
|
-
* Extract dense embedding from encode result.
|
|
110
|
-
*/
|
|
111
|
-
extractDense(result) {
|
|
112
|
-
const dense = result.dense;
|
|
113
|
-
if (!dense) {
|
|
114
|
-
throw new Error("Encode result missing dense embedding");
|
|
115
|
-
}
|
|
116
|
-
return (0, import_sie_sdk.toNumberArray)(dense);
|
|
287
|
+
return (0, import_sie_sdk3.denseEmbedding)(result);
|
|
117
288
|
}
|
|
118
289
|
/**
|
|
119
290
|
* Close the underlying client connection.
|
|
@@ -148,7 +319,7 @@ var SIESparseEncoder = class {
|
|
|
148
319
|
*/
|
|
149
320
|
get client() {
|
|
150
321
|
if (!this._client) {
|
|
151
|
-
this._client = new import_sie_sdk.SIEClient(this.baseUrl, this.clientOptions);
|
|
322
|
+
this._client = new import_sie_sdk3.SIEClient(this.baseUrl, this.clientOptions);
|
|
152
323
|
}
|
|
153
324
|
return this._client;
|
|
154
325
|
}
|
|
@@ -168,7 +339,7 @@ var SIESparseEncoder = class {
|
|
|
168
339
|
isQuery: true
|
|
169
340
|
};
|
|
170
341
|
const results = await this.client.encode(this.model, items, options);
|
|
171
|
-
return results.map((result) => this.extractSparse(result));
|
|
342
|
+
return results.map((result) => (0, import_sie_sdk3.sparseEmbedding)(result));
|
|
172
343
|
}
|
|
173
344
|
/**
|
|
174
345
|
* Encode document texts to sparse vectors.
|
|
@@ -186,20 +357,7 @@ var SIESparseEncoder = class {
|
|
|
186
357
|
isQuery: false
|
|
187
358
|
};
|
|
188
359
|
const results = await this.client.encode(this.model, items, options);
|
|
189
|
-
return results.map((result) => this.extractSparse(result));
|
|
190
|
-
}
|
|
191
|
-
/**
|
|
192
|
-
* Extract sparse embedding from encode result.
|
|
193
|
-
*/
|
|
194
|
-
extractSparse(result) {
|
|
195
|
-
const sparse = result.sparse;
|
|
196
|
-
if (!sparse) {
|
|
197
|
-
return { indices: [], values: [] };
|
|
198
|
-
}
|
|
199
|
-
return {
|
|
200
|
-
indices: (0, import_sie_sdk.toNumberArray)(sparse.indices),
|
|
201
|
-
values: (0, import_sie_sdk.toNumberArray)(sparse.values)
|
|
202
|
-
};
|
|
360
|
+
return results.map((result) => (0, import_sie_sdk3.sparseEmbedding)(result));
|
|
203
361
|
}
|
|
204
362
|
/**
|
|
205
363
|
* Close the underlying client connection.
|
|
@@ -213,6 +371,8 @@ var SIESparseEncoder = class {
|
|
|
213
371
|
// Annotate the CommonJS export names for ESM import in node:
|
|
214
372
|
0 && (module.exports = {
|
|
215
373
|
SIEEmbeddings,
|
|
374
|
+
SIEExtractor,
|
|
375
|
+
SIEReranker,
|
|
216
376
|
SIESparseEncoder
|
|
217
377
|
});
|
|
218
378
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n 
* import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return this.extractDense(result as EncodeResult);\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): { indices: number[]; values: number[] } {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAqBA,wBAAkD;AAClD,qBAOO;AA8EA,IAAM,gBAAN,cAA4B,6BAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,yBAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,KAAK,aAAa,MAAsB;AAAA,EACjD;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,eAAO,8BAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAA
S,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAA+D;AACnF,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,aAAS,8BAAc,OAAO,OAAO;AAAA,MACrC,YAAQ,8BAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/rerankers.ts","../src/extractors.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n * - SIEReranker: Cross-encoder reranking for retrieval pipelines\n * - SIEExtractor: Entity extraction tool for agents\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: 
number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return denseEmbedding(result as EncodeResult);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\nexport { SIEReranker, type SIERerankerParams } from \"./rerankers.js\";\nexport { SIEExtractor, type SIEExtractorParams } from \"./extractors.js\";\n","/**\n * SIE reranker integration for LangChain.js\n *\n * Provides document reranking using SIE's score endpoint:\n * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n * ```\n */\n\nimport type { DocumentInterface } from \"@langchain/core/documents\";\nimport { BaseDocumentCompressor } from \"@langchain/core/retrievers/document_compressors\";\nimport { SIEClient, type SIEClientOptions } from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEReranker.\n */\nexport interface SIERerankerParams {\n /**\n * URL of the SIE 
server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Reranker model name/ID.\n * @default \"jinaai/jina-reranker-v2-base-multilingual\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Number of top documents to return. If undefined, returns all documents.\n */\n topK?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain document compressor using SIE's reranking.\n *\n * Wraps SIEClient.score() to implement BaseDocumentCompressor.\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * // Rerank retrieved documents\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n *\n * // Use in a retrieval pipeline\n * import { ContextualCompressionRetriever } from \"langchain/retrievers/contextual_compression\";\n *\n * const compressionRetriever = new ContextualCompressionRetriever({\n * baseCompressor: reranker,\n * baseRetriever: vectorStoreRetriever,\n * });\n * ```\n */\nexport class SIEReranker extends BaseDocumentCompressor {\n private readonly model: string;\n private readonly topK?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIERerankerParams = {}) {\n super();\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"jinaai/jina-reranker-v2-base-multilingual\",\n client,\n topK,\n gpu,\n timeout = 180_000,\n } = params;\n\n 
this.baseUrl = baseUrl;\n this.model = model;\n this.topK = topK;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Rerank documents by relevance to query.\n *\n * @param documents - Documents to rerank.\n * @param query - Query to rank documents against.\n * @returns Reranked documents with relevance_score in metadata, sorted by score descending.\n */\n async compressDocuments(\n documents: DocumentInterface[],\n query: string,\n ): Promise<DocumentInterface[]> {\n if (documents.length === 0) {\n return [];\n }\n\n const queryItem = { text: query };\n const docItems = documents.map((doc) => ({ text: doc.pageContent }));\n\n const result = await this.client.score(this.model, queryItem, docItems);\n\n // Map score entries back to documents with relevance_score in metadata.\n // ScoreResult.scores are already sorted by score descending.\n const reranked: DocumentInterface[] = [];\n for (const entry of result.scores) {\n const idx = Number.parseInt(entry.itemId, 10);\n const doc = documents[idx];\n if (doc) {\n reranked.push({\n pageContent: doc.pageContent,\n metadata: { ...doc.metadata, relevance_score: entry.score },\n id: doc.id,\n });\n }\n }\n\n if (this.topK !== undefined) {\n return reranked.slice(0, this.topK);\n }\n return reranked;\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await this._client.close();\n }\n }\n}\n","/**\n * SIE extraction tool for LangChain.js\n *\n * Provides extraction using SIE's extract endpoint:\n * - SIEExtractor: Extraction tool implementing LangChain Tool\n *\n * Returns entities, relations, classifications, and detected objects.\n *\n * @example\n * ```typescript\n * 
import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * const extractor = new SIEExtractor({\n * baseUrl: \"http://localhost:8080\",\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n *\n * const result = await extractor.invoke(\"John Smith works at Acme Corp in NYC\");\n * const parsed = JSON.parse(result);\n * console.log(parsed.entities);\n * console.log(parsed.relations);\n * ```\n */\n\nimport { Tool } from \"@langchain/core/tools\";\nimport {\n type ExtractOptions,\n type ExtractResult,\n SIEClient,\n type SIEClientOptions,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEExtractor.\n */\nexport interface SIEExtractorParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Extraction model name/ID.\n * @default \"urchade/gliner_multi-v2.1\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Labels to extract (entity types, relation types, or classification labels).\n * @default [\"person\", \"organization\", \"location\"]\n */\n labels?: string[];\n\n /**\n * Minimum confidence threshold (0-1).\n */\n threshold?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n\n /**\n * Tool name for use in agents.\n * @default \"sie_extract\"\n */\n name?: string;\n\n /**\n * Tool description for use in agents.\n */\n description?: string;\n}\n\n/**\n * LangChain tool for extraction using SIE.\n *\n * Wraps SIEClient.extract() to implement the LangChain Tool interface\n * for use in agents and chains. 
Returns JSON with entities, relations,\n * classifications, and detected objects.\n *\n * @example\n * ```typescript\n * import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * // Direct usage\n * const extractor = new SIEExtractor({\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n * const result = await extractor.invoke(\"John Smith works at Acme Corp\");\n * const parsed = JSON.parse(result);\n *\n * // Use in an agent\n * import { ChatOpenAI } from \"@langchain/openai\";\n * import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n *\n * const agent = createReactAgent({\n * llm: new ChatOpenAI(),\n * tools: [extractor],\n * });\n * ```\n */\nexport class SIEExtractor extends Tool {\n name: string;\n description: string;\n\n private readonly model: string;\n private readonly labels: string[];\n private readonly threshold?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEExtractorParams = {}) {\n const toolName = params.name ?? \"sie_extract\";\n const toolDescription =\n params.description ??\n \"Extract structured information from text. \" +\n \"Input should be text to analyze. 
\" +\n \"Returns JSON with entities, relations, classifications, and detected objects.\";\n\n super({});\n\n this.name = toolName;\n this.description = toolDescription;\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"urchade/gliner_multi-v2.1\",\n client,\n labels = [\"person\", \"organization\", \"location\"],\n threshold,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.labels = labels;\n this.threshold = threshold;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Extract structured information from text.\n *\n * @param text - Text to extract from.\n * @returns JSON string with entities, relations, classifications, and objects.\n */\n async _call(text: string): Promise<string> {\n const extractOptions: ExtractOptions = {\n labels: this.labels,\n };\n if (this.threshold !== undefined) {\n extractOptions.threshold = this.threshold;\n }\n\n const result: ExtractResult = await this.client.extract(this.model, { text }, extractOptions);\n\n return JSON.stringify({\n entities: result.entities.map((e) => ({\n text: e.text,\n label: e.label,\n score: e.score,\n ...(e.start !== undefined && { start: e.start }),\n ...(e.end !== undefined && { end: e.end }),\n })),\n relations: result.relations.map((r) => ({\n head: r.head,\n tail: r.tail,\n relation: r.relation,\n score: r.score,\n })),\n classifications: result.classifications.map((c) => ({\n label: c.label,\n score: c.score,\n })),\n objects: result.objects.map((o) => ({\n label: o.label,\n score: o.score,\n bbox: o.bbox,\n })),\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await 
this._client.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAuBA,wBAAkD;AAClD,IAAAA,kBAQO;;;ACXP,kCAAuC;AACvC,qBAAiD;AAoE1C,IAAM,cAAN,cAA0B,mDAAuB;AAAA,EACrC;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA4B,CAAC,GAAG;AAC1C,UAAM;AAEN,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,kBACJ,WACA,OAC8B;AAC9B,QAAI,UAAU,WAAW,GAAG;AAC1B,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,YAAY,EAAE,MAAM,MAAM;AAChC,UAAM,WAAW,UAAU,IAAI,CAAC,SAAS,EAAE,MAAM,IAAI,YAAY,EAAE;AAEnE,UAAM,SAAS,MAAM,KAAK,OAAO,MAAM,KAAK,OAAO,WAAW,QAAQ;AAItE,UAAM,WAAgC,CAAC;AACvC,eAAW,SAAS,OAAO,QAAQ;AACjC,YAAM,MAAM,OAAO,SAAS,MAAM,QAAQ,EAAE;AAC5C,YAAM,MAAM,UAAU,GAAG;AACzB,UAAI,KAAK;AACP,iBAAS,KAAK;AAAA,UACZ,aAAa,IAAI;AAAA,UACjB,UAAU,EAAE,GAAG,IAAI,UAAU,iBAAiB,MAAM,MAAM;AAAA,UAC1D,IAAI,IAAI;AAAA,QACV,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,KAAK,SAAS,QAAW;AAC3B,aAAO,SAAS,MAAM,GAAG,KAAK,IAAI;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AC5JA,mBAAqB;AACrB,IAAAC,kBAKO;AAuFA,IAAM,eAAN,cAA2B,kBAAK;AAAA,EACrC;AAAA,EACA;AAAA,EAEiB;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA6B,CAAC,GAAG;AAC3C,UAAM,WAAW,OAAO,QAAQ;AAChC,UAAM,kBACJ,OAAO,eACP;AAIF,UAAM,CAAC,CAAC;AAER,SAAK,OAAO;AACZ,SAAK,cAAc;AAEnB,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,SAAS,CAAC,UAAU,gBAAgB,UAAU;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,YAAY;AACjB,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;
AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,0BAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,MAAM,MAA+B;AACzC,UAAM,iBAAiC;AAAA,MACrC,QAAQ,KAAK;AAAA,IACf;AACA,QAAI,KAAK,cAAc,QAAW;AAChC,qBAAe,YAAY,KAAK;AAAA,IAClC;AAEA,UAAM,SAAwB,MAAM,KAAK,OAAO,QAAQ,KAAK,OAAO,EAAE,KAAK,GAAG,cAAc;AAE5F,WAAO,KAAK,UAAU;AAAA,MACpB,UAAU,OAAO,SAAS,IAAI,CAAC,OAAO;AAAA,QACpC,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,GAAI,EAAE,UAAU,UAAa,EAAE,OAAO,EAAE,MAAM;AAAA,QAC9C,GAAI,EAAE,QAAQ,UAAa,EAAE,KAAK,EAAE,IAAI;AAAA,MAC1C,EAAE;AAAA,MACF,WAAW,OAAO,UAAU,IAAI,CAAC,OAAO;AAAA,QACtC,MAAM,EAAE;AAAA,QACR,MAAM,EAAE;AAAA,QACR,UAAU,EAAE;AAAA,QACZ,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,iBAAiB,OAAO,gBAAgB,IAAI,CAAC,OAAO;AAAA,QAClD,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,SAAS,OAAO,QAAQ,IAAI,CAAC,OAAO;AAAA,QAClC,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,MAAM,EAAE;AAAA,MACV,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AFpHO,IAAM,gBAAN,cAA4B,6BAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,0BAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,M
AClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,gCAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,eAAO,gCAAe,MAAsB;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,0BAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,iCAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,iCAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":["import_sie_sdk","import_sie_sdk"]}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,243 @@
|
|
|
1
1
|
import { EmbeddingsParams, Embeddings } from '@langchain/core/embeddings';
|
|
2
2
|
import { SIEClient, DType } from '@superlinked/sie-sdk';
|
|
3
|
+
import { DocumentInterface } from '@langchain/core/documents';
|
|
4
|
+
import { BaseDocumentCompressor } from '@langchain/core/retrievers/document_compressors';
|
|
5
|
+
import { Tool } from '@langchain/core/tools';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* SIE reranker integration for LangChain.js
|
|
9
|
+
*
|
|
10
|
+
* Provides document reranking using SIE's score endpoint:
|
|
11
|
+
* - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { SIEReranker } from "@superlinked/sie-langchain";
|
|
16
|
+
*
|
|
17
|
+
* const reranker = new SIEReranker({
|
|
18
|
+
* baseUrl: "http://localhost:8080",
|
|
19
|
+
* model: "jinaai/jina-reranker-v2-base-multilingual",
|
|
20
|
+
* topK: 3,
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* const reranked = await reranker.compressDocuments(documents, "search query");
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Configuration options for SIEReranker.
|
|
29
|
+
*/
|
|
30
|
+
interface SIERerankerParams {
|
|
31
|
+
/**
|
|
32
|
+
* URL of the SIE server.
|
|
33
|
+
* @default "http://localhost:8080"
|
|
34
|
+
*/
|
|
35
|
+
baseUrl?: string;
|
|
36
|
+
/**
|
|
37
|
+
* Reranker model name/ID.
|
|
38
|
+
* @default "jinaai/jina-reranker-v2-base-multilingual"
|
|
39
|
+
*/
|
|
40
|
+
model?: string;
|
|
41
|
+
/**
|
|
42
|
+
* Optional pre-configured SIEClient instance.
|
|
43
|
+
* If provided, baseUrl and other connection options are ignored.
|
|
44
|
+
*/
|
|
45
|
+
client?: SIEClient;
|
|
46
|
+
/**
|
|
47
|
+
* Number of top documents to return. If undefined, returns all documents.
|
|
48
|
+
*/
|
|
49
|
+
topK?: number;
|
|
50
|
+
/**
|
|
51
|
+
* Target GPU type for routing (e.g., "l4", "a100-80gb").
|
|
52
|
+
*/
|
|
53
|
+
gpu?: string;
|
|
54
|
+
/**
|
|
55
|
+
* Request timeout in milliseconds.
|
|
56
|
+
* @default 180000 (3 minutes)
|
|
57
|
+
*/
|
|
58
|
+
timeout?: number;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* LangChain document compressor using SIE's reranking.
|
|
62
|
+
*
|
|
63
|
+
* Wraps SIEClient.score() to implement BaseDocumentCompressor.
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```typescript
|
|
67
|
+
* import { SIEReranker } from "@superlinked/sie-langchain";
|
|
68
|
+
*
|
|
69
|
+
* const reranker = new SIEReranker({
|
|
70
|
+
* baseUrl: "http://localhost:8080",
|
|
71
|
+
* model: "jinaai/jina-reranker-v2-base-multilingual",
|
|
72
|
+
* topK: 3,
|
|
73
|
+
* });
|
|
74
|
+
*
|
|
75
|
+
* // Rerank retrieved documents
|
|
76
|
+
* const reranked = await reranker.compressDocuments(documents, "search query");
|
|
77
|
+
*
|
|
78
|
+
* // Use in a retrieval pipeline
|
|
79
|
+
* import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";
|
|
80
|
+
*
|
|
81
|
+
* const compressionRetriever = new ContextualCompressionRetriever({
|
|
82
|
+
* baseCompressor: reranker,
|
|
83
|
+
* baseRetriever: vectorStoreRetriever,
|
|
84
|
+
* });
|
|
85
|
+
* ```
|
|
86
|
+
*/
|
|
87
|
+
declare class SIEReranker extends BaseDocumentCompressor {
|
|
88
|
+
private readonly model;
|
|
89
|
+
private readonly topK?;
|
|
90
|
+
private _client;
|
|
91
|
+
private readonly _ownsClient;
|
|
92
|
+
private readonly baseUrl;
|
|
93
|
+
private readonly clientOptions;
|
|
94
|
+
constructor(params?: SIERerankerParams);
|
|
95
|
+
/**
|
|
96
|
+
* Get or create the SIEClient.
|
|
97
|
+
*/
|
|
98
|
+
private get client();
|
|
99
|
+
/**
|
|
100
|
+
* Rerank documents by relevance to query.
|
|
101
|
+
*
|
|
102
|
+
* @param documents - Documents to rerank.
|
|
103
|
+
* @param query - Query to rank documents against.
|
|
104
|
+
* @returns Reranked documents with relevance_score in metadata, sorted by score descending.
|
|
105
|
+
*/
|
|
106
|
+
compressDocuments(documents: DocumentInterface[], query: string): Promise<DocumentInterface[]>;
|
|
107
|
+
/**
|
|
108
|
+
* Close the underlying client connection.
|
|
109
|
+
*/
|
|
110
|
+
close(): Promise<void>;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* SIE extraction tool for LangChain.js
|
|
115
|
+
*
|
|
116
|
+
* Provides extraction using SIE's extract endpoint:
|
|
117
|
+
* - SIEExtractor: Extraction tool implementing LangChain Tool
|
|
118
|
+
*
|
|
119
|
+
* Returns entities, relations, classifications, and detected objects.
|
|
120
|
+
*
|
|
121
|
+
* @example
|
|
122
|
+
* ```typescript
|
|
123
|
+
* import { SIEExtractor } from "@superlinked/sie-langchain";
|
|
124
|
+
*
|
|
125
|
+
* const extractor = new SIEExtractor({
|
|
126
|
+
* baseUrl: "http://localhost:8080",
|
|
127
|
+
* model: "urchade/gliner_multi-v2.1",
|
|
128
|
+
* labels: ["person", "organization", "location"],
|
|
129
|
+
* });
|
|
130
|
+
*
|
|
131
|
+
* const result = await extractor.invoke("John Smith works at Acme Corp in NYC");
|
|
132
|
+
* const parsed = JSON.parse(result);
|
|
133
|
+
* console.log(parsed.entities);
|
|
134
|
+
* console.log(parsed.relations);
|
|
135
|
+
* ```
|
|
136
|
+
*/
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Configuration options for SIEExtractor.
|
|
140
|
+
*/
|
|
141
|
+
interface SIEExtractorParams {
|
|
142
|
+
/**
|
|
143
|
+
* URL of the SIE server.
|
|
144
|
+
* @default "http://localhost:8080"
|
|
145
|
+
*/
|
|
146
|
+
baseUrl?: string;
|
|
147
|
+
/**
|
|
148
|
+
* Extraction model name/ID.
|
|
149
|
+
* @default "urchade/gliner_multi-v2.1"
|
|
150
|
+
*/
|
|
151
|
+
model?: string;
|
|
152
|
+
/**
|
|
153
|
+
* Optional pre-configured SIEClient instance.
|
|
154
|
+
* If provided, baseUrl and other connection options are ignored.
|
|
155
|
+
*/
|
|
156
|
+
client?: SIEClient;
|
|
157
|
+
/**
|
|
158
|
+
* Labels to extract (entity types, relation types, or classification labels).
|
|
159
|
+
* @default ["person", "organization", "location"]
|
|
160
|
+
*/
|
|
161
|
+
labels?: string[];
|
|
162
|
+
/**
|
|
163
|
+
* Minimum confidence threshold (0-1).
|
|
164
|
+
*/
|
|
165
|
+
threshold?: number;
|
|
166
|
+
/**
|
|
167
|
+
* Target GPU type for routing (e.g., "l4", "a100-80gb").
|
|
168
|
+
*/
|
|
169
|
+
gpu?: string;
|
|
170
|
+
/**
|
|
171
|
+
* Request timeout in milliseconds.
|
|
172
|
+
* @default 180000 (3 minutes)
|
|
173
|
+
*/
|
|
174
|
+
timeout?: number;
|
|
175
|
+
/**
|
|
176
|
+
* Tool name for use in agents.
|
|
177
|
+
* @default "sie_extract"
|
|
178
|
+
*/
|
|
179
|
+
name?: string;
|
|
180
|
+
/**
|
|
181
|
+
* Tool description for use in agents.
|
|
182
|
+
*/
|
|
183
|
+
description?: string;
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* LangChain tool for extraction using SIE.
|
|
187
|
+
*
|
|
188
|
+
* Wraps SIEClient.extract() to implement the LangChain Tool interface
|
|
189
|
+
* for use in agents and chains. Returns JSON with entities, relations,
|
|
190
|
+
* classifications, and detected objects.
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* ```typescript
|
|
194
|
+
* import { SIEExtractor } from "@superlinked/sie-langchain";
|
|
195
|
+
*
|
|
196
|
+
* // Direct usage
|
|
197
|
+
* const extractor = new SIEExtractor({
|
|
198
|
+
* model: "urchade/gliner_multi-v2.1",
|
|
199
|
+
* labels: ["person", "organization", "location"],
|
|
200
|
+
* });
|
|
201
|
+
* const result = await extractor.invoke("John Smith works at Acme Corp");
|
|
202
|
+
* const parsed = JSON.parse(result);
|
|
203
|
+
*
|
|
204
|
+
* // Use in an agent
|
|
205
|
+
* import { ChatOpenAI } from "@langchain/openai";
|
|
206
|
+
* import { createReactAgent } from "@langchain/langgraph/prebuilt";
|
|
207
|
+
*
|
|
208
|
+
* const agent = createReactAgent({
|
|
209
|
+
* llm: new ChatOpenAI(),
|
|
210
|
+
* tools: [extractor],
|
|
211
|
+
* });
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
214
|
+
declare class SIEExtractor extends Tool {
|
|
215
|
+
name: string;
|
|
216
|
+
description: string;
|
|
217
|
+
private readonly model;
|
|
218
|
+
private readonly labels;
|
|
219
|
+
private readonly threshold?;
|
|
220
|
+
private _client;
|
|
221
|
+
private readonly _ownsClient;
|
|
222
|
+
private readonly baseUrl;
|
|
223
|
+
private readonly clientOptions;
|
|
224
|
+
constructor(params?: SIEExtractorParams);
|
|
225
|
+
/**
|
|
226
|
+
* Get or create the SIEClient.
|
|
227
|
+
*/
|
|
228
|
+
private get client();
|
|
229
|
+
/**
|
|
230
|
+
* Extract structured information from text.
|
|
231
|
+
*
|
|
232
|
+
* @param text - Text to extract from.
|
|
233
|
+
* @returns JSON string with entities, relations, classifications, and objects.
|
|
234
|
+
*/
|
|
235
|
+
_call(text: string): Promise<string>;
|
|
236
|
+
/**
|
|
237
|
+
* Close the underlying client connection.
|
|
238
|
+
*/
|
|
239
|
+
close(): Promise<void>;
|
|
240
|
+
}
|
|
3
241
|
|
|
4
242
|
/**
|
|
5
243
|
* SIE embeddings integration for LangChain.js
|
|
@@ -7,6 +245,8 @@ import { SIEClient, DType } from '@superlinked/sie-sdk';
|
|
|
7
245
|
* Provides drop-in replacement for OpenAI embeddings using SIE's inference server:
|
|
8
246
|
* - SIEEmbeddings: Dense embeddings for vector stores
|
|
9
247
|
* - SIESparseEncoder: Sparse encoder for hybrid search
|
|
248
|
+
* - SIEReranker: Cross-encoder reranking for retrieval pipelines
|
|
249
|
+
* - SIEExtractor: Entity extraction tool for agents
|
|
10
250
|
*
|
|
11
251
|
* @example
|
|
12
252
|
* ```typescript
|
|
@@ -118,10 +358,6 @@ declare class SIEEmbeddings extends Embeddings {
|
|
|
118
358
|
* @returns Embedding vector as array of numbers.
|
|
119
359
|
*/
|
|
120
360
|
embedQuery(text: string): Promise<number[]>;
|
|
121
|
-
/**
|
|
122
|
-
* Extract dense embedding from encode result.
|
|
123
|
-
*/
|
|
124
|
-
private extractDense;
|
|
125
361
|
/**
|
|
126
362
|
* Close the underlying client connection.
|
|
127
363
|
*/
|
|
@@ -198,14 +434,10 @@ declare class SIESparseEncoder {
|
|
|
198
434
|
indices: number[];
|
|
199
435
|
values: number[];
|
|
200
436
|
}>>;
|
|
201
|
-
/**
|
|
202
|
-
* Extract sparse embedding from encode result.
|
|
203
|
-
*/
|
|
204
|
-
private extractSparse;
|
|
205
437
|
/**
|
|
206
438
|
* Close the underlying client connection.
|
|
207
439
|
*/
|
|
208
440
|
close(): Promise<void>;
|
|
209
441
|
}
|
|
210
442
|
|
|
211
|
-
export { SIEEmbeddings, type SIEEmbeddingsParams, SIESparseEncoder, type SIESparseEncoderOptions };
|
|
443
|
+
export { SIEEmbeddings, type SIEEmbeddingsParams, SIEExtractor, type SIEExtractorParams, SIEReranker, type SIERerankerParams, SIESparseEncoder, type SIESparseEncoderOptions };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,243 @@
|
|
|
1
1
|
import { EmbeddingsParams, Embeddings } from '@langchain/core/embeddings';
|
|
2
2
|
import { SIEClient, DType } from '@superlinked/sie-sdk';
|
|
3
|
+
import { DocumentInterface } from '@langchain/core/documents';
|
|
4
|
+
import { BaseDocumentCompressor } from '@langchain/core/retrievers/document_compressors';
|
|
5
|
+
import { Tool } from '@langchain/core/tools';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* SIE reranker integration for LangChain.js
|
|
9
|
+
*
|
|
10
|
+
* Provides document reranking using SIE's score endpoint:
|
|
11
|
+
* - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { SIEReranker } from "@superlinked/sie-langchain";
|
|
16
|
+
*
|
|
17
|
+
* const reranker = new SIEReranker({
|
|
18
|
+
* baseUrl: "http://localhost:8080",
|
|
19
|
+
* model: "jinaai/jina-reranker-v2-base-multilingual",
|
|
20
|
+
* topK: 3,
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* const reranked = await reranker.compressDocuments(documents, "search query");
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Configuration options for SIEReranker.
|
|
29
|
+
*/
|
|
30
|
+
interface SIERerankerParams {
|
|
31
|
+
/**
|
|
32
|
+
* URL of the SIE server.
|
|
33
|
+
* @default "http://localhost:8080"
|
|
34
|
+
*/
|
|
35
|
+
baseUrl?: string;
|
|
36
|
+
/**
|
|
37
|
+
* Reranker model name/ID.
|
|
38
|
+
* @default "jinaai/jina-reranker-v2-base-multilingual"
|
|
39
|
+
*/
|
|
40
|
+
model?: string;
|
|
41
|
+
/**
|
|
42
|
+
* Optional pre-configured SIEClient instance.
|
|
43
|
+
* If provided, baseUrl and other connection options are ignored.
|
|
44
|
+
*/
|
|
45
|
+
client?: SIEClient;
|
|
46
|
+
/**
|
|
47
|
+
* Number of top documents to return. If undefined, returns all documents.
|
|
48
|
+
*/
|
|
49
|
+
topK?: number;
|
|
50
|
+
/**
|
|
51
|
+
* Target GPU type for routing (e.g., "l4", "a100-80gb").
|
|
52
|
+
*/
|
|
53
|
+
gpu?: string;
|
|
54
|
+
/**
|
|
55
|
+
* Request timeout in milliseconds.
|
|
56
|
+
* @default 180000 (3 minutes)
|
|
57
|
+
*/
|
|
58
|
+
timeout?: number;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* LangChain document compressor using SIE's reranking.
|
|
62
|
+
*
|
|
63
|
+
* Wraps SIEClient.score() to implement BaseDocumentCompressor.
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```typescript
|
|
67
|
+
* import { SIEReranker } from "@superlinked/sie-langchain";
|
|
68
|
+
*
|
|
69
|
+
* const reranker = new SIEReranker({
|
|
70
|
+
* baseUrl: "http://localhost:8080",
|
|
71
|
+
* model: "jinaai/jina-reranker-v2-base-multilingual",
|
|
72
|
+
* topK: 3,
|
|
73
|
+
* });
|
|
74
|
+
*
|
|
75
|
+
* // Rerank retrieved documents
|
|
76
|
+
* const reranked = await reranker.compressDocuments(documents, "search query");
|
|
77
|
+
*
|
|
78
|
+
* // Use in a retrieval pipeline
|
|
79
|
+
* import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";
|
|
80
|
+
*
|
|
81
|
+
* const compressionRetriever = new ContextualCompressionRetriever({
|
|
82
|
+
* baseCompressor: reranker,
|
|
83
|
+
* baseRetriever: vectorStoreRetriever,
|
|
84
|
+
* });
|
|
85
|
+
* ```
|
|
86
|
+
*/
|
|
87
|
+
declare class SIEReranker extends BaseDocumentCompressor {
|
|
88
|
+
private readonly model;
|
|
89
|
+
private readonly topK?;
|
|
90
|
+
private _client;
|
|
91
|
+
private readonly _ownsClient;
|
|
92
|
+
private readonly baseUrl;
|
|
93
|
+
private readonly clientOptions;
|
|
94
|
+
constructor(params?: SIERerankerParams);
|
|
95
|
+
/**
|
|
96
|
+
* Get or create the SIEClient.
|
|
97
|
+
*/
|
|
98
|
+
private get client();
|
|
99
|
+
/**
|
|
100
|
+
* Rerank documents by relevance to query.
|
|
101
|
+
*
|
|
102
|
+
* @param documents - Documents to rerank.
|
|
103
|
+
* @param query - Query to rank documents against.
|
|
104
|
+
* @returns Reranked documents with relevance_score in metadata, sorted by score descending.
|
|
105
|
+
*/
|
|
106
|
+
compressDocuments(documents: DocumentInterface[], query: string): Promise<DocumentInterface[]>;
|
|
107
|
+
/**
|
|
108
|
+
* Close the underlying client connection.
|
|
109
|
+
*/
|
|
110
|
+
close(): Promise<void>;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* SIE extraction tool for LangChain.js
|
|
115
|
+
*
|
|
116
|
+
* Provides extraction using SIE's extract endpoint:
|
|
117
|
+
* - SIEExtractor: Extraction tool implementing LangChain Tool
|
|
118
|
+
*
|
|
119
|
+
* Returns entities, relations, classifications, and detected objects.
|
|
120
|
+
*
|
|
121
|
+
* @example
|
|
122
|
+
* ```typescript
|
|
123
|
+
* import { SIEExtractor } from "@superlinked/sie-langchain";
|
|
124
|
+
*
|
|
125
|
+
* const extractor = new SIEExtractor({
|
|
126
|
+
* baseUrl: "http://localhost:8080",
|
|
127
|
+
* model: "urchade/gliner_multi-v2.1",
|
|
128
|
+
* labels: ["person", "organization", "location"],
|
|
129
|
+
* });
|
|
130
|
+
*
|
|
131
|
+
* const result = await extractor.invoke("John Smith works at Acme Corp in NYC");
|
|
132
|
+
* const parsed = JSON.parse(result);
|
|
133
|
+
* console.log(parsed.entities);
|
|
134
|
+
* console.log(parsed.relations);
|
|
135
|
+
* ```
|
|
136
|
+
*/
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* Configuration options for SIEExtractor.
|
|
140
|
+
*/
|
|
141
|
+
interface SIEExtractorParams {
|
|
142
|
+
/**
|
|
143
|
+
* URL of the SIE server.
|
|
144
|
+
* @default "http://localhost:8080"
|
|
145
|
+
*/
|
|
146
|
+
baseUrl?: string;
|
|
147
|
+
/**
|
|
148
|
+
* Extraction model name/ID.
|
|
149
|
+
* @default "urchade/gliner_multi-v2.1"
|
|
150
|
+
*/
|
|
151
|
+
model?: string;
|
|
152
|
+
/**
|
|
153
|
+
* Optional pre-configured SIEClient instance.
|
|
154
|
+
* If provided, baseUrl and other connection options are ignored.
|
|
155
|
+
*/
|
|
156
|
+
client?: SIEClient;
|
|
157
|
+
/**
|
|
158
|
+
* Labels to extract (entity types, relation types, or classification labels).
|
|
159
|
+
* @default ["person", "organization", "location"]
|
|
160
|
+
*/
|
|
161
|
+
labels?: string[];
|
|
162
|
+
/**
|
|
163
|
+
* Minimum confidence threshold (0-1).
|
|
164
|
+
*/
|
|
165
|
+
threshold?: number;
|
|
166
|
+
/**
|
|
167
|
+
* Target GPU type for routing (e.g., "l4", "a100-80gb").
|
|
168
|
+
*/
|
|
169
|
+
gpu?: string;
|
|
170
|
+
/**
|
|
171
|
+
* Request timeout in milliseconds.
|
|
172
|
+
* @default 180000 (3 minutes)
|
|
173
|
+
*/
|
|
174
|
+
timeout?: number;
|
|
175
|
+
/**
|
|
176
|
+
* Tool name for use in agents.
|
|
177
|
+
* @default "sie_extract"
|
|
178
|
+
*/
|
|
179
|
+
name?: string;
|
|
180
|
+
/**
|
|
181
|
+
* Tool description for use in agents.
|
|
182
|
+
*/
|
|
183
|
+
description?: string;
|
|
184
|
+
}
|
|
185
|
+
/**
|
|
186
|
+
* LangChain tool for extraction using SIE.
|
|
187
|
+
*
|
|
188
|
+
* Wraps SIEClient.extract() to implement the LangChain Tool interface
|
|
189
|
+
* for use in agents and chains. Returns JSON with entities, relations,
|
|
190
|
+
* classifications, and detected objects.
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* ```typescript
|
|
194
|
+
* import { SIEExtractor } from "@superlinked/sie-langchain";
|
|
195
|
+
*
|
|
196
|
+
* // Direct usage
|
|
197
|
+
* const extractor = new SIEExtractor({
|
|
198
|
+
* model: "urchade/gliner_multi-v2.1",
|
|
199
|
+
* labels: ["person", "organization", "location"],
|
|
200
|
+
* });
|
|
201
|
+
* const result = await extractor.invoke("John Smith works at Acme Corp");
|
|
202
|
+
* const parsed = JSON.parse(result);
|
|
203
|
+
*
|
|
204
|
+
* // Use in an agent
|
|
205
|
+
* import { ChatOpenAI } from "@langchain/openai";
|
|
206
|
+
* import { createReactAgent } from "@langchain/langgraph/prebuilt";
|
|
207
|
+
*
|
|
208
|
+
* const agent = createReactAgent({
|
|
209
|
+
* llm: new ChatOpenAI(),
|
|
210
|
+
* tools: [extractor],
|
|
211
|
+
* });
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
214
|
+
declare class SIEExtractor extends Tool {
|
|
215
|
+
name: string;
|
|
216
|
+
description: string;
|
|
217
|
+
private readonly model;
|
|
218
|
+
private readonly labels;
|
|
219
|
+
private readonly threshold?;
|
|
220
|
+
private _client;
|
|
221
|
+
private readonly _ownsClient;
|
|
222
|
+
private readonly baseUrl;
|
|
223
|
+
private readonly clientOptions;
|
|
224
|
+
constructor(params?: SIEExtractorParams);
|
|
225
|
+
/**
|
|
226
|
+
* Get or create the SIEClient.
|
|
227
|
+
*/
|
|
228
|
+
private get client();
|
|
229
|
+
/**
|
|
230
|
+
* Extract structured information from text.
|
|
231
|
+
*
|
|
232
|
+
* @param text - Text to extract from.
|
|
233
|
+
* @returns JSON string with entities, relations, classifications, and objects.
|
|
234
|
+
*/
|
|
235
|
+
_call(text: string): Promise<string>;
|
|
236
|
+
/**
|
|
237
|
+
* Close the underlying client connection.
|
|
238
|
+
*/
|
|
239
|
+
close(): Promise<void>;
|
|
240
|
+
}
|
|
3
241
|
|
|
4
242
|
/**
|
|
5
243
|
* SIE embeddings integration for LangChain.js
|
|
@@ -7,6 +245,8 @@ import { SIEClient, DType } from '@superlinked/sie-sdk';
|
|
|
7
245
|
* Provides drop-in replacement for OpenAI embeddings using SIE's inference server:
|
|
8
246
|
* - SIEEmbeddings: Dense embeddings for vector stores
|
|
9
247
|
* - SIESparseEncoder: Sparse encoder for hybrid search
|
|
248
|
+
* - SIEReranker: Cross-encoder reranking for retrieval pipelines
|
|
249
|
+
* - SIEExtractor: Entity extraction tool for agents
|
|
10
250
|
*
|
|
11
251
|
* @example
|
|
12
252
|
* ```typescript
|
|
@@ -118,10 +358,6 @@ declare class SIEEmbeddings extends Embeddings {
|
|
|
118
358
|
* @returns Embedding vector as array of numbers.
|
|
119
359
|
*/
|
|
120
360
|
embedQuery(text: string): Promise<number[]>;
|
|
121
|
-
/**
|
|
122
|
-
* Extract dense embedding from encode result.
|
|
123
|
-
*/
|
|
124
|
-
private extractDense;
|
|
125
361
|
/**
|
|
126
362
|
* Close the underlying client connection.
|
|
127
363
|
*/
|
|
@@ -198,14 +434,10 @@ declare class SIESparseEncoder {
|
|
|
198
434
|
indices: number[];
|
|
199
435
|
values: number[];
|
|
200
436
|
}>>;
|
|
201
|
-
/**
|
|
202
|
-
* Extract sparse embedding from encode result.
|
|
203
|
-
*/
|
|
204
|
-
private extractSparse;
|
|
205
437
|
/**
|
|
206
438
|
* Close the underlying client connection.
|
|
207
439
|
*/
|
|
208
440
|
close(): Promise<void>;
|
|
209
441
|
}
|
|
210
442
|
|
|
211
|
-
export { SIEEmbeddings, type SIEEmbeddingsParams, SIESparseEncoder, type SIESparseEncoderOptions };
|
|
443
|
+
export { SIEEmbeddings, type SIEEmbeddingsParams, SIEExtractor, type SIEExtractorParams, SIEReranker, type SIERerankerParams, SIESparseEncoder, type SIESparseEncoderOptions };
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,191 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
2
|
import { Embeddings } from "@langchain/core/embeddings";
|
|
3
3
|
import {
|
|
4
|
-
SIEClient,
|
|
5
|
-
|
|
4
|
+
SIEClient as SIEClient3,
|
|
5
|
+
denseEmbedding,
|
|
6
|
+
sparseEmbedding
|
|
6
7
|
} from "@superlinked/sie-sdk";
|
|
8
|
+
|
|
9
|
+
// src/rerankers.ts
|
|
10
|
+
import { BaseDocumentCompressor } from "@langchain/core/retrievers/document_compressors";
|
|
11
|
+
import { SIEClient } from "@superlinked/sie-sdk";
|
|
12
|
+
var SIEReranker = class extends BaseDocumentCompressor {
|
|
13
|
+
model;
|
|
14
|
+
topK;
|
|
15
|
+
_client;
|
|
16
|
+
_ownsClient;
|
|
17
|
+
baseUrl;
|
|
18
|
+
clientOptions;
|
|
19
|
+
constructor(params = {}) {
|
|
20
|
+
super();
|
|
21
|
+
const {
|
|
22
|
+
baseUrl = "http://localhost:8080",
|
|
23
|
+
model = "jinaai/jina-reranker-v2-base-multilingual",
|
|
24
|
+
client,
|
|
25
|
+
topK,
|
|
26
|
+
gpu,
|
|
27
|
+
timeout = 18e4
|
|
28
|
+
} = params;
|
|
29
|
+
this.baseUrl = baseUrl;
|
|
30
|
+
this.model = model;
|
|
31
|
+
this.topK = topK;
|
|
32
|
+
this._client = client;
|
|
33
|
+
this._ownsClient = !client;
|
|
34
|
+
this.clientOptions = {
|
|
35
|
+
timeout,
|
|
36
|
+
gpu
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Get or create the SIEClient.
|
|
41
|
+
*/
|
|
42
|
+
get client() {
|
|
43
|
+
if (!this._client) {
|
|
44
|
+
this._client = new SIEClient(this.baseUrl, this.clientOptions);
|
|
45
|
+
}
|
|
46
|
+
return this._client;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Rerank documents by relevance to query.
|
|
50
|
+
*
|
|
51
|
+
* @param documents - Documents to rerank.
|
|
52
|
+
* @param query - Query to rank documents against.
|
|
53
|
+
* @returns Reranked documents with relevance_score in metadata, sorted by score descending.
|
|
54
|
+
*/
|
|
55
|
+
async compressDocuments(documents, query) {
|
|
56
|
+
if (documents.length === 0) {
|
|
57
|
+
return [];
|
|
58
|
+
}
|
|
59
|
+
const queryItem = { text: query };
|
|
60
|
+
const docItems = documents.map((doc) => ({ text: doc.pageContent }));
|
|
61
|
+
const result = await this.client.score(this.model, queryItem, docItems);
|
|
62
|
+
const reranked = [];
|
|
63
|
+
for (const entry of result.scores) {
|
|
64
|
+
const idx = Number.parseInt(entry.itemId, 10);
|
|
65
|
+
const doc = documents[idx];
|
|
66
|
+
if (doc) {
|
|
67
|
+
reranked.push({
|
|
68
|
+
pageContent: doc.pageContent,
|
|
69
|
+
metadata: { ...doc.metadata, relevance_score: entry.score },
|
|
70
|
+
id: doc.id
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
if (this.topK !== void 0) {
|
|
75
|
+
return reranked.slice(0, this.topK);
|
|
76
|
+
}
|
|
77
|
+
return reranked;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Close the underlying client connection.
|
|
81
|
+
*/
|
|
82
|
+
async close() {
|
|
83
|
+
if (this._client && this._ownsClient) {
|
|
84
|
+
await this._client.close();
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
// src/extractors.ts
|
|
90
|
+
import { Tool } from "@langchain/core/tools";
|
|
91
|
+
import {
|
|
92
|
+
SIEClient as SIEClient2
|
|
93
|
+
} from "@superlinked/sie-sdk";
|
|
94
|
+
var SIEExtractor = class extends Tool {
|
|
95
|
+
name;
|
|
96
|
+
description;
|
|
97
|
+
model;
|
|
98
|
+
labels;
|
|
99
|
+
threshold;
|
|
100
|
+
_client;
|
|
101
|
+
_ownsClient;
|
|
102
|
+
baseUrl;
|
|
103
|
+
clientOptions;
|
|
104
|
+
constructor(params = {}) {
|
|
105
|
+
const toolName = params.name ?? "sie_extract";
|
|
106
|
+
const toolDescription = params.description ?? "Extract structured information from text. Input should be text to analyze. Returns JSON with entities, relations, classifications, and detected objects.";
|
|
107
|
+
super({});
|
|
108
|
+
this.name = toolName;
|
|
109
|
+
this.description = toolDescription;
|
|
110
|
+
const {
|
|
111
|
+
baseUrl = "http://localhost:8080",
|
|
112
|
+
model = "urchade/gliner_multi-v2.1",
|
|
113
|
+
client,
|
|
114
|
+
labels = ["person", "organization", "location"],
|
|
115
|
+
threshold,
|
|
116
|
+
gpu,
|
|
117
|
+
timeout = 18e4
|
|
118
|
+
} = params;
|
|
119
|
+
this.baseUrl = baseUrl;
|
|
120
|
+
this.model = model;
|
|
121
|
+
this.labels = labels;
|
|
122
|
+
this.threshold = threshold;
|
|
123
|
+
this._client = client;
|
|
124
|
+
this._ownsClient = !client;
|
|
125
|
+
this.clientOptions = {
|
|
126
|
+
timeout,
|
|
127
|
+
gpu
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Get or create the SIEClient.
|
|
132
|
+
*/
|
|
133
|
+
get client() {
|
|
134
|
+
if (!this._client) {
|
|
135
|
+
this._client = new SIEClient2(this.baseUrl, this.clientOptions);
|
|
136
|
+
}
|
|
137
|
+
return this._client;
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Extract structured information from text.
|
|
141
|
+
*
|
|
142
|
+
* @param text - Text to extract from.
|
|
143
|
+
* @returns JSON string with entities, relations, classifications, and objects.
|
|
144
|
+
*/
|
|
145
|
+
async _call(text) {
|
|
146
|
+
const extractOptions = {
|
|
147
|
+
labels: this.labels
|
|
148
|
+
};
|
|
149
|
+
if (this.threshold !== void 0) {
|
|
150
|
+
extractOptions.threshold = this.threshold;
|
|
151
|
+
}
|
|
152
|
+
const result = await this.client.extract(this.model, { text }, extractOptions);
|
|
153
|
+
return JSON.stringify({
|
|
154
|
+
entities: result.entities.map((e) => ({
|
|
155
|
+
text: e.text,
|
|
156
|
+
label: e.label,
|
|
157
|
+
score: e.score,
|
|
158
|
+
...e.start !== void 0 && { start: e.start },
|
|
159
|
+
...e.end !== void 0 && { end: e.end }
|
|
160
|
+
})),
|
|
161
|
+
relations: result.relations.map((r) => ({
|
|
162
|
+
head: r.head,
|
|
163
|
+
tail: r.tail,
|
|
164
|
+
relation: r.relation,
|
|
165
|
+
score: r.score
|
|
166
|
+
})),
|
|
167
|
+
classifications: result.classifications.map((c) => ({
|
|
168
|
+
label: c.label,
|
|
169
|
+
score: c.score
|
|
170
|
+
})),
|
|
171
|
+
objects: result.objects.map((o) => ({
|
|
172
|
+
label: o.label,
|
|
173
|
+
score: o.score,
|
|
174
|
+
bbox: o.bbox
|
|
175
|
+
}))
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Close the underlying client connection.
|
|
180
|
+
*/
|
|
181
|
+
async close() {
|
|
182
|
+
if (this._client && this._ownsClient) {
|
|
183
|
+
await this._client.close();
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
// src/index.ts
|
|
7
189
|
var SIEEmbeddings = class extends Embeddings {
|
|
8
190
|
model;
|
|
9
191
|
instruction;
|
|
@@ -41,7 +223,7 @@ var SIEEmbeddings = class extends Embeddings {
|
|
|
41
223
|
get client() {
|
|
42
224
|
if (!this._client) {
|
|
43
225
|
const baseUrl = this.baseUrl ?? "http://localhost:8080";
|
|
44
|
-
this._client = new
|
|
226
|
+
this._client = new SIEClient3(baseUrl, this.clientOptions);
|
|
45
227
|
}
|
|
46
228
|
return this._client;
|
|
47
229
|
}
|
|
@@ -63,7 +245,7 @@ var SIEEmbeddings = class extends Embeddings {
|
|
|
63
245
|
isQuery: false
|
|
64
246
|
};
|
|
65
247
|
const results = await this.client.encode(this.model, items, options);
|
|
66
|
-
return results.map((result) =>
|
|
248
|
+
return results.map((result) => denseEmbedding(result));
|
|
67
249
|
}
|
|
68
250
|
/**
|
|
69
251
|
* Embed a single query text.
|
|
@@ -81,17 +263,7 @@ var SIEEmbeddings = class extends Embeddings {
|
|
|
81
263
|
isQuery: true
|
|
82
264
|
};
|
|
83
265
|
const result = await this.client.encode(this.model, { text }, options);
|
|
84
|
-
return
|
|
85
|
-
}
|
|
86
|
-
/**
|
|
87
|
-
* Extract dense embedding from encode result.
|
|
88
|
-
*/
|
|
89
|
-
extractDense(result) {
|
|
90
|
-
const dense = result.dense;
|
|
91
|
-
if (!dense) {
|
|
92
|
-
throw new Error("Encode result missing dense embedding");
|
|
93
|
-
}
|
|
94
|
-
return toNumberArray(dense);
|
|
266
|
+
return denseEmbedding(result);
|
|
95
267
|
}
|
|
96
268
|
/**
|
|
97
269
|
* Close the underlying client connection.
|
|
@@ -126,7 +298,7 @@ var SIESparseEncoder = class {
|
|
|
126
298
|
*/
|
|
127
299
|
get client() {
|
|
128
300
|
if (!this._client) {
|
|
129
|
-
this._client = new
|
|
301
|
+
this._client = new SIEClient3(this.baseUrl, this.clientOptions);
|
|
130
302
|
}
|
|
131
303
|
return this._client;
|
|
132
304
|
}
|
|
@@ -146,7 +318,7 @@ var SIESparseEncoder = class {
|
|
|
146
318
|
isQuery: true
|
|
147
319
|
};
|
|
148
320
|
const results = await this.client.encode(this.model, items, options);
|
|
149
|
-
return results.map((result) =>
|
|
321
|
+
return results.map((result) => sparseEmbedding(result));
|
|
150
322
|
}
|
|
151
323
|
/**
|
|
152
324
|
* Encode document texts to sparse vectors.
|
|
@@ -164,20 +336,7 @@ var SIESparseEncoder = class {
|
|
|
164
336
|
isQuery: false
|
|
165
337
|
};
|
|
166
338
|
const results = await this.client.encode(this.model, items, options);
|
|
167
|
-
return results.map((result) =>
|
|
168
|
-
}
|
|
169
|
-
/**
|
|
170
|
-
* Extract sparse embedding from encode result.
|
|
171
|
-
*/
|
|
172
|
-
extractSparse(result) {
|
|
173
|
-
const sparse = result.sparse;
|
|
174
|
-
if (!sparse) {
|
|
175
|
-
return { indices: [], values: [] };
|
|
176
|
-
}
|
|
177
|
-
return {
|
|
178
|
-
indices: toNumberArray(sparse.indices),
|
|
179
|
-
values: toNumberArray(sparse.values)
|
|
180
|
-
};
|
|
339
|
+
return results.map((result) => sparseEmbedding(result));
|
|
181
340
|
}
|
|
182
341
|
/**
|
|
183
342
|
* Close the underlying client connection.
|
|
@@ -190,6 +349,8 @@ var SIESparseEncoder = class {
|
|
|
190
349
|
};
|
|
191
350
|
export {
|
|
192
351
|
SIEEmbeddings,
|
|
352
|
+
SIEExtractor,
|
|
353
|
+
SIEReranker,
|
|
193
354
|
SIESparseEncoder
|
|
194
355
|
};
|
|
195
356
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n 
* import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return this.extractDense(result as EncodeResult);\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): { indices: number[]; values: number[] } {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n 
}\n}\n"],"mappings":";AAqBA,SAAS,kBAAyC;AAClD;AAAA,EAIE;AAAA,EAEA;AAAA,OACK;AA8EA,IAAM,gBAAN,cAA4B,WAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,UAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,KAAK,aAAa,MAAsB;AAAA,EACjD;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,WAAO,cAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;A
ACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAA+D;AACnF,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,SAAS,cAAc,OAAO,OAAO;AAAA,MACrC,QAAQ,cAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/rerankers.ts","../src/extractors.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n * - SIEReranker: Cross-encoder reranking for retrieval pipelines\n * - SIEExtractor: Entity extraction tool for agents\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: 
number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return denseEmbedding(result as EncodeResult);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\nexport { SIEReranker, type SIERerankerParams } from \"./rerankers.js\";\nexport { SIEExtractor, type SIEExtractorParams } from \"./extractors.js\";\n","/**\n * SIE reranker integration for LangChain.js\n *\n * Provides document reranking using SIE's score endpoint:\n * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n * ```\n */\n\nimport type { DocumentInterface } from \"@langchain/core/documents\";\nimport { BaseDocumentCompressor } from \"@langchain/core/retrievers/document_compressors\";\nimport { SIEClient, type SIEClientOptions } from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEReranker.\n */\nexport interface SIERerankerParams {\n /**\n * URL of the SIE 
server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Reranker model name/ID.\n * @default \"jinaai/jina-reranker-v2-base-multilingual\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Number of top documents to return. If undefined, returns all documents.\n */\n topK?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain document compressor using SIE's reranking.\n *\n * Wraps SIEClient.score() to implement BaseDocumentCompressor.\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * // Rerank retrieved documents\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n *\n * // Use in a retrieval pipeline\n * import { ContextualCompressionRetriever } from \"langchain/retrievers/contextual_compression\";\n *\n * const compressionRetriever = new ContextualCompressionRetriever({\n * baseCompressor: reranker,\n * baseRetriever: vectorStoreRetriever,\n * });\n * ```\n */\nexport class SIEReranker extends BaseDocumentCompressor {\n private readonly model: string;\n private readonly topK?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIERerankerParams = {}) {\n super();\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"jinaai/jina-reranker-v2-base-multilingual\",\n client,\n topK,\n gpu,\n timeout = 180_000,\n } = params;\n\n 
this.baseUrl = baseUrl;\n this.model = model;\n this.topK = topK;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Rerank documents by relevance to query.\n *\n * @param documents - Documents to rerank.\n * @param query - Query to rank documents against.\n * @returns Reranked documents with relevance_score in metadata, sorted by score descending.\n */\n async compressDocuments(\n documents: DocumentInterface[],\n query: string,\n ): Promise<DocumentInterface[]> {\n if (documents.length === 0) {\n return [];\n }\n\n const queryItem = { text: query };\n const docItems = documents.map((doc) => ({ text: doc.pageContent }));\n\n const result = await this.client.score(this.model, queryItem, docItems);\n\n // Map score entries back to documents with relevance_score in metadata.\n // ScoreResult.scores are already sorted by score descending.\n const reranked: DocumentInterface[] = [];\n for (const entry of result.scores) {\n const idx = Number.parseInt(entry.itemId, 10);\n const doc = documents[idx];\n if (doc) {\n reranked.push({\n pageContent: doc.pageContent,\n metadata: { ...doc.metadata, relevance_score: entry.score },\n id: doc.id,\n });\n }\n }\n\n if (this.topK !== undefined) {\n return reranked.slice(0, this.topK);\n }\n return reranked;\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await this._client.close();\n }\n }\n}\n","/**\n * SIE extraction tool for LangChain.js\n *\n * Provides extraction using SIE's extract endpoint:\n * - SIEExtractor: Extraction tool implementing LangChain Tool\n *\n * Returns entities, relations, classifications, and detected objects.\n *\n * @example\n * ```typescript\n * 
import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * const extractor = new SIEExtractor({\n * baseUrl: \"http://localhost:8080\",\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n *\n * const result = await extractor.invoke(\"John Smith works at Acme Corp in NYC\");\n * const parsed = JSON.parse(result);\n * console.log(parsed.entities);\n * console.log(parsed.relations);\n * ```\n */\n\nimport { Tool } from \"@langchain/core/tools\";\nimport {\n type ExtractOptions,\n type ExtractResult,\n SIEClient,\n type SIEClientOptions,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEExtractor.\n */\nexport interface SIEExtractorParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Extraction model name/ID.\n * @default \"urchade/gliner_multi-v2.1\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Labels to extract (entity types, relation types, or classification labels).\n * @default [\"person\", \"organization\", \"location\"]\n */\n labels?: string[];\n\n /**\n * Minimum confidence threshold (0-1).\n */\n threshold?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n\n /**\n * Tool name for use in agents.\n * @default \"sie_extract\"\n */\n name?: string;\n\n /**\n * Tool description for use in agents.\n */\n description?: string;\n}\n\n/**\n * LangChain tool for extraction using SIE.\n *\n * Wraps SIEClient.extract() to implement the LangChain Tool interface\n * for use in agents and chains. 
Returns JSON with entities, relations,\n * classifications, and detected objects.\n *\n * @example\n * ```typescript\n * import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * // Direct usage\n * const extractor = new SIEExtractor({\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n * const result = await extractor.invoke(\"John Smith works at Acme Corp\");\n * const parsed = JSON.parse(result);\n *\n * // Use in an agent\n * import { ChatOpenAI } from \"@langchain/openai\";\n * import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n *\n * const agent = createReactAgent({\n * llm: new ChatOpenAI(),\n * tools: [extractor],\n * });\n * ```\n */\nexport class SIEExtractor extends Tool {\n name: string;\n description: string;\n\n private readonly model: string;\n private readonly labels: string[];\n private readonly threshold?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEExtractorParams = {}) {\n const toolName = params.name ?? \"sie_extract\";\n const toolDescription =\n params.description ??\n \"Extract structured information from text. \" +\n \"Input should be text to analyze. 
\" +\n \"Returns JSON with entities, relations, classifications, and detected objects.\";\n\n super({});\n\n this.name = toolName;\n this.description = toolDescription;\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"urchade/gliner_multi-v2.1\",\n client,\n labels = [\"person\", \"organization\", \"location\"],\n threshold,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.labels = labels;\n this.threshold = threshold;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Extract structured information from text.\n *\n * @param text - Text to extract from.\n * @returns JSON string with entities, relations, classifications, and objects.\n */\n async _call(text: string): Promise<string> {\n const extractOptions: ExtractOptions = {\n labels: this.labels,\n };\n if (this.threshold !== undefined) {\n extractOptions.threshold = this.threshold;\n }\n\n const result: ExtractResult = await this.client.extract(this.model, { text }, extractOptions);\n\n return JSON.stringify({\n entities: result.entities.map((e) => ({\n text: e.text,\n label: e.label,\n score: e.score,\n ...(e.start !== undefined && { start: e.start }),\n ...(e.end !== undefined && { end: e.end }),\n })),\n relations: result.relations.map((r) => ({\n head: r.head,\n tail: r.tail,\n relation: r.relation,\n score: r.score,\n })),\n classifications: result.classifications.map((c) => ({\n label: c.label,\n score: c.score,\n })),\n objects: result.objects.map((o) => ({\n label: o.label,\n score: o.score,\n bbox: o.bbox,\n })),\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await 
this._client.close();\n }\n }\n}\n"],"mappings":";AAuBA,SAAS,kBAAyC;AAClD;AAAA,EAIE,aAAAA;AAAA,EAEA;AAAA,EACA;AAAA,OACK;;;ACXP,SAAS,8BAA8B;AACvC,SAAS,iBAAwC;AAoE1C,IAAM,cAAN,cAA0B,uBAAuB;AAAA,EACrC;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA4B,CAAC,GAAG;AAC1C,UAAM;AAEN,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,kBACJ,WACA,OAC8B;AAC9B,QAAI,UAAU,WAAW,GAAG;AAC1B,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,YAAY,EAAE,MAAM,MAAM;AAChC,UAAM,WAAW,UAAU,IAAI,CAAC,SAAS,EAAE,MAAM,IAAI,YAAY,EAAE;AAEnE,UAAM,SAAS,MAAM,KAAK,OAAO,MAAM,KAAK,OAAO,WAAW,QAAQ;AAItE,UAAM,WAAgC,CAAC;AACvC,eAAW,SAAS,OAAO,QAAQ;AACjC,YAAM,MAAM,OAAO,SAAS,MAAM,QAAQ,EAAE;AAC5C,YAAM,MAAM,UAAU,GAAG;AACzB,UAAI,KAAK;AACP,iBAAS,KAAK;AAAA,UACZ,aAAa,IAAI;AAAA,UACjB,UAAU,EAAE,GAAG,IAAI,UAAU,iBAAiB,MAAM,MAAM;AAAA,UAC1D,IAAI,IAAI;AAAA,QACV,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,KAAK,SAAS,QAAW;AAC3B,aAAO,SAAS,MAAM,GAAG,KAAK,IAAI;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AC5JA,SAAS,YAAY;AACrB;AAAA,EAGE,aAAAC;AAAA,OAEK;AAuFA,IAAM,eAAN,cAA2B,KAAK;AAAA,EACrC;AAAA,EACA;AAAA,EAEiB;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA6B,CAAC,GAAG;AAC3C,UAAM,WAAW,OAAO,QAAQ;AAChC,UAAM,kBACJ,OAAO,eACP;AAIF,UAAM,CAAC,CAAC;AAER,SAAK,OAAO;AACZ,SAAK,cAAc;AAEnB,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,SAAS,CAAC,UAAU,gBAAgB,UAAU;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,YAAY;AACjB,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,
MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAIA,WAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,MAAM,MAA+B;AACzC,UAAM,iBAAiC;AAAA,MACrC,QAAQ,KAAK;AAAA,IACf;AACA,QAAI,KAAK,cAAc,QAAW;AAChC,qBAAe,YAAY,KAAK;AAAA,IAClC;AAEA,UAAM,SAAwB,MAAM,KAAK,OAAO,QAAQ,KAAK,OAAO,EAAE,KAAK,GAAG,cAAc;AAE5F,WAAO,KAAK,UAAU;AAAA,MACpB,UAAU,OAAO,SAAS,IAAI,CAAC,OAAO;AAAA,QACpC,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,GAAI,EAAE,UAAU,UAAa,EAAE,OAAO,EAAE,MAAM;AAAA,QAC9C,GAAI,EAAE,QAAQ,UAAa,EAAE,KAAK,EAAE,IAAI;AAAA,MAC1C,EAAE;AAAA,MACF,WAAW,OAAO,UAAU,IAAI,CAAC,OAAO;AAAA,QACtC,MAAM,EAAE;AAAA,QACR,MAAM,EAAE;AAAA,QACR,UAAU,EAAE;AAAA,QACZ,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,iBAAiB,OAAO,gBAAgB,IAAI,CAAC,OAAO;AAAA,QAClD,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,SAAS,OAAO,QAAQ,IAAI,CAAC,OAAO;AAAA,QAClC,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,MAAM,EAAE;AAAA,MACV,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AFpHO,IAAM,gBAAN,cAA4B,WAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAIC,WAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AA
AA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,eAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,eAAe,MAAsB;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAIA,WAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,gBAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,gBAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":["SIEClient","SIEClient","SIEClient"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@superlinked/sie-langchain",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.10",
|
|
4
4
|
"description": "SIE embeddings integration for LangChain.js",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"dist"
|
|
18
18
|
],
|
|
19
19
|
"dependencies": {
|
|
20
|
-
"@superlinked/sie-sdk": "0.1.
|
|
20
|
+
"@superlinked/sie-sdk": "0.1.10"
|
|
21
21
|
},
|
|
22
22
|
"peerDependencies": {
|
|
23
23
|
"@langchain/core": ">=0.2.0"
|