@superlinked/sie-langchain 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -21,11 +21,192 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
23
  SIEEmbeddings: () => SIEEmbeddings,
24
+ SIEExtractor: () => SIEExtractor,
25
+ SIEReranker: () => SIEReranker,
24
26
  SIESparseEncoder: () => SIESparseEncoder
25
27
  });
26
28
  module.exports = __toCommonJS(index_exports);
27
29
  var import_embeddings = require("@langchain/core/embeddings");
30
+ var import_sie_sdk3 = require("@superlinked/sie-sdk");
31
+
32
+ // src/rerankers.ts
33
+ var import_document_compressors = require("@langchain/core/retrievers/document_compressors");
28
34
  var import_sie_sdk = require("@superlinked/sie-sdk");
35
+ var SIEReranker = class extends import_document_compressors.BaseDocumentCompressor {
36
+ model;
37
+ topK;
38
+ _client;
39
+ _ownsClient;
40
+ baseUrl;
41
+ clientOptions;
42
+ constructor(params = {}) {
43
+ super();
44
+ const {
45
+ baseUrl = "http://localhost:8080",
46
+ model = "jinaai/jina-reranker-v2-base-multilingual",
47
+ client,
48
+ topK,
49
+ gpu,
50
+ timeout = 18e4
51
+ } = params;
52
+ this.baseUrl = baseUrl;
53
+ this.model = model;
54
+ this.topK = topK;
55
+ this._client = client;
56
+ this._ownsClient = !client;
57
+ this.clientOptions = {
58
+ timeout,
59
+ gpu
60
+ };
61
+ }
62
+ /**
63
+ * Get or create the SIEClient.
64
+ */
65
+ get client() {
66
+ if (!this._client) {
67
+ this._client = new import_sie_sdk.SIEClient(this.baseUrl, this.clientOptions);
68
+ }
69
+ return this._client;
70
+ }
71
+ /**
72
+ * Rerank documents by relevance to query.
73
+ *
74
+ * @param documents - Documents to rerank.
75
+ * @param query - Query to rank documents against.
76
+ * @returns Reranked documents with relevance_score in metadata, sorted by score descending.
77
+ */
78
+ async compressDocuments(documents, query) {
79
+ if (documents.length === 0) {
80
+ return [];
81
+ }
82
+ const queryItem = { text: query };
83
+ const docItems = documents.map((doc) => ({ text: doc.pageContent }));
84
+ const result = await this.client.score(this.model, queryItem, docItems);
85
+ const reranked = [];
86
+ for (const entry of result.scores) {
87
+ const idx = Number.parseInt(entry.itemId, 10);
88
+ const doc = documents[idx];
89
+ if (doc) {
90
+ reranked.push({
91
+ pageContent: doc.pageContent,
92
+ metadata: { ...doc.metadata, relevance_score: entry.score },
93
+ id: doc.id
94
+ });
95
+ }
96
+ }
97
+ if (this.topK !== void 0) {
98
+ return reranked.slice(0, this.topK);
99
+ }
100
+ return reranked;
101
+ }
102
+ /**
103
+ * Close the underlying client connection.
104
+ */
105
+ async close() {
106
+ if (this._client && this._ownsClient) {
107
+ await this._client.close();
108
+ }
109
+ }
110
+ };
111
+
112
+ // src/extractors.ts
113
+ var import_tools = require("@langchain/core/tools");
114
+ var import_sie_sdk2 = require("@superlinked/sie-sdk");
115
+ var SIEExtractor = class extends import_tools.Tool {
116
+ name;
117
+ description;
118
+ model;
119
+ labels;
120
+ threshold;
121
+ _client;
122
+ _ownsClient;
123
+ baseUrl;
124
+ clientOptions;
125
+ constructor(params = {}) {
126
+ const toolName = params.name ?? "sie_extract";
127
+ const toolDescription = params.description ?? "Extract structured information from text. Input should be text to analyze. Returns JSON with entities, relations, classifications, and detected objects.";
128
+ super({});
129
+ this.name = toolName;
130
+ this.description = toolDescription;
131
+ const {
132
+ baseUrl = "http://localhost:8080",
133
+ model = "urchade/gliner_multi-v2.1",
134
+ client,
135
+ labels = ["person", "organization", "location"],
136
+ threshold,
137
+ gpu,
138
+ timeout = 18e4
139
+ } = params;
140
+ this.baseUrl = baseUrl;
141
+ this.model = model;
142
+ this.labels = labels;
143
+ this.threshold = threshold;
144
+ this._client = client;
145
+ this._ownsClient = !client;
146
+ this.clientOptions = {
147
+ timeout,
148
+ gpu
149
+ };
150
+ }
151
+ /**
152
+ * Get or create the SIEClient.
153
+ */
154
+ get client() {
155
+ if (!this._client) {
156
+ this._client = new import_sie_sdk2.SIEClient(this.baseUrl, this.clientOptions);
157
+ }
158
+ return this._client;
159
+ }
160
+ /**
161
+ * Extract structured information from text.
162
+ *
163
+ * @param text - Text to extract from.
164
+ * @returns JSON string with entities, relations, classifications, and objects.
165
+ */
166
+ async _call(text) {
167
+ const extractOptions = {
168
+ labels: this.labels
169
+ };
170
+ if (this.threshold !== void 0) {
171
+ extractOptions.threshold = this.threshold;
172
+ }
173
+ const result = await this.client.extract(this.model, { text }, extractOptions);
174
+ return JSON.stringify({
175
+ entities: result.entities.map((e) => ({
176
+ text: e.text,
177
+ label: e.label,
178
+ score: e.score,
179
+ ...e.start !== void 0 && { start: e.start },
180
+ ...e.end !== void 0 && { end: e.end }
181
+ })),
182
+ relations: result.relations.map((r) => ({
183
+ head: r.head,
184
+ tail: r.tail,
185
+ relation: r.relation,
186
+ score: r.score
187
+ })),
188
+ classifications: result.classifications.map((c) => ({
189
+ label: c.label,
190
+ score: c.score
191
+ })),
192
+ objects: result.objects.map((o) => ({
193
+ label: o.label,
194
+ score: o.score,
195
+ bbox: o.bbox
196
+ }))
197
+ });
198
+ }
199
+ /**
200
+ * Close the underlying client connection.
201
+ */
202
+ async close() {
203
+ if (this._client && this._ownsClient) {
204
+ await this._client.close();
205
+ }
206
+ }
207
+ };
208
+
209
+ // src/index.ts
29
210
  var SIEEmbeddings = class extends import_embeddings.Embeddings {
30
211
  model;
31
212
  instruction;
@@ -63,7 +244,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
63
244
  get client() {
64
245
  if (!this._client) {
65
246
  const baseUrl = this.baseUrl ?? "http://localhost:8080";
66
- this._client = new import_sie_sdk.SIEClient(baseUrl, this.clientOptions);
247
+ this._client = new import_sie_sdk3.SIEClient(baseUrl, this.clientOptions);
67
248
  }
68
249
  return this._client;
69
250
  }
@@ -85,7 +266,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
85
266
  isQuery: false
86
267
  };
87
268
  const results = await this.client.encode(this.model, items, options);
88
- return results.map((result) => this.extractDense(result));
269
+ return results.map((result) => (0, import_sie_sdk3.denseEmbedding)(result));
89
270
  }
90
271
  /**
91
272
  * Embed a single query text.
@@ -103,17 +284,7 @@ var SIEEmbeddings = class extends import_embeddings.Embeddings {
103
284
  isQuery: true
104
285
  };
105
286
  const result = await this.client.encode(this.model, { text }, options);
106
- return this.extractDense(result);
107
- }
108
- /**
109
- * Extract dense embedding from encode result.
110
- */
111
- extractDense(result) {
112
- const dense = result.dense;
113
- if (!dense) {
114
- throw new Error("Encode result missing dense embedding");
115
- }
116
- return (0, import_sie_sdk.toNumberArray)(dense);
287
+ return (0, import_sie_sdk3.denseEmbedding)(result);
117
288
  }
118
289
  /**
119
290
  * Close the underlying client connection.
@@ -148,7 +319,7 @@ var SIESparseEncoder = class {
148
319
  */
149
320
  get client() {
150
321
  if (!this._client) {
151
- this._client = new import_sie_sdk.SIEClient(this.baseUrl, this.clientOptions);
322
+ this._client = new import_sie_sdk3.SIEClient(this.baseUrl, this.clientOptions);
152
323
  }
153
324
  return this._client;
154
325
  }
@@ -168,7 +339,7 @@ var SIESparseEncoder = class {
168
339
  isQuery: true
169
340
  };
170
341
  const results = await this.client.encode(this.model, items, options);
171
- return results.map((result) => this.extractSparse(result));
342
+ return results.map((result) => (0, import_sie_sdk3.sparseEmbedding)(result));
172
343
  }
173
344
  /**
174
345
  * Encode document texts to sparse vectors.
@@ -186,20 +357,7 @@ var SIESparseEncoder = class {
186
357
  isQuery: false
187
358
  };
188
359
  const results = await this.client.encode(this.model, items, options);
189
- return results.map((result) => this.extractSparse(result));
190
- }
191
- /**
192
- * Extract sparse embedding from encode result.
193
- */
194
- extractSparse(result) {
195
- const sparse = result.sparse;
196
- if (!sparse) {
197
- return { indices: [], values: [] };
198
- }
199
- return {
200
- indices: (0, import_sie_sdk.toNumberArray)(sparse.indices),
201
- values: (0, import_sie_sdk.toNumberArray)(sparse.values)
202
- };
360
+ return results.map((result) => (0, import_sie_sdk3.sparseEmbedding)(result));
203
361
  }
204
362
  /**
205
363
  * Close the underlying client connection.
@@ -213,6 +371,8 @@ var SIESparseEncoder = class {
213
371
  // Annotate the CommonJS export names for ESM import in node:
214
372
  0 && (module.exports = {
215
373
  SIEEmbeddings,
374
+ SIEExtractor,
375
+ SIEReranker,
216
376
  SIESparseEncoder
217
377
  });
218
378
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * 
```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return this.extractDense(result as EncodeResult);\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): { indices: number[]; values: number[] } {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAqBA,wBAAkD;AAClD,qBAOO;AA8EA,IAAM,gBAAN,cAA4B,6BAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,yBAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,KAAK,aAAa,MAAsB;AAAA,EACjD;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,eAAO,8BAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAA
S,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAA+D;AACnF,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,aAAS,8BAAc,OAAO,OAAO;AAAA,MACrC,YAAQ,8BAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/rerankers.ts","../src/extractors.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n * - SIEReranker: Cross-encoder reranking for retrieval pipelines\n * - SIEExtractor: Entity extraction tool for agents\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: 
number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return denseEmbedding(result as EncodeResult);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\nexport { SIEReranker, type SIERerankerParams } from \"./rerankers.js\";\nexport { SIEExtractor, type SIEExtractorParams } from \"./extractors.js\";\n","/**\n * SIE reranker integration for LangChain.js\n *\n * Provides document reranking using SIE's score endpoint:\n * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n * ```\n */\n\nimport type { DocumentInterface } from \"@langchain/core/documents\";\nimport { BaseDocumentCompressor } from \"@langchain/core/retrievers/document_compressors\";\nimport { SIEClient, type SIEClientOptions } from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEReranker.\n */\nexport interface SIERerankerParams {\n /**\n * URL of the SIE 
server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Reranker model name/ID.\n * @default \"jinaai/jina-reranker-v2-base-multilingual\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Number of top documents to return. If undefined, returns all documents.\n */\n topK?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain document compressor using SIE's reranking.\n *\n * Wraps SIEClient.score() to implement BaseDocumentCompressor.\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * // Rerank retrieved documents\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n *\n * // Use in a retrieval pipeline\n * import { ContextualCompressionRetriever } from \"langchain/retrievers/contextual_compression\";\n *\n * const compressionRetriever = new ContextualCompressionRetriever({\n * baseCompressor: reranker,\n * baseRetriever: vectorStoreRetriever,\n * });\n * ```\n */\nexport class SIEReranker extends BaseDocumentCompressor {\n private readonly model: string;\n private readonly topK?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIERerankerParams = {}) {\n super();\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"jinaai/jina-reranker-v2-base-multilingual\",\n client,\n topK,\n gpu,\n timeout = 180_000,\n } = params;\n\n 
this.baseUrl = baseUrl;\n this.model = model;\n this.topK = topK;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Rerank documents by relevance to query.\n *\n * @param documents - Documents to rerank.\n * @param query - Query to rank documents against.\n * @returns Reranked documents with relevance_score in metadata, sorted by score descending.\n */\n async compressDocuments(\n documents: DocumentInterface[],\n query: string,\n ): Promise<DocumentInterface[]> {\n if (documents.length === 0) {\n return [];\n }\n\n const queryItem = { text: query };\n const docItems = documents.map((doc) => ({ text: doc.pageContent }));\n\n const result = await this.client.score(this.model, queryItem, docItems);\n\n // Map score entries back to documents with relevance_score in metadata.\n // ScoreResult.scores are already sorted by score descending.\n const reranked: DocumentInterface[] = [];\n for (const entry of result.scores) {\n const idx = Number.parseInt(entry.itemId, 10);\n const doc = documents[idx];\n if (doc) {\n reranked.push({\n pageContent: doc.pageContent,\n metadata: { ...doc.metadata, relevance_score: entry.score },\n id: doc.id,\n });\n }\n }\n\n if (this.topK !== undefined) {\n return reranked.slice(0, this.topK);\n }\n return reranked;\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await this._client.close();\n }\n }\n}\n","/**\n * SIE extraction tool for LangChain.js\n *\n * Provides extraction using SIE's extract endpoint:\n * - SIEExtractor: Extraction tool implementing LangChain Tool\n *\n * Returns entities, relations, classifications, and detected objects.\n *\n * @example\n * ```typescript\n * 
import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * const extractor = new SIEExtractor({\n * baseUrl: \"http://localhost:8080\",\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n *\n * const result = await extractor.invoke(\"John Smith works at Acme Corp in NYC\");\n * const parsed = JSON.parse(result);\n * console.log(parsed.entities);\n * console.log(parsed.relations);\n * ```\n */\n\nimport { Tool } from \"@langchain/core/tools\";\nimport {\n type ExtractOptions,\n type ExtractResult,\n SIEClient,\n type SIEClientOptions,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEExtractor.\n */\nexport interface SIEExtractorParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Extraction model name/ID.\n * @default \"urchade/gliner_multi-v2.1\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Labels to extract (entity types, relation types, or classification labels).\n * @default [\"person\", \"organization\", \"location\"]\n */\n labels?: string[];\n\n /**\n * Minimum confidence threshold (0-1).\n */\n threshold?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n\n /**\n * Tool name for use in agents.\n * @default \"sie_extract\"\n */\n name?: string;\n\n /**\n * Tool description for use in agents.\n */\n description?: string;\n}\n\n/**\n * LangChain tool for extraction using SIE.\n *\n * Wraps SIEClient.extract() to implement the LangChain Tool interface\n * for use in agents and chains. 
Returns JSON with entities, relations,\n * classifications, and detected objects.\n *\n * @example\n * ```typescript\n * import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * // Direct usage\n * const extractor = new SIEExtractor({\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n * const result = await extractor.invoke(\"John Smith works at Acme Corp\");\n * const parsed = JSON.parse(result);\n *\n * // Use in an agent\n * import { ChatOpenAI } from \"@langchain/openai\";\n * import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n *\n * const agent = createReactAgent({\n * llm: new ChatOpenAI(),\n * tools: [extractor],\n * });\n * ```\n */\nexport class SIEExtractor extends Tool {\n name: string;\n description: string;\n\n private readonly model: string;\n private readonly labels: string[];\n private readonly threshold?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEExtractorParams = {}) {\n const toolName = params.name ?? \"sie_extract\";\n const toolDescription =\n params.description ??\n \"Extract structured information from text. \" +\n \"Input should be text to analyze. 
\" +\n \"Returns JSON with entities, relations, classifications, and detected objects.\";\n\n super({});\n\n this.name = toolName;\n this.description = toolDescription;\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"urchade/gliner_multi-v2.1\",\n client,\n labels = [\"person\", \"organization\", \"location\"],\n threshold,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.labels = labels;\n this.threshold = threshold;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Extract structured information from text.\n *\n * @param text - Text to extract from.\n * @returns JSON string with entities, relations, classifications, and objects.\n */\n async _call(text: string): Promise<string> {\n const extractOptions: ExtractOptions = {\n labels: this.labels,\n };\n if (this.threshold !== undefined) {\n extractOptions.threshold = this.threshold;\n }\n\n const result: ExtractResult = await this.client.extract(this.model, { text }, extractOptions);\n\n return JSON.stringify({\n entities: result.entities.map((e) => ({\n text: e.text,\n label: e.label,\n score: e.score,\n ...(e.start !== undefined && { start: e.start }),\n ...(e.end !== undefined && { end: e.end }),\n })),\n relations: result.relations.map((r) => ({\n head: r.head,\n tail: r.tail,\n relation: r.relation,\n score: r.score,\n })),\n classifications: result.classifications.map((c) => ({\n label: c.label,\n score: c.score,\n })),\n objects: result.objects.map((o) => ({\n label: o.label,\n score: o.score,\n bbox: o.bbox,\n })),\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await 
this._client.close();\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAuBA,wBAAkD;AAClD,IAAAA,kBAQO;;;ACXP,kCAAuC;AACvC,qBAAiD;AAoE1C,IAAM,cAAN,cAA0B,mDAAuB;AAAA,EACrC;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA4B,CAAC,GAAG;AAC1C,UAAM;AAEN,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,yBAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,kBACJ,WACA,OAC8B;AAC9B,QAAI,UAAU,WAAW,GAAG;AAC1B,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,YAAY,EAAE,MAAM,MAAM;AAChC,UAAM,WAAW,UAAU,IAAI,CAAC,SAAS,EAAE,MAAM,IAAI,YAAY,EAAE;AAEnE,UAAM,SAAS,MAAM,KAAK,OAAO,MAAM,KAAK,OAAO,WAAW,QAAQ;AAItE,UAAM,WAAgC,CAAC;AACvC,eAAW,SAAS,OAAO,QAAQ;AACjC,YAAM,MAAM,OAAO,SAAS,MAAM,QAAQ,EAAE;AAC5C,YAAM,MAAM,UAAU,GAAG;AACzB,UAAI,KAAK;AACP,iBAAS,KAAK;AAAA,UACZ,aAAa,IAAI;AAAA,UACjB,UAAU,EAAE,GAAG,IAAI,UAAU,iBAAiB,MAAM,MAAM;AAAA,UAC1D,IAAI,IAAI;AAAA,QACV,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,KAAK,SAAS,QAAW;AAC3B,aAAO,SAAS,MAAM,GAAG,KAAK,IAAI;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AC5JA,mBAAqB;AACrB,IAAAC,kBAKO;AAuFA,IAAM,eAAN,cAA2B,kBAAK;AAAA,EACrC;AAAA,EACA;AAAA,EAEiB;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA6B,CAAC,GAAG;AAC3C,UAAM,WAAW,OAAO,QAAQ;AAChC,UAAM,kBACJ,OAAO,eACP;AAIF,UAAM,CAAC,CAAC;AAER,SAAK,OAAO;AACZ,SAAK,cAAc;AAEnB,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,SAAS,CAAC,UAAU,gBAAgB,UAAU;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,YAAY;AACjB,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;
AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,0BAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,MAAM,MAA+B;AACzC,UAAM,iBAAiC;AAAA,MACrC,QAAQ,KAAK;AAAA,IACf;AACA,QAAI,KAAK,cAAc,QAAW;AAChC,qBAAe,YAAY,KAAK;AAAA,IAClC;AAEA,UAAM,SAAwB,MAAM,KAAK,OAAO,QAAQ,KAAK,OAAO,EAAE,KAAK,GAAG,cAAc;AAE5F,WAAO,KAAK,UAAU;AAAA,MACpB,UAAU,OAAO,SAAS,IAAI,CAAC,OAAO;AAAA,QACpC,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,GAAI,EAAE,UAAU,UAAa,EAAE,OAAO,EAAE,MAAM;AAAA,QAC9C,GAAI,EAAE,QAAQ,UAAa,EAAE,KAAK,EAAE,IAAI;AAAA,MAC1C,EAAE;AAAA,MACF,WAAW,OAAO,UAAU,IAAI,CAAC,OAAO;AAAA,QACtC,MAAM,EAAE;AAAA,QACR,MAAM,EAAE;AAAA,QACR,UAAU,EAAE;AAAA,QACZ,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,iBAAiB,OAAO,gBAAgB,IAAI,CAAC,OAAO;AAAA,QAClD,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,SAAS,OAAO,QAAQ,IAAI,CAAC,OAAO;AAAA,QAClC,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,MAAM,EAAE;AAAA,MACV,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AFpHO,IAAM,gBAAN,cAA4B,6BAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,0BAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,M
AClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,gCAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,eAAO,gCAAe,MAAsB;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,0BAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,iCAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,eAAW,iCAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":["import_sie_sdk","import_sie_sdk"]}
package/dist/index.d.cts CHANGED
@@ -1,5 +1,243 @@
1
1
  import { EmbeddingsParams, Embeddings } from '@langchain/core/embeddings';
2
2
  import { SIEClient, DType } from '@superlinked/sie-sdk';
3
+ import { DocumentInterface } from '@langchain/core/documents';
4
+ import { BaseDocumentCompressor } from '@langchain/core/retrievers/document_compressors';
5
+ import { Tool } from '@langchain/core/tools';
6
+
7
+ /**
8
+ * SIE reranker integration for LangChain.js
9
+ *
10
+ * Provides document reranking using SIE's score endpoint:
11
+ * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { SIEReranker } from "@superlinked/sie-langchain";
16
+ *
17
+ * const reranker = new SIEReranker({
18
+ * baseUrl: "http://localhost:8080",
19
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
20
+ * topK: 3,
21
+ * });
22
+ *
23
+ * const reranked = await reranker.compressDocuments(documents, "search query");
24
+ * ```
25
+ */
26
+
27
+ /**
28
+ * Configuration options for SIEReranker.
29
+ */
30
+ interface SIERerankerParams {
31
+ /**
32
+ * URL of the SIE server. Only used when no `client` is supplied.
33
+ * @default "http://localhost:8080"
34
+ */
35
+ baseUrl?: string;
36
+ /**
37
+ * Reranker model name/ID passed to SIEClient.score().
38
+ * @default "jinaai/jina-reranker-v2-base-multilingual"
39
+ */
40
+ model?: string;
41
+ /**
42
+ * Optional pre-configured SIEClient instance.
43
+ * If provided, baseUrl, gpu and timeout are ignored, and close() leaves this client open.
44
+ */
45
+ client?: SIEClient;
46
+ /**
47
+ * Maximum number of top-ranked documents to return. If undefined, returns all scored documents.
48
+ */
49
+ topK?: number;
50
+ /**
51
+ * Target GPU type for routing (e.g., "l4", "a100-80gb"). Forwarded as an SIEClient option.
52
+ */
53
+ gpu?: string;
54
+ /**
55
+ * Request timeout in milliseconds. Forwarded as an SIEClient option.
56
+ * @default 180000 (3 minutes)
57
+ */
58
+ timeout?: number;
59
+ }
60
+ /**
61
+ * LangChain document compressor using SIE's reranking.
62
+ *
63
+ * Wraps SIEClient.score() to implement BaseDocumentCompressor.
64
+ *
65
+ * @example
66
+ * ```typescript
67
+ * import { SIEReranker } from "@superlinked/sie-langchain";
68
+ *
69
+ * const reranker = new SIEReranker({
70
+ * baseUrl: "http://localhost:8080",
71
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
72
+ * topK: 3,
73
+ * });
74
+ *
75
+ * // Rerank retrieved documents
76
+ * const reranked = await reranker.compressDocuments(documents, "search query");
77
+ *
78
+ * // Use in a retrieval pipeline
79
+ * import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";
80
+ *
81
+ * const compressionRetriever = new ContextualCompressionRetriever({
82
+ * baseCompressor: reranker,
83
+ * baseRetriever: vectorStoreRetriever,
84
+ * });
85
+ * ```
86
+ */
87
+ declare class SIEReranker extends BaseDocumentCompressor {
88
+ private readonly model;
89
+ private readonly topK?;
90
+ private _client;
91
+ private readonly _ownsClient; // true when no client was passed to the constructor
92
+ private readonly baseUrl;
93
+ private readonly clientOptions;
94
+ constructor(params?: SIERerankerParams);
95
+ /**
96
+ * Get the SIEClient, lazily creating one from baseUrl and clientOptions if none was supplied.
97
+ */
98
+ private get client();
99
+ /**
100
+ * Rerank documents by relevance to query using SIEClient.score(). Empty input resolves to [] without a request.
101
+ *
102
+ * @param documents - Documents to rerank; only each document's pageContent is sent for scoring.
103
+ * @param query - Query to rank documents against.
104
+ * @returns New document objects with relevance_score added to metadata, in the order of the returned scores (no local sort is applied), limited to topK when set.
105
+ */
106
+ compressDocuments(documents: DocumentInterface[], query: string): Promise<DocumentInterface[]>;
107
+ /**
108
+ * Close the underlying client connection, but only if this instance created it; a client passed in via params is left open.
109
+ */
110
+ close(): Promise<void>;
111
+ }
112
+
113
+ /**
114
+ * SIE extraction tool for LangChain.js
115
+ *
116
+ * Provides extraction using SIE's extract endpoint:
117
+ * - SIEExtractor: Extraction tool implementing LangChain Tool
118
+ *
119
+ * Returns entities, relations, classifications, and detected objects.
120
+ *
121
+ * @example
122
+ * ```typescript
123
+ * import { SIEExtractor } from "@superlinked/sie-langchain";
124
+ *
125
+ * const extractor = new SIEExtractor({
126
+ * baseUrl: "http://localhost:8080",
127
+ * model: "urchade/gliner_multi-v2.1",
128
+ * labels: ["person", "organization", "location"],
129
+ * });
130
+ *
131
+ * const result = await extractor.invoke("John Smith works at Acme Corp in NYC");
132
+ * const parsed = JSON.parse(result);
133
+ * console.log(parsed.entities);
134
+ * console.log(parsed.relations);
135
+ * ```
136
+ */
137
+
138
+ /**
139
+ * Configuration options for SIEExtractor.
140
+ */
141
+ interface SIEExtractorParams {
142
+ /**
143
+ * URL of the SIE server. Only used when no `client` is supplied.
144
+ * @default "http://localhost:8080"
145
+ */
146
+ baseUrl?: string;
147
+ /**
148
+ * Extraction model name/ID passed to SIEClient.extract().
149
+ * @default "urchade/gliner_multi-v2.1"
150
+ */
151
+ model?: string;
152
+ /**
153
+ * Optional pre-configured SIEClient instance.
154
+ * If provided, baseUrl, gpu and timeout are ignored, and close() leaves this client open.
155
+ */
156
+ client?: SIEClient;
157
+ /**
158
+ * Labels to extract (entity types, relation types, or classification labels). Sent with every extract request.
159
+ * @default ["person", "organization", "location"]
160
+ */
161
+ labels?: string[];
162
+ /**
163
+ * Minimum confidence threshold (0-1). Only included in the extract request when defined.
164
+ */
165
+ threshold?: number;
166
+ /**
167
+ * Target GPU type for routing (e.g., "l4", "a100-80gb"). Forwarded as an SIEClient option.
168
+ */
169
+ gpu?: string;
170
+ /**
171
+ * Request timeout in milliseconds. Forwarded as an SIEClient option.
172
+ * @default 180000 (3 minutes)
173
+ */
174
+ timeout?: number;
175
+ /**
176
+ * Tool name for use in agents.
177
+ * @default "sie_extract"
178
+ */
179
+ name?: string;
180
+ /**
181
+ * Tool description for use in agents. Defaults to a built-in summary of the expected input and the JSON output.
182
+ */
183
+ description?: string;
184
+ }
185
+ /**
186
+ * LangChain tool for extraction using SIE.
187
+ *
188
+ * Wraps SIEClient.extract() to implement the LangChain Tool interface
189
+ * for use in agents and chains. Returns JSON with entities, relations,
190
+ * classifications, and detected objects.
191
+ *
192
+ * @example
193
+ * ```typescript
194
+ * import { SIEExtractor } from "@superlinked/sie-langchain";
195
+ *
196
+ * // Direct usage
197
+ * const extractor = new SIEExtractor({
198
+ * model: "urchade/gliner_multi-v2.1",
199
+ * labels: ["person", "organization", "location"],
200
+ * });
201
+ * const result = await extractor.invoke("John Smith works at Acme Corp");
202
+ * const parsed = JSON.parse(result);
203
+ *
204
+ * // Use in an agent
205
+ * import { ChatOpenAI } from "@langchain/openai";
206
+ * import { createReactAgent } from "@langchain/langgraph/prebuilt";
207
+ *
208
+ * const agent = createReactAgent({
209
+ * llm: new ChatOpenAI(),
210
+ * tools: [extractor],
211
+ * });
212
+ * ```
213
+ */
214
+ declare class SIEExtractor extends Tool {
215
+ name: string;
216
+ description: string;
217
+ private readonly model;
218
+ private readonly labels;
219
+ private readonly threshold?;
220
+ private _client;
221
+ private readonly _ownsClient; // true when no client was passed to the constructor
222
+ private readonly baseUrl;
223
+ private readonly clientOptions;
224
+ constructor(params?: SIEExtractorParams);
225
+ /**
226
+ * Get the SIEClient, lazily creating one from baseUrl and clientOptions if none was supplied.
227
+ */
228
+ private get client();
229
+ /**
230
+ * Extract structured information from text via SIEClient.extract(), sending the configured labels and, when set, threshold.
231
+ *
232
+ * @param text - Text to extract from.
233
+ * @returns JSON string of an object with `entities`, `relations`, `classifications` and `objects` arrays; an entity's `start`/`end` appear only when defined.
234
+ */
235
+ _call(text: string): Promise<string>;
236
+ /**
237
+ * Close the underlying client connection, but only if this instance created it; a client passed in via params is left open.
238
+ */
239
+ close(): Promise<void>;
240
+ }
3
241
 
4
242
  /**
5
243
  * SIE embeddings integration for LangChain.js
@@ -7,6 +245,8 @@ import { SIEClient, DType } from '@superlinked/sie-sdk';
7
245
  * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:
8
246
  * - SIEEmbeddings: Dense embeddings for vector stores
9
247
  * - SIESparseEncoder: Sparse encoder for hybrid search
248
+ * - SIEReranker: Cross-encoder reranking for retrieval pipelines
249
+ * - SIEExtractor: Entity extraction tool for agents
10
250
  *
11
251
  * @example
12
252
  * ```typescript
@@ -118,10 +358,6 @@ declare class SIEEmbeddings extends Embeddings {
118
358
  * @returns Embedding vector as array of numbers.
119
359
  */
120
360
  embedQuery(text: string): Promise<number[]>;
121
- /**
122
- * Extract dense embedding from encode result.
123
- */
124
- private extractDense;
125
361
  /**
126
362
  * Close the underlying client connection.
127
363
  */
@@ -198,14 +434,10 @@ declare class SIESparseEncoder {
198
434
  indices: number[];
199
435
  values: number[];
200
436
  }>>;
201
- /**
202
- * Extract sparse embedding from encode result.
203
- */
204
- private extractSparse;
205
437
  /**
206
438
  * Close the underlying client connection.
207
439
  */
208
440
  close(): Promise<void>;
209
441
  }
210
442
 
211
- export { SIEEmbeddings, type SIEEmbeddingsParams, SIESparseEncoder, type SIESparseEncoderOptions };
443
+ export { SIEEmbeddings, type SIEEmbeddingsParams, SIEExtractor, type SIEExtractorParams, SIEReranker, type SIERerankerParams, SIESparseEncoder, type SIESparseEncoderOptions };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,243 @@
1
1
  import { EmbeddingsParams, Embeddings } from '@langchain/core/embeddings';
2
2
  import { SIEClient, DType } from '@superlinked/sie-sdk';
3
+ import { DocumentInterface } from '@langchain/core/documents';
4
+ import { BaseDocumentCompressor } from '@langchain/core/retrievers/document_compressors';
5
+ import { Tool } from '@langchain/core/tools';
6
+
7
+ /**
8
+ * SIE reranker integration for LangChain.js
9
+ *
10
+ * Provides document reranking using SIE's score endpoint:
11
+ * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { SIEReranker } from "@superlinked/sie-langchain";
16
+ *
17
+ * const reranker = new SIEReranker({
18
+ * baseUrl: "http://localhost:8080",
19
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
20
+ * topK: 3,
21
+ * });
22
+ *
23
+ * const reranked = await reranker.compressDocuments(documents, "search query");
24
+ * ```
25
+ */
26
+
27
+ /**
28
+ * Configuration options for SIEReranker.
29
+ */
30
+ interface SIERerankerParams {
31
+ /**
32
+ * URL of the SIE server. Only used when no `client` is supplied.
33
+ * @default "http://localhost:8080"
34
+ */
35
+ baseUrl?: string;
36
+ /**
37
+ * Reranker model name/ID passed to SIEClient.score().
38
+ * @default "jinaai/jina-reranker-v2-base-multilingual"
39
+ */
40
+ model?: string;
41
+ /**
42
+ * Optional pre-configured SIEClient instance.
43
+ * If provided, baseUrl, gpu and timeout are ignored, and close() leaves this client open.
44
+ */
45
+ client?: SIEClient;
46
+ /**
47
+ * Maximum number of top-ranked documents to return. If undefined, returns all scored documents.
48
+ */
49
+ topK?: number;
50
+ /**
51
+ * Target GPU type for routing (e.g., "l4", "a100-80gb"). Forwarded as an SIEClient option.
52
+ */
53
+ gpu?: string;
54
+ /**
55
+ * Request timeout in milliseconds. Forwarded as an SIEClient option.
56
+ * @default 180000 (3 minutes)
57
+ */
58
+ timeout?: number;
59
+ }
60
+ /**
61
+ * LangChain document compressor using SIE's reranking.
62
+ *
63
+ * Wraps SIEClient.score() to implement BaseDocumentCompressor.
64
+ *
65
+ * @example
66
+ * ```typescript
67
+ * import { SIEReranker } from "@superlinked/sie-langchain";
68
+ *
69
+ * const reranker = new SIEReranker({
70
+ * baseUrl: "http://localhost:8080",
71
+ * model: "jinaai/jina-reranker-v2-base-multilingual",
72
+ * topK: 3,
73
+ * });
74
+ *
75
+ * // Rerank retrieved documents
76
+ * const reranked = await reranker.compressDocuments(documents, "search query");
77
+ *
78
+ * // Use in a retrieval pipeline
79
+ * import { ContextualCompressionRetriever } from "langchain/retrievers/contextual_compression";
80
+ *
81
+ * const compressionRetriever = new ContextualCompressionRetriever({
82
+ * baseCompressor: reranker,
83
+ * baseRetriever: vectorStoreRetriever,
84
+ * });
85
+ * ```
86
+ */
87
+ declare class SIEReranker extends BaseDocumentCompressor {
88
+ private readonly model;
89
+ private readonly topK?;
90
+ private _client;
91
+ private readonly _ownsClient; // true when no client was passed to the constructor
92
+ private readonly baseUrl;
93
+ private readonly clientOptions;
94
+ constructor(params?: SIERerankerParams);
95
+ /**
96
+ * Get the SIEClient, lazily creating one from baseUrl and clientOptions if none was supplied.
97
+ */
98
+ private get client();
99
+ /**
100
+ * Rerank documents by relevance to query using SIEClient.score(). Empty input resolves to [] without a request.
101
+ *
102
+ * @param documents - Documents to rerank; only each document's pageContent is sent for scoring.
103
+ * @param query - Query to rank documents against.
104
+ * @returns New document objects with relevance_score added to metadata, in the order of the returned scores (no local sort is applied), limited to topK when set.
105
+ */
106
+ compressDocuments(documents: DocumentInterface[], query: string): Promise<DocumentInterface[]>;
107
+ /**
108
+ * Close the underlying client connection, but only if this instance created it; a client passed in via params is left open.
109
+ */
110
+ close(): Promise<void>;
111
+ }
112
+
113
+ /**
114
+ * SIE extraction tool for LangChain.js
115
+ *
116
+ * Provides extraction using SIE's extract endpoint:
117
+ * - SIEExtractor: Extraction tool implementing LangChain Tool
118
+ *
119
+ * Returns entities, relations, classifications, and detected objects.
120
+ *
121
+ * @example
122
+ * ```typescript
123
+ * import { SIEExtractor } from "@superlinked/sie-langchain";
124
+ *
125
+ * const extractor = new SIEExtractor({
126
+ * baseUrl: "http://localhost:8080",
127
+ * model: "urchade/gliner_multi-v2.1",
128
+ * labels: ["person", "organization", "location"],
129
+ * });
130
+ *
131
+ * const result = await extractor.invoke("John Smith works at Acme Corp in NYC");
132
+ * const parsed = JSON.parse(result);
133
+ * console.log(parsed.entities);
134
+ * console.log(parsed.relations);
135
+ * ```
136
+ */
137
+
138
+ /**
139
+ * Configuration options for SIEExtractor.
140
+ */
141
+ interface SIEExtractorParams {
142
+ /**
143
+ * URL of the SIE server. Only used when no `client` is supplied.
144
+ * @default "http://localhost:8080"
145
+ */
146
+ baseUrl?: string;
147
+ /**
148
+ * Extraction model name/ID passed to SIEClient.extract().
149
+ * @default "urchade/gliner_multi-v2.1"
150
+ */
151
+ model?: string;
152
+ /**
153
+ * Optional pre-configured SIEClient instance.
154
+ * If provided, baseUrl, gpu and timeout are ignored, and close() leaves this client open.
155
+ */
156
+ client?: SIEClient;
157
+ /**
158
+ * Labels to extract (entity types, relation types, or classification labels). Sent with every extract request.
159
+ * @default ["person", "organization", "location"]
160
+ */
161
+ labels?: string[];
162
+ /**
163
+ * Minimum confidence threshold (0-1). Only included in the extract request when defined.
164
+ */
165
+ threshold?: number;
166
+ /**
167
+ * Target GPU type for routing (e.g., "l4", "a100-80gb"). Forwarded as an SIEClient option.
168
+ */
169
+ gpu?: string;
170
+ /**
171
+ * Request timeout in milliseconds. Forwarded as an SIEClient option.
172
+ * @default 180000 (3 minutes)
173
+ */
174
+ timeout?: number;
175
+ /**
176
+ * Tool name for use in agents.
177
+ * @default "sie_extract"
178
+ */
179
+ name?: string;
180
+ /**
181
+ * Tool description for use in agents. Defaults to a built-in summary of the expected input and the JSON output.
182
+ */
183
+ description?: string;
184
+ }
185
+ /**
186
+ * LangChain tool for extraction using SIE.
187
+ *
188
+ * Wraps SIEClient.extract() to implement the LangChain Tool interface
189
+ * for use in agents and chains. Returns JSON with entities, relations,
190
+ * classifications, and detected objects.
191
+ *
192
+ * @example
193
+ * ```typescript
194
+ * import { SIEExtractor } from "@superlinked/sie-langchain";
195
+ *
196
+ * // Direct usage
197
+ * const extractor = new SIEExtractor({
198
+ * model: "urchade/gliner_multi-v2.1",
199
+ * labels: ["person", "organization", "location"],
200
+ * });
201
+ * const result = await extractor.invoke("John Smith works at Acme Corp");
202
+ * const parsed = JSON.parse(result);
203
+ *
204
+ * // Use in an agent
205
+ * import { ChatOpenAI } from "@langchain/openai";
206
+ * import { createReactAgent } from "@langchain/langgraph/prebuilt";
207
+ *
208
+ * const agent = createReactAgent({
209
+ * llm: new ChatOpenAI(),
210
+ * tools: [extractor],
211
+ * });
212
+ * ```
213
+ */
214
+ declare class SIEExtractor extends Tool {
215
+ name: string;
216
+ description: string;
217
+ private readonly model;
218
+ private readonly labels;
219
+ private readonly threshold?;
220
+ private _client;
221
+ private readonly _ownsClient; // true when no client was passed to the constructor
222
+ private readonly baseUrl;
223
+ private readonly clientOptions;
224
+ constructor(params?: SIEExtractorParams);
225
+ /**
226
+ * Get the SIEClient, lazily creating one from baseUrl and clientOptions if none was supplied.
227
+ */
228
+ private get client();
229
+ /**
230
+ * Extract structured information from text via SIEClient.extract(), sending the configured labels and, when set, threshold.
231
+ *
232
+ * @param text - Text to extract from.
233
+ * @returns JSON string of an object with `entities`, `relations`, `classifications` and `objects` arrays; an entity's `start`/`end` appear only when defined.
234
+ */
235
+ _call(text: string): Promise<string>;
236
+ /**
237
+ * Close the underlying client connection, but only if this instance created it; a client passed in via params is left open.
238
+ */
239
+ close(): Promise<void>;
240
+ }
3
241
 
4
242
  /**
5
243
  * SIE embeddings integration for LangChain.js
@@ -7,6 +245,8 @@ import { SIEClient, DType } from '@superlinked/sie-sdk';
7
245
  * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:
8
246
  * - SIEEmbeddings: Dense embeddings for vector stores
9
247
  * - SIESparseEncoder: Sparse encoder for hybrid search
248
+ * - SIEReranker: Cross-encoder reranking for retrieval pipelines
249
+ * - SIEExtractor: Entity extraction tool for agents
10
250
  *
11
251
  * @example
12
252
  * ```typescript
@@ -118,10 +358,6 @@ declare class SIEEmbeddings extends Embeddings {
118
358
  * @returns Embedding vector as array of numbers.
119
359
  */
120
360
  embedQuery(text: string): Promise<number[]>;
121
- /**
122
- * Extract dense embedding from encode result.
123
- */
124
- private extractDense;
125
361
  /**
126
362
  * Close the underlying client connection.
127
363
  */
@@ -198,14 +434,10 @@ declare class SIESparseEncoder {
198
434
  indices: number[];
199
435
  values: number[];
200
436
  }>>;
201
- /**
202
- * Extract sparse embedding from encode result.
203
- */
204
- private extractSparse;
205
437
  /**
206
438
  * Close the underlying client connection.
207
439
  */
208
440
  close(): Promise<void>;
209
441
  }
210
442
 
211
- export { SIEEmbeddings, type SIEEmbeddingsParams, SIESparseEncoder, type SIESparseEncoderOptions };
443
+ export { SIEEmbeddings, type SIEEmbeddingsParams, SIEExtractor, type SIEExtractorParams, SIEReranker, type SIERerankerParams, SIESparseEncoder, type SIESparseEncoderOptions };
package/dist/index.js CHANGED
@@ -1,9 +1,191 @@
1
1
  // src/index.ts
2
2
  import { Embeddings } from "@langchain/core/embeddings";
3
3
  import {
4
- SIEClient,
5
- toNumberArray
4
+ SIEClient as SIEClient3,
5
+ denseEmbedding,
6
+ sparseEmbedding
6
7
  } from "@superlinked/sie-sdk";
8
+
9
+ // src/rerankers.ts
10
+ import { BaseDocumentCompressor } from "@langchain/core/retrievers/document_compressors";
11
+ import { SIEClient } from "@superlinked/sie-sdk";
12
+ var SIEReranker = class extends BaseDocumentCompressor { // document compressor that reranks via SIEClient.score()
13
+ model;
14
+ topK;
15
+ _client;
16
+ _ownsClient;
17
+ baseUrl;
18
+ clientOptions;
19
+ constructor(params = {}) {
20
+ super();
21
+ const {
22
+ baseUrl = "http://localhost:8080",
23
+ model = "jinaai/jina-reranker-v2-base-multilingual",
24
+ client,
25
+ topK,
26
+ gpu,
27
+ timeout = 18e4 // 180000 ms
28
+ } = params;
29
+ this.baseUrl = baseUrl;
30
+ this.model = model;
31
+ this.topK = topK;
32
+ this._client = client;
33
+ this._ownsClient = !client; // close() only closes a client this instance created
34
+ this.clientOptions = {
35
+ timeout,
36
+ gpu
37
+ };
38
+ }
39
+ /**
40
+ * Get the SIEClient, lazily creating one from baseUrl and clientOptions if none was supplied.
41
+ */
42
+ get client() {
43
+ if (!this._client) {
44
+ this._client = new SIEClient(this.baseUrl, this.clientOptions);
45
+ }
46
+ return this._client;
47
+ }
48
+ /**
49
+ * Rerank documents by relevance to query using SIEClient.score(). Empty input resolves to [] without a request.
50
+ *
51
+ * @param documents - Documents to rerank; only each document's pageContent is sent for scoring.
52
+ * @param query - Query to rank documents against.
53
+ * @returns New document objects with relevance_score added to metadata, in the order of the returned scores (no local sort is applied), limited to topK when set.
54
+ */
55
+ async compressDocuments(documents, query) {
56
+ if (documents.length === 0) {
57
+ return [];
58
+ }
59
+ const queryItem = { text: query };
60
+ const docItems = documents.map((doc) => ({ text: doc.pageContent }));
61
+ const result = await this.client.score(this.model, queryItem, docItems);
62
+ const reranked = [];
63
+ for (const entry of result.scores) {
64
+ const idx = Number.parseInt(entry.itemId, 10); // NOTE(review): assumes itemId is the input index as a decimal string - confirm against the SDK
65
+ const doc = documents[idx];
66
+ if (doc) { // entries whose itemId does not resolve to an input document are skipped
67
+ reranked.push({
68
+ pageContent: doc.pageContent,
69
+ metadata: { ...doc.metadata, relevance_score: entry.score }, // shallow copy; the input document is not mutated
70
+ id: doc.id
71
+ });
72
+ }
73
+ }
74
+ if (this.topK !== void 0) {
75
+ return reranked.slice(0, this.topK);
76
+ }
77
+ return reranked;
78
+ }
79
+ /**
80
+ * Close the underlying client connection, but only if this instance created it; a client passed in via params is left open.
81
+ */
82
+ async close() {
83
+ if (this._client && this._ownsClient) {
84
+ await this._client.close();
85
+ }
86
+ }
87
+ };
88
+
89
+ // src/extractors.ts
90
+ import { Tool } from "@langchain/core/tools";
91
+ import {
92
+ SIEClient as SIEClient2
93
+ } from "@superlinked/sie-sdk";
94
+ var SIEExtractor = class extends Tool {
95
+ name;
96
+ description;
97
+ model;
98
+ labels;
99
+ threshold;
100
+ _client;
101
+ _ownsClient;
102
+ baseUrl;
103
+ clientOptions;
104
+ constructor(params = {}) {
105
+ const toolName = params.name ?? "sie_extract";
106
+ const toolDescription = params.description ?? "Extract structured information from text. Input should be text to analyze. Returns JSON with entities, relations, classifications, and detected objects.";
107
+ super({});
108
+ this.name = toolName;
109
+ this.description = toolDescription;
110
+ const {
111
+ baseUrl = "http://localhost:8080",
112
+ model = "urchade/gliner_multi-v2.1",
113
+ client,
114
+ labels = ["person", "organization", "location"],
115
+ threshold,
116
+ gpu,
117
+ timeout = 18e4
118
+ } = params;
119
+ this.baseUrl = baseUrl;
120
+ this.model = model;
121
+ this.labels = labels;
122
+ this.threshold = threshold;
123
+ this._client = client;
124
+ this._ownsClient = !client;
125
+ this.clientOptions = {
126
+ timeout,
127
+ gpu
128
+ };
129
+ }
130
+ /**
131
+ * Get or create the SIEClient.
132
+ */
133
+ get client() {
134
+ if (!this._client) {
135
+ this._client = new SIEClient2(this.baseUrl, this.clientOptions);
136
+ }
137
+ return this._client;
138
+ }
139
+ /**
140
+ * Extract structured information from text.
141
+ *
142
+ * @param text - Text to extract from.
143
+ * @returns JSON string with entities, relations, classifications, and objects.
144
+ */
145
+ async _call(text) {
146
+ const extractOptions = {
147
+ labels: this.labels
148
+ };
149
+ if (this.threshold !== void 0) {
150
+ extractOptions.threshold = this.threshold;
151
+ }
152
+ const result = await this.client.extract(this.model, { text }, extractOptions);
153
+ return JSON.stringify({
154
+ entities: result.entities.map((e) => ({
155
+ text: e.text,
156
+ label: e.label,
157
+ score: e.score,
158
+ ...e.start !== void 0 && { start: e.start },
159
+ ...e.end !== void 0 && { end: e.end }
160
+ })),
161
+ relations: result.relations.map((r) => ({
162
+ head: r.head,
163
+ tail: r.tail,
164
+ relation: r.relation,
165
+ score: r.score
166
+ })),
167
+ classifications: result.classifications.map((c) => ({
168
+ label: c.label,
169
+ score: c.score
170
+ })),
171
+ objects: result.objects.map((o) => ({
172
+ label: o.label,
173
+ score: o.score,
174
+ bbox: o.bbox
175
+ }))
176
+ });
177
+ }
178
+ /**
179
+ * Close the underlying client connection.
180
+ */
181
+ async close() {
182
+ if (this._client && this._ownsClient) {
183
+ await this._client.close();
184
+ }
185
+ }
186
+ };
187
+
188
+ // src/index.ts
7
189
  var SIEEmbeddings = class extends Embeddings {
8
190
  model;
9
191
  instruction;
@@ -41,7 +223,7 @@ var SIEEmbeddings = class extends Embeddings {
41
223
  get client() {
42
224
  if (!this._client) {
43
225
  const baseUrl = this.baseUrl ?? "http://localhost:8080";
44
- this._client = new SIEClient(baseUrl, this.clientOptions);
226
+ this._client = new SIEClient3(baseUrl, this.clientOptions);
45
227
  }
46
228
  return this._client;
47
229
  }
@@ -63,7 +245,7 @@ var SIEEmbeddings = class extends Embeddings {
63
245
  isQuery: false
64
246
  };
65
247
  const results = await this.client.encode(this.model, items, options);
66
- return results.map((result) => this.extractDense(result));
248
+ return results.map((result) => denseEmbedding(result));
67
249
  }
68
250
  /**
69
251
  * Embed a single query text.
@@ -81,17 +263,7 @@ var SIEEmbeddings = class extends Embeddings {
81
263
  isQuery: true
82
264
  };
83
265
  const result = await this.client.encode(this.model, { text }, options);
84
- return this.extractDense(result);
85
- }
86
- /**
87
- * Extract dense embedding from encode result.
88
- */
89
- extractDense(result) {
90
- const dense = result.dense;
91
- if (!dense) {
92
- throw new Error("Encode result missing dense embedding");
93
- }
94
- return toNumberArray(dense);
266
+ return denseEmbedding(result);
95
267
  }
96
268
  /**
97
269
  * Close the underlying client connection.
@@ -126,7 +298,7 @@ var SIESparseEncoder = class {
126
298
  */
127
299
  get client() {
128
300
  if (!this._client) {
129
- this._client = new SIEClient(this.baseUrl, this.clientOptions);
301
+ this._client = new SIEClient3(this.baseUrl, this.clientOptions);
130
302
  }
131
303
  return this._client;
132
304
  }
@@ -146,7 +318,7 @@ var SIESparseEncoder = class {
146
318
  isQuery: true
147
319
  };
148
320
  const results = await this.client.encode(this.model, items, options);
149
- return results.map((result) => this.extractSparse(result));
321
+ return results.map((result) => sparseEmbedding(result));
150
322
  }
151
323
  /**
152
324
  * Encode document texts to sparse vectors.
@@ -164,20 +336,7 @@ var SIESparseEncoder = class {
164
336
  isQuery: false
165
337
  };
166
338
  const results = await this.client.encode(this.model, items, options);
167
- return results.map((result) => this.extractSparse(result));
168
- }
169
- /**
170
- * Extract sparse embedding from encode result.
171
- */
172
- extractSparse(result) {
173
- const sparse = result.sparse;
174
- if (!sparse) {
175
- return { indices: [], values: [] };
176
- }
177
- return {
178
- indices: toNumberArray(sparse.indices),
179
- values: toNumberArray(sparse.values)
180
- };
339
+ return results.map((result) => sparseEmbedding(result));
181
340
  }
182
341
  /**
183
342
  * Close the underlying client connection.
@@ -190,6 +349,8 @@ var SIESparseEncoder = class {
190
349
  };
191
350
  export {
192
351
  SIEEmbeddings,
352
+ SIEExtractor,
353
+ SIEReranker,
193
354
  SIESparseEncoder
194
355
  };
195
356
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n toNumberArray,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * 
```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractDense(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return this.extractDense(result as EncodeResult);\n }\n\n /**\n * Extract dense embedding from encode result.\n */\n private extractDense(result: EncodeResult): number[] {\n const dense = result.dense;\n if (!dense) {\n throw new Error(\"Encode result missing dense embedding\");\n }\n return toNumberArray(dense);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => this.extractSparse(result));\n }\n\n /**\n * Extract sparse embedding from encode result.\n */\n private extractSparse(result: EncodeResult): { indices: number[]; values: number[] } {\n const sparse = result.sparse;\n if (!sparse) {\n return { indices: [], values: [] };\n }\n\n return {\n indices: toNumberArray(sparse.indices),\n values: toNumberArray(sparse.values),\n };\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n 
}\n}\n"],"mappings":";AAqBA,SAAS,kBAAyC;AAClD;AAAA,EAIE;AAAA,EAEA;AAAA,OACK;AA8EA,IAAM,gBAAN,cAA4B,WAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAI,UAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,aAAa,MAAM,CAAC;AAAA,EAC9E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,KAAK,aAAa,MAAsB;AAAA,EACjD;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,QAAgC;AACnD,UAAM,QAAQ,OAAO;AACrB,QAAI,CAAC,OAAO;AACV,YAAM,IAAI,MAAM,uCAAuC;AAAA,IACzD;AACA,WAAO,cAAc,KAAK;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;A
ACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,KAAK,cAAc,MAAM,CAAC;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA,EAKQ,cAAc,QAA+D;AACnF,UAAM,SAAS,OAAO;AACtB,QAAI,CAAC,QAAQ;AACX,aAAO,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAAA,IACnC;AAEA,WAAO;AAAA,MACL,SAAS,cAAc,OAAO,OAAO;AAAA,MACrC,QAAQ,cAAc,OAAO,MAAM;AAAA,IACrC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/rerankers.ts","../src/extractors.ts"],"sourcesContent":["/**\n * SIE embeddings integration for LangChain.js\n *\n * Provides drop-in replacement for OpenAI embeddings using SIE's inference server:\n * - SIEEmbeddings: Dense embeddings for vector stores\n * - SIESparseEncoder: Sparse encoder for hybrid search\n * - SIEReranker: Cross-encoder reranking for retrieval pipelines\n * - SIEExtractor: Entity extraction tool for agents\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * const vectors = await embeddings.embedDocuments([\"Hello world\"]);\n * const queryVector = await embeddings.embedQuery(\"What is hello?\");\n * ```\n */\n\nimport { Embeddings, type EmbeddingsParams } from \"@langchain/core/embeddings\";\nimport {\n type DType,\n type EncodeOptions,\n type EncodeResult,\n SIEClient,\n type SIEClientOptions,\n denseEmbedding,\n sparseEmbedding,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEEmbeddings.\n */\nexport interface SIEEmbeddingsParams extends EmbeddingsParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Optional instruction prefix for embedding (model-dependent).\n */\n instruction?: string;\n\n /**\n * Output dtype: \"float32\" (default), \"float16\", \"int8\", \"binary\".\n */\n outputDtype?: DType;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: 
number;\n}\n\n/**\n * LangChain Embeddings implementation using SIE.\n *\n * Wraps SIEClient.encode() to implement the LangChain Embeddings interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings } from \"@superlinked/sie-langchain\";\n *\n * // Basic usage\n * const embeddings = new SIEEmbeddings({\n * baseUrl: \"http://localhost:8080\",\n * model: \"BAAI/bge-m3\",\n * });\n *\n * // Embed documents\n * const docVectors = await embeddings.embedDocuments([\n * \"First document\",\n * \"Second document\",\n * ]);\n *\n * // Embed a query (may use different encoding for asymmetric models)\n * const queryVector = await embeddings.embedQuery(\"What is the topic?\");\n *\n * // With GPU routing\n * const gpuEmbeddings = new SIEEmbeddings({\n * baseUrl: \"https://cluster.example.com\",\n * model: \"BAAI/bge-m3\",\n * gpu: \"a100-80gb\",\n * });\n * ```\n */\nexport class SIEEmbeddings extends Embeddings {\n private readonly model: string;\n private readonly instruction?: string;\n private readonly outputDtype?: DType;\n private _client: SIEClient | undefined;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEEmbeddingsParams = {}) {\n super(params);\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n client,\n instruction,\n outputDtype,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.model = model;\n this.instruction = instruction;\n this.outputDtype = outputDtype;\n this._client = client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n\n // If no client provided, we'll create one lazily using baseUrl\n if (!client) {\n this.clientOptions.timeout = timeout;\n this.clientOptions.gpu = gpu;\n // Store baseUrl for lazy client creation\n (this as { baseUrl?: string }).baseUrl = baseUrl;\n }\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n const baseUrl = (this as { baseUrl?: string }).baseUrl ?? 
\"http://localhost:8080\";\n this._client = new SIEClient(baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Embed a list of documents.\n *\n * @param texts - List of document texts to embed.\n * @returns List of embedding vectors (as arrays of numbers).\n */\n async embedDocuments(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => denseEmbedding(result));\n }\n\n /**\n * Embed a single query text.\n *\n * For asymmetric models (like BGE-M3), this uses query-specific encoding.\n *\n * @param text - Query text to embed.\n * @returns Embedding vector as array of numbers.\n */\n async embedQuery(text: string): Promise<number[]> {\n const options: EncodeOptions = {\n outputTypes: [\"dense\"],\n instruction: this.instruction,\n outputDtype: this.outputDtype,\n isQuery: true,\n };\n\n const result = await this.client.encode(this.model, { text }, options);\n return denseEmbedding(result as EncodeResult);\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\n/**\n * Configuration options for SIESparseEncoder.\n */\nexport interface SIESparseEncoderOptions {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Model name/ID to use for encoding. 
Must support sparse output.\n * @default \"BAAI/bge-m3\"\n */\n model?: string;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * Sparse encoder for LangChain hybrid search.\n *\n * Compatible with PineconeHybridSearchRetriever's sparse_encoder interface.\n *\n * @example\n * ```typescript\n * import { SIEEmbeddings, SIESparseEncoder } from \"@superlinked/sie-langchain\";\n * import { PineconeHybridSearchRetriever } from \"@langchain/pinecone\";\n *\n * const retriever = new PineconeHybridSearchRetriever({\n * embeddings: new SIEEmbeddings({ model: \"BAAI/bge-m3\" }),\n * sparseEncoder: new SIESparseEncoder({ model: \"BAAI/bge-m3\" }),\n * index: pineconeIndex,\n * });\n * ```\n */\nexport class SIESparseEncoder {\n private readonly model: string;\n private _client: SIEClient | undefined;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(options: SIESparseEncoderOptions = {}) {\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"BAAI/bge-m3\",\n gpu,\n timeout = 180_000,\n } = options;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Encode query texts to sparse vectors.\n *\n * @param texts - List of query texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeQueries(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: true,\n };\n\n const 
results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Encode document texts to sparse vectors.\n *\n * @param texts - List of document texts to encode.\n * @returns List of objects with \"indices\" and \"values\" arrays.\n */\n async encodeDocuments(texts: string[]): Promise<Array<{ indices: number[]; values: number[] }>> {\n if (texts.length === 0) {\n return [];\n }\n\n const items = texts.map((text) => ({ text }));\n const options: EncodeOptions = {\n outputTypes: [\"sparse\"],\n isQuery: false,\n };\n\n const results = await this.client.encode(this.model, items, options);\n return (results as EncodeResult[]).map((result) => sparseEmbedding(result));\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client) {\n await this._client.close();\n }\n }\n}\n\nexport { SIEReranker, type SIERerankerParams } from \"./rerankers.js\";\nexport { SIEExtractor, type SIEExtractorParams } from \"./extractors.js\";\n","/**\n * SIE reranker integration for LangChain.js\n *\n * Provides document reranking using SIE's score endpoint:\n * - SIEReranker: Cross-encoder reranking implementing BaseDocumentCompressor\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n * ```\n */\n\nimport type { DocumentInterface } from \"@langchain/core/documents\";\nimport { BaseDocumentCompressor } from \"@langchain/core/retrievers/document_compressors\";\nimport { SIEClient, type SIEClientOptions } from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEReranker.\n */\nexport interface SIERerankerParams {\n /**\n * URL of the SIE 
server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Reranker model name/ID.\n * @default \"jinaai/jina-reranker-v2-base-multilingual\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Number of top documents to return. If undefined, returns all documents.\n */\n topK?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n}\n\n/**\n * LangChain document compressor using SIE's reranking.\n *\n * Wraps SIEClient.score() to implement BaseDocumentCompressor.\n *\n * @example\n * ```typescript\n * import { SIEReranker } from \"@superlinked/sie-langchain\";\n *\n * const reranker = new SIEReranker({\n * baseUrl: \"http://localhost:8080\",\n * model: \"jinaai/jina-reranker-v2-base-multilingual\",\n * topK: 3,\n * });\n *\n * // Rerank retrieved documents\n * const reranked = await reranker.compressDocuments(documents, \"search query\");\n *\n * // Use in a retrieval pipeline\n * import { ContextualCompressionRetriever } from \"langchain/retrievers/contextual_compression\";\n *\n * const compressionRetriever = new ContextualCompressionRetriever({\n * baseCompressor: reranker,\n * baseRetriever: vectorStoreRetriever,\n * });\n * ```\n */\nexport class SIEReranker extends BaseDocumentCompressor {\n private readonly model: string;\n private readonly topK?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIERerankerParams = {}) {\n super();\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"jinaai/jina-reranker-v2-base-multilingual\",\n client,\n topK,\n gpu,\n timeout = 180_000,\n } = params;\n\n 
this.baseUrl = baseUrl;\n this.model = model;\n this.topK = topK;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Rerank documents by relevance to query.\n *\n * @param documents - Documents to rerank.\n * @param query - Query to rank documents against.\n * @returns Reranked documents with relevance_score in metadata, sorted by score descending.\n */\n async compressDocuments(\n documents: DocumentInterface[],\n query: string,\n ): Promise<DocumentInterface[]> {\n if (documents.length === 0) {\n return [];\n }\n\n const queryItem = { text: query };\n const docItems = documents.map((doc) => ({ text: doc.pageContent }));\n\n const result = await this.client.score(this.model, queryItem, docItems);\n\n // Map score entries back to documents with relevance_score in metadata.\n // ScoreResult.scores are already sorted by score descending.\n const reranked: DocumentInterface[] = [];\n for (const entry of result.scores) {\n const idx = Number.parseInt(entry.itemId, 10);\n const doc = documents[idx];\n if (doc) {\n reranked.push({\n pageContent: doc.pageContent,\n metadata: { ...doc.metadata, relevance_score: entry.score },\n id: doc.id,\n });\n }\n }\n\n if (this.topK !== undefined) {\n return reranked.slice(0, this.topK);\n }\n return reranked;\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await this._client.close();\n }\n }\n}\n","/**\n * SIE extraction tool for LangChain.js\n *\n * Provides extraction using SIE's extract endpoint:\n * - SIEExtractor: Extraction tool implementing LangChain Tool\n *\n * Returns entities, relations, classifications, and detected objects.\n *\n * @example\n * ```typescript\n * 
import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * const extractor = new SIEExtractor({\n * baseUrl: \"http://localhost:8080\",\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n *\n * const result = await extractor.invoke(\"John Smith works at Acme Corp in NYC\");\n * const parsed = JSON.parse(result);\n * console.log(parsed.entities);\n * console.log(parsed.relations);\n * ```\n */\n\nimport { Tool } from \"@langchain/core/tools\";\nimport {\n type ExtractOptions,\n type ExtractResult,\n SIEClient,\n type SIEClientOptions,\n} from \"@superlinked/sie-sdk\";\n\n/**\n * Configuration options for SIEExtractor.\n */\nexport interface SIEExtractorParams {\n /**\n * URL of the SIE server.\n * @default \"http://localhost:8080\"\n */\n baseUrl?: string;\n\n /**\n * Extraction model name/ID.\n * @default \"urchade/gliner_multi-v2.1\"\n */\n model?: string;\n\n /**\n * Optional pre-configured SIEClient instance.\n * If provided, baseUrl and other connection options are ignored.\n */\n client?: SIEClient;\n\n /**\n * Labels to extract (entity types, relation types, or classification labels).\n * @default [\"person\", \"organization\", \"location\"]\n */\n labels?: string[];\n\n /**\n * Minimum confidence threshold (0-1).\n */\n threshold?: number;\n\n /**\n * Target GPU type for routing (e.g., \"l4\", \"a100-80gb\").\n */\n gpu?: string;\n\n /**\n * Request timeout in milliseconds.\n * @default 180000 (3 minutes)\n */\n timeout?: number;\n\n /**\n * Tool name for use in agents.\n * @default \"sie_extract\"\n */\n name?: string;\n\n /**\n * Tool description for use in agents.\n */\n description?: string;\n}\n\n/**\n * LangChain tool for extraction using SIE.\n *\n * Wraps SIEClient.extract() to implement the LangChain Tool interface\n * for use in agents and chains. 
Returns JSON with entities, relations,\n * classifications, and detected objects.\n *\n * @example\n * ```typescript\n * import { SIEExtractor } from \"@superlinked/sie-langchain\";\n *\n * // Direct usage\n * const extractor = new SIEExtractor({\n * model: \"urchade/gliner_multi-v2.1\",\n * labels: [\"person\", \"organization\", \"location\"],\n * });\n * const result = await extractor.invoke(\"John Smith works at Acme Corp\");\n * const parsed = JSON.parse(result);\n *\n * // Use in an agent\n * import { ChatOpenAI } from \"@langchain/openai\";\n * import { createReactAgent } from \"@langchain/langgraph/prebuilt\";\n *\n * const agent = createReactAgent({\n * llm: new ChatOpenAI(),\n * tools: [extractor],\n * });\n * ```\n */\nexport class SIEExtractor extends Tool {\n name: string;\n description: string;\n\n private readonly model: string;\n private readonly labels: string[];\n private readonly threshold?: number;\n private _client: SIEClient | undefined;\n private readonly _ownsClient: boolean;\n private readonly baseUrl: string;\n private readonly clientOptions: SIEClientOptions;\n\n constructor(params: SIEExtractorParams = {}) {\n const toolName = params.name ?? \"sie_extract\";\n const toolDescription =\n params.description ??\n \"Extract structured information from text. \" +\n \"Input should be text to analyze. 
\" +\n \"Returns JSON with entities, relations, classifications, and detected objects.\";\n\n super({});\n\n this.name = toolName;\n this.description = toolDescription;\n\n const {\n baseUrl = \"http://localhost:8080\",\n model = \"urchade/gliner_multi-v2.1\",\n client,\n labels = [\"person\", \"organization\", \"location\"],\n threshold,\n gpu,\n timeout = 180_000,\n } = params;\n\n this.baseUrl = baseUrl;\n this.model = model;\n this.labels = labels;\n this.threshold = threshold;\n this._client = client;\n this._ownsClient = !client;\n\n this.clientOptions = {\n timeout,\n gpu,\n };\n }\n\n /**\n * Get or create the SIEClient.\n */\n private get client(): SIEClient {\n if (!this._client) {\n this._client = new SIEClient(this.baseUrl, this.clientOptions);\n }\n return this._client;\n }\n\n /**\n * Extract structured information from text.\n *\n * @param text - Text to extract from.\n * @returns JSON string with entities, relations, classifications, and objects.\n */\n async _call(text: string): Promise<string> {\n const extractOptions: ExtractOptions = {\n labels: this.labels,\n };\n if (this.threshold !== undefined) {\n extractOptions.threshold = this.threshold;\n }\n\n const result: ExtractResult = await this.client.extract(this.model, { text }, extractOptions);\n\n return JSON.stringify({\n entities: result.entities.map((e) => ({\n text: e.text,\n label: e.label,\n score: e.score,\n ...(e.start !== undefined && { start: e.start }),\n ...(e.end !== undefined && { end: e.end }),\n })),\n relations: result.relations.map((r) => ({\n head: r.head,\n tail: r.tail,\n relation: r.relation,\n score: r.score,\n })),\n classifications: result.classifications.map((c) => ({\n label: c.label,\n score: c.score,\n })),\n objects: result.objects.map((o) => ({\n label: o.label,\n score: o.score,\n bbox: o.bbox,\n })),\n });\n }\n\n /**\n * Close the underlying client connection.\n */\n async close(): Promise<void> {\n if (this._client && this._ownsClient) {\n await 
this._client.close();\n }\n }\n}\n"],"mappings":";AAuBA,SAAS,kBAAyC;AAClD;AAAA,EAIE,aAAAA;AAAA,EAEA;AAAA,EACA;AAAA,OACK;;;ACXP,SAAS,8BAA8B;AACvC,SAAS,iBAAwC;AAoE1C,IAAM,cAAN,cAA0B,uBAAuB;AAAA,EACrC;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA4B,CAAC,GAAG;AAC1C,UAAM;AAEN,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAI,UAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,kBACJ,WACA,OAC8B;AAC9B,QAAI,UAAU,WAAW,GAAG;AAC1B,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,YAAY,EAAE,MAAM,MAAM;AAChC,UAAM,WAAW,UAAU,IAAI,CAAC,SAAS,EAAE,MAAM,IAAI,YAAY,EAAE;AAEnE,UAAM,SAAS,MAAM,KAAK,OAAO,MAAM,KAAK,OAAO,WAAW,QAAQ;AAItE,UAAM,WAAgC,CAAC;AACvC,eAAW,SAAS,OAAO,QAAQ;AACjC,YAAM,MAAM,OAAO,SAAS,MAAM,QAAQ,EAAE;AAC5C,YAAM,MAAM,UAAU,GAAG;AACzB,UAAI,KAAK;AACP,iBAAS,KAAK;AAAA,UACZ,aAAa,IAAI;AAAA,UACjB,UAAU,EAAE,GAAG,IAAI,UAAU,iBAAiB,MAAM,MAAM;AAAA,UAC1D,IAAI,IAAI;AAAA,QACV,CAAC;AAAA,MACH;AAAA,IACF;AAEA,QAAI,KAAK,SAAS,QAAW;AAC3B,aAAO,SAAS,MAAM,GAAG,KAAK,IAAI;AAAA,IACpC;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AC5JA,SAAS,YAAY;AACrB;AAAA,EAGE,aAAAC;AAAA,OAEK;AAuFA,IAAM,eAAN,cAA2B,KAAK;AAAA,EACrC;AAAA,EACA;AAAA,EAEiB;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAA6B,CAAC,GAAG;AAC3C,UAAM,WAAW,OAAO,QAAQ;AAChC,UAAM,kBACJ,OAAO,eACP;AAIF,UAAM,CAAC,CAAC;AAER,SAAK,OAAO;AACZ,SAAK,cAAc;AAEnB,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,SAAS,CAAC,UAAU,gBAAgB,UAAU;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,YAAY;AACjB,SAAK,UAAU;AACf,SAAK,cAAc,CAAC;AAEpB,SAAK,gBAAgB;AAAA,MACnB;AAAA,
MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAIA,WAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,MAAM,MAA+B;AACzC,UAAM,iBAAiC;AAAA,MACrC,QAAQ,KAAK;AAAA,IACf;AACA,QAAI,KAAK,cAAc,QAAW;AAChC,qBAAe,YAAY,KAAK;AAAA,IAClC;AAEA,UAAM,SAAwB,MAAM,KAAK,OAAO,QAAQ,KAAK,OAAO,EAAE,KAAK,GAAG,cAAc;AAE5F,WAAO,KAAK,UAAU;AAAA,MACpB,UAAU,OAAO,SAAS,IAAI,CAAC,OAAO;AAAA,QACpC,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,GAAI,EAAE,UAAU,UAAa,EAAE,OAAO,EAAE,MAAM;AAAA,QAC9C,GAAI,EAAE,QAAQ,UAAa,EAAE,KAAK,EAAE,IAAI;AAAA,MAC1C,EAAE;AAAA,MACF,WAAW,OAAO,UAAU,IAAI,CAAC,OAAO;AAAA,QACtC,MAAM,EAAE;AAAA,QACR,MAAM,EAAE;AAAA,QACR,UAAU,EAAE;AAAA,QACZ,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,iBAAiB,OAAO,gBAAgB,IAAI,CAAC,OAAO;AAAA,QAClD,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,MACF,SAAS,OAAO,QAAQ,IAAI,CAAC,OAAO;AAAA,QAClC,OAAO,EAAE;AAAA,QACT,OAAO,EAAE;AAAA,QACT,MAAM,EAAE;AAAA,MACV,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,WAAW,KAAK,aAAa;AACpC,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;;;AFpHO,IAAM,gBAAN,cAA4B,WAAW;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACT;AAAA,EACS;AAAA,EAEjB,YAAY,SAA8B,CAAC,GAAG;AAC5C,UAAM,MAAM;AAEZ,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,QAAQ;AACb,SAAK,cAAc;AACnB,SAAK,cAAc;AACnB,SAAK,UAAU;AAEf,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAGA,QAAI,CAAC,QAAQ;AACX,WAAK,cAAc,UAAU;AAC7B,WAAK,cAAc,MAAM;AAEzB,MAAC,KAA8B,UAAU;AAAA,IAC3C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,YAAM,UAAW,KAA8B,WAAW;AAC1D,WAAK,UAAU,IAAIC,WAAU,SAAS,KAAK,aAAa;AAAA,IAC1D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,eAAe,OAAsC;AACzD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AA
AA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,eAAe,MAAM,CAAC;AAAA,EAC3E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAM,WAAW,MAAiC;AAChD,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,OAAO;AAAA,MACrB,aAAa,KAAK;AAAA,MAClB,aAAa,KAAK;AAAA,MAClB,SAAS;AAAA,IACX;AAEA,UAAM,SAAS,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EAAE,KAAK,GAAG,OAAO;AACrE,WAAO,eAAe,MAAsB;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;AA+CO,IAAM,mBAAN,MAAuB;AAAA,EACX;AAAA,EACT;AAAA,EACS;AAAA,EACA;AAAA,EAEjB,YAAY,UAAmC,CAAC,GAAG;AACjD,UAAM;AAAA,MACJ,UAAU;AAAA,MACV,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,IACZ,IAAI;AAEJ,SAAK,UAAU;AACf,SAAK,QAAQ;AACb,SAAK,gBAAgB;AAAA,MACnB;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAY,SAAoB;AAC9B,QAAI,CAAC,KAAK,SAAS;AACjB,WAAK,UAAU,IAAIA,WAAU,KAAK,SAAS,KAAK,aAAa;AAAA,IAC/D;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,cAAc,OAA0E;AAC5F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,gBAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,gBAAgB,OAA0E;AAC9F,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,QAAQ,MAAM,IAAI,CAAC,UAAU,EAAE,KAAK,EAAE;AAC5C,UAAM,UAAyB;AAAA,MAC7B,aAAa,CAAC,QAAQ;AAAA,MACtB,SAAS;AAAA,IACX;AAEA,UAAM,UAAU,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,OAAO,OAAO;AACnE,WAAQ,QAA2B,IAAI,CAAC,WAAW,gBAAgB,MAAM,CAAC;AAAA,EAC5E;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,QAAuB;AAC3B,QAAI,KAAK,SAAS;AAChB,YAAM,KAAK,QAAQ,MAAM;AAAA,IAC3B;AAAA,EACF;AACF;","names":["SIEClient","SIEClient","SIEClient"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@superlinked/sie-langchain",
3
- "version": "0.1.8",
3
+ "version": "0.1.10",
4
4
  "description": "SIE embeddings integration for LangChain.js",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -17,7 +17,7 @@
17
17
  "dist"
18
18
  ],
19
19
  "dependencies": {
20
- "@superlinked/sie-sdk": "0.1.8"
20
+ "@superlinked/sie-sdk": "0.1.10"
21
21
  },
22
22
  "peerDependencies": {
23
23
  "@langchain/core": ">=0.2.0"