@magda/semantic-indexer-sdk 6.0.0-alpha.6 → 6.0.0-alpha.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -4
- package/dist/index.js +86 -48
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DeleteByQuery_Request } from '@opensearch-project/opensearch/api/_core/deleteByQuery.js';
|
|
2
2
|
import { DeleteByQuery_Response } from '@opensearch-project/opensearch/api/_core/deleteByQuery.js';
|
|
3
|
-
import {
|
|
3
|
+
import { Search_Response } from '@opensearch-project/opensearch/api/index.js';
|
|
4
4
|
import { TransportRequestOptions } from '@opensearch-project/opensearch/lib/Transport.js';
|
|
5
5
|
import { default as URI_2 } from 'urijs';
|
|
6
6
|
import urijs from 'urijs';
|
|
@@ -205,14 +205,15 @@ declare class DeleteResult {
|
|
|
205
205
|
}
|
|
206
206
|
|
|
207
207
|
declare class EmbeddingApiClient extends BaseApiClient {
|
|
208
|
-
|
|
208
|
+
taskSize: number;
|
|
209
209
|
constructor(options: EmbeddingApiClientConfig);
|
|
210
|
-
|
|
210
|
+
testConnection(): Promise<boolean>;
|
|
211
211
|
get(text: string): Promise<number[]>;
|
|
212
212
|
get(textList: string[]): Promise<number[][]>;
|
|
213
213
|
}
|
|
214
214
|
|
|
215
215
|
declare interface EmbeddingApiClientConfig extends BaseApiClientConfig {
|
|
216
|
+
taskSize?: number;
|
|
216
217
|
}
|
|
217
218
|
|
|
218
219
|
export declare type EmbeddingText = {
|
|
@@ -375,6 +376,7 @@ declare class OpensearchApiClient {
|
|
|
375
376
|
private client;
|
|
376
377
|
constructor(config: OpensearchConfig);
|
|
377
378
|
static getOpensearchApiClient(openSearchConfig: OpensearchConfig): Promise<OpensearchApiClient>;
|
|
379
|
+
ping(): Promise<void>;
|
|
378
380
|
createIndex(indexDefinition: {
|
|
379
381
|
indexName: string;
|
|
380
382
|
settings?: any;
|
|
@@ -384,9 +386,9 @@ declare class OpensearchApiClient {
|
|
|
384
386
|
indexExists(indexName: string): Promise<boolean>;
|
|
385
387
|
indexDocument(indexName: string, document: any): Promise<void>;
|
|
386
388
|
bulkIndexDocument(indexName: string, documents: any[]): Promise<void>;
|
|
387
|
-
searchDocuments(indexName: string, query: string): Promise<ResponseBody>;
|
|
388
389
|
deleteDocument(indexName: string, documentId: string): Promise<void>;
|
|
389
390
|
deleteByQuery(params: DeleteByQuery_Request, options?: TransportRequestOptions): Promise<DeleteByQuery_Response>;
|
|
391
|
+
search(indexName: string, searchBody: Record<string, unknown>): Promise<Search_Response>;
|
|
390
392
|
}
|
|
391
393
|
|
|
392
394
|
declare interface OpensearchConfig {
|
package/dist/index.js
CHANGED
|
@@ -69051,9 +69051,9 @@ var require_Helpers = __commonJS({
|
|
|
69051
69051
|
}
|
|
69052
69052
|
});
|
|
69053
69053
|
|
|
69054
|
-
// ../../node_modules
|
|
69054
|
+
// ../../node_modules/secure-json-parse/index.js
|
|
69055
69055
|
var require_secure_json_parse = __commonJS({
|
|
69056
|
-
"../../node_modules
|
|
69056
|
+
"../../node_modules/secure-json-parse/index.js"(exports, module) {
|
|
69057
69057
|
"use strict";
|
|
69058
69058
|
init_cjs_shim();
|
|
69059
69059
|
var hasBuffer = typeof Buffer !== "undefined";
|
|
@@ -114030,7 +114030,7 @@ var getDefaultProjectName = () => {
|
|
|
114030
114030
|
};
|
|
114031
114031
|
|
|
114032
114032
|
// ../../magda-semantic-indexer-framework/node_modules/langsmith/dist/index.js
|
|
114033
|
-
var __version__ = "0.3.
|
|
114033
|
+
var __version__ = "0.3.58";
|
|
114034
114034
|
|
|
114035
114035
|
// ../../magda-semantic-indexer-framework/node_modules/langsmith/dist/utils/env.js
|
|
114036
114036
|
var globalEnv;
|
|
@@ -115505,10 +115505,10 @@ var Client = class _Client {
|
|
|
115505
115505
|
if (patch) {
|
|
115506
115506
|
const sampled = [];
|
|
115507
115507
|
for (const run of runs) {
|
|
115508
|
-
if (!this.filteredPostUuids.has(run.
|
|
115508
|
+
if (!this.filteredPostUuids.has(run.trace_id)) {
|
|
115509
115509
|
sampled.push(run);
|
|
115510
|
-
} else {
|
|
115511
|
-
this.filteredPostUuids.delete(run.
|
|
115510
|
+
} else if (run.id === run.trace_id) {
|
|
115511
|
+
this.filteredPostUuids.delete(run.trace_id);
|
|
115512
115512
|
}
|
|
115513
115513
|
}
|
|
115514
115514
|
return sampled;
|
|
@@ -115880,8 +115880,8 @@ var Client = class _Client {
|
|
|
115880
115880
|
["patch", preparedUpdateParams]
|
|
115881
115881
|
]) {
|
|
115882
115882
|
for (const originalPayload of payloads) {
|
|
115883
|
-
const { inputs, outputs, events: events2, attachments, ...payload } = originalPayload;
|
|
115884
|
-
const fields = { inputs, outputs, events: events2 };
|
|
115883
|
+
const { inputs, outputs, events: events2, extra, error, serialized, attachments, ...payload } = originalPayload;
|
|
115884
|
+
const fields = { inputs, outputs, events: events2, extra, error, serialized };
|
|
115885
115885
|
const stringifiedPayload = serialize(payload, `Serializing for multipart ingestion of run with id: ${payload.id}`);
|
|
115886
115886
|
accumulatedParts.push({
|
|
115887
115887
|
name: `${method}.${payload.id}`,
|
|
@@ -118982,16 +118982,52 @@ function _getWriteReplicasFromEnv() {
|
|
|
118982
118982
|
return [];
|
|
118983
118983
|
try {
|
|
118984
118984
|
const parsed = JSON.parse(envVar);
|
|
118985
|
-
|
|
118986
|
-
|
|
118987
|
-
|
|
118988
|
-
|
|
118989
|
-
|
|
118985
|
+
if (Array.isArray(parsed)) {
|
|
118986
|
+
const replicas = [];
|
|
118987
|
+
for (const item of parsed) {
|
|
118988
|
+
if (typeof item !== "object" || item === null) {
|
|
118989
|
+
console.warn(`Invalid item type in LANGSMITH_RUNS_ENDPOINTS: expected object, got ${typeof item}`);
|
|
118990
|
+
continue;
|
|
118991
|
+
}
|
|
118992
|
+
if (typeof item.api_url !== "string") {
|
|
118993
|
+
console.warn(`Invalid api_url type in LANGSMITH_RUNS_ENDPOINTS: expected string, got ${typeof item.api_url}`);
|
|
118994
|
+
continue;
|
|
118995
|
+
}
|
|
118996
|
+
if (typeof item.api_key !== "string") {
|
|
118997
|
+
console.warn(`Invalid api_key type in LANGSMITH_RUNS_ENDPOINTS: expected string, got ${typeof item.api_key}`);
|
|
118998
|
+
continue;
|
|
118999
|
+
}
|
|
119000
|
+
replicas.push({
|
|
119001
|
+
apiUrl: item.api_url.replace(/\/$/, ""),
|
|
119002
|
+
apiKey: item.api_key
|
|
119003
|
+
});
|
|
119004
|
+
}
|
|
119005
|
+
return replicas;
|
|
119006
|
+
} else if (typeof parsed === "object" && parsed !== null) {
|
|
119007
|
+
_checkEndpointEnvUnset(parsed);
|
|
119008
|
+
const replicas = [];
|
|
119009
|
+
for (const [url2, key] of Object.entries(parsed)) {
|
|
119010
|
+
const cleanUrl = url2.replace(/\/$/, "");
|
|
119011
|
+
if (typeof key === "string") {
|
|
119012
|
+
replicas.push({
|
|
119013
|
+
apiUrl: cleanUrl,
|
|
119014
|
+
apiKey: key
|
|
119015
|
+
});
|
|
119016
|
+
} else {
|
|
119017
|
+
console.warn(`Invalid value type in LANGSMITH_RUNS_ENDPOINTS for URL ${url2}: expected string, got ${typeof key}`);
|
|
119018
|
+
continue;
|
|
119019
|
+
}
|
|
119020
|
+
}
|
|
119021
|
+
return replicas;
|
|
119022
|
+
} else {
|
|
119023
|
+
console.warn(`Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON array of objects with api_url and api_key properties, or object mapping url->apiKey, got ${typeof parsed}`);
|
|
119024
|
+
return [];
|
|
119025
|
+
}
|
|
118990
119026
|
} catch (e2) {
|
|
118991
119027
|
if (isConflictingEndpointsError(e2)) {
|
|
118992
119028
|
throw e2;
|
|
118993
119029
|
}
|
|
118994
|
-
console.warn("Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON
|
|
119030
|
+
console.warn("Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON array of objects with api_url and api_key properties, or object mapping url->apiKey");
|
|
118995
119031
|
return [];
|
|
118996
119032
|
}
|
|
118997
119033
|
}
|
|
@@ -120458,10 +120494,9 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
|
|
|
120458
120494
|
};
|
|
120459
120495
|
} else {
|
|
120460
120496
|
const groupedToolCallChunk = fields.tool_call_chunks.reduce((acc, chunk) => {
|
|
120461
|
-
|
|
120462
|
-
|
|
120463
|
-
acc[
|
|
120464
|
-
acc[chunk.id].push(chunk);
|
|
120497
|
+
const chunkId = chunk.id || `fallback-${chunk.index || 0}`;
|
|
120498
|
+
acc[chunkId] = acc[chunkId] ?? [];
|
|
120499
|
+
acc[chunkId].push(chunk);
|
|
120465
120500
|
return acc;
|
|
120466
120501
|
}, {});
|
|
120467
120502
|
const toolCalls = [];
|
|
@@ -120471,6 +120506,7 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
|
|
|
120471
120506
|
const name = chunks[0]?.name ?? "";
|
|
120472
120507
|
const joinedArgs = chunks.map((c) => c.args || "").join("");
|
|
120473
120508
|
const argsStr = joinedArgs.length ? joinedArgs : "{}";
|
|
120509
|
+
const originalId = chunks[0]?.id || id;
|
|
120474
120510
|
try {
|
|
120475
120511
|
parsedArgs = parsePartialJson(argsStr);
|
|
120476
120512
|
if (parsedArgs === null || typeof parsedArgs !== "object" || Array.isArray(parsedArgs)) {
|
|
@@ -120479,14 +120515,14 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
|
|
|
120479
120515
|
toolCalls.push({
|
|
120480
120516
|
name,
|
|
120481
120517
|
args: parsedArgs,
|
|
120482
|
-
id,
|
|
120518
|
+
id: originalId,
|
|
120483
120519
|
type: "tool_call"
|
|
120484
120520
|
});
|
|
120485
120521
|
} catch (e2) {
|
|
120486
120522
|
invalidToolCalls.push({
|
|
120487
120523
|
name,
|
|
120488
120524
|
args: argsStr,
|
|
120489
|
-
id,
|
|
120525
|
+
id: originalId,
|
|
120490
120526
|
error: "Malformed args.",
|
|
120491
120527
|
type: "invalid_tool_call"
|
|
120492
120528
|
});
|
|
@@ -134405,30 +134441,37 @@ var EmbeddingApiClient = class extends BaseApiClient {
|
|
|
134405
134441
|
constructor(options) {
|
|
134406
134442
|
options.baseApiUrl = options.baseApiUrl || "http://localhost:3000";
|
|
134407
134443
|
super(options);
|
|
134444
|
+
this.taskSize = options.taskSize || 10;
|
|
134408
134445
|
this.testConnection();
|
|
134409
134446
|
}
|
|
134410
134447
|
async testConnection() {
|
|
134411
134448
|
try {
|
|
134412
134449
|
await this.get("test");
|
|
134413
|
-
|
|
134450
|
+
return true;
|
|
134414
134451
|
} catch (err) {
|
|
134415
134452
|
throw new Error(`Failed to connect to embedding API: ${this.getBaseApiUri().toString()}, error: ${err}`);
|
|
134416
134453
|
}
|
|
134417
134454
|
}
|
|
134418
134455
|
async get(input) {
|
|
134419
|
-
|
|
134420
|
-
|
|
134421
|
-
|
|
134422
|
-
const
|
|
134423
|
-
|
|
134424
|
-
|
|
134425
|
-
|
|
134456
|
+
const url2 = this.getBaseApiUri().segmentCoded("v1").segmentCoded("embeddings").toString();
|
|
134457
|
+
try {
|
|
134458
|
+
if (Array.isArray(input)) {
|
|
134459
|
+
const result = [];
|
|
134460
|
+
for (let i2 = 0; i2 < input.length; i2 += this.taskSize) {
|
|
134461
|
+
const chunk = input.slice(i2, i2 + this.taskSize);
|
|
134462
|
+
const res = await fetchRequest("post", url2, { input: chunk }, "application/json", false, this.addAuthHeader());
|
|
134463
|
+
result.push(...res.data.map((d) => d.embedding));
|
|
134464
|
+
}
|
|
134465
|
+
return result;
|
|
134466
|
+
} else {
|
|
134467
|
+
const res = await fetchRequest("post", url2, { input }, "application/json", false, this.addAuthHeader());
|
|
134468
|
+
return res.data[0].embedding;
|
|
134426
134469
|
}
|
|
134427
|
-
|
|
134428
|
-
|
|
134429
|
-
|
|
134430
|
-
|
|
134431
|
-
|
|
134470
|
+
} catch (e2) {
|
|
134471
|
+
if (e2 instanceof ServerError) {
|
|
134472
|
+
throw new Error(`Embedding API error: ${e2.statusCode}: ${e2.message}`);
|
|
134473
|
+
}
|
|
134474
|
+
throw new Error(`Embedding API request failed: ${e2 instanceof Error ? e2.message : String(e2)}`);
|
|
134432
134475
|
}
|
|
134433
134476
|
}
|
|
134434
134477
|
};
|
|
@@ -134469,6 +134512,9 @@ var OpensearchApiClient = class _OpensearchApiClient {
|
|
|
134469
134512
|
}
|
|
134470
134513
|
return instance;
|
|
134471
134514
|
}
|
|
134515
|
+
async ping() {
|
|
134516
|
+
await this.client.ping();
|
|
134517
|
+
}
|
|
134472
134518
|
async createIndex(indexDefinition) {
|
|
134473
134519
|
await this.client.indices.create({
|
|
134474
134520
|
index: indexDefinition.indexName,
|
|
@@ -134512,20 +134558,6 @@ var OpensearchApiClient = class _OpensearchApiClient {
|
|
|
134512
134558
|
}
|
|
134513
134559
|
});
|
|
134514
134560
|
}
|
|
134515
|
-
async searchDocuments(indexName, query) {
|
|
134516
|
-
const searchResult = await this.client.search({
|
|
134517
|
-
index: indexName,
|
|
134518
|
-
body: {
|
|
134519
|
-
query: {
|
|
134520
|
-
match: {
|
|
134521
|
-
text: query
|
|
134522
|
-
}
|
|
134523
|
-
}
|
|
134524
|
-
}
|
|
134525
|
-
});
|
|
134526
|
-
console.log(`Search query: ${query}, result: ${JSON.stringify(searchResult.body)}`);
|
|
134527
|
-
return searchResult.body;
|
|
134528
|
-
}
|
|
134529
134561
|
async deleteDocument(indexName, documentId) {
|
|
134530
134562
|
await this.client.delete({
|
|
134531
134563
|
index: indexName,
|
|
@@ -134540,6 +134572,12 @@ var OpensearchApiClient = class _OpensearchApiClient {
|
|
|
134540
134572
|
});
|
|
134541
134573
|
return response;
|
|
134542
134574
|
}
|
|
134575
|
+
async search(indexName, searchBody) {
|
|
134576
|
+
return this.client.search({
|
|
134577
|
+
index: indexName,
|
|
134578
|
+
body: searchBody
|
|
134579
|
+
});
|
|
134580
|
+
}
|
|
134543
134581
|
};
|
|
134544
134582
|
|
|
134545
134583
|
// ../../magda-semantic-indexer-framework/dist/semanticIndexerOptions.js
|
|
@@ -140043,7 +140081,7 @@ async function semanticIndexer(userConfig) {
|
|
|
140043
140081
|
async: true,
|
|
140044
140082
|
dereference: false,
|
|
140045
140083
|
includeEvents: false,
|
|
140046
|
-
includeRecords:
|
|
140084
|
+
includeRecords: true,
|
|
140047
140085
|
onRecordFound,
|
|
140048
140086
|
maxRetries: 3
|
|
140049
140087
|
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@magda/semantic-indexer-sdk",
|
|
3
3
|
"description": "MAGDA Semantic Indexer SDK",
|
|
4
|
-
"version": "6.0.0-alpha.
|
|
4
|
+
"version": "6.0.0-alpha.7",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
7
7
|
".": "./dist/index.js",
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"author": "",
|
|
24
24
|
"license": "Apache-2.0",
|
|
25
25
|
"devDependencies": {
|
|
26
|
-
"@magda/semantic-indexer-framework": "^6.0.0-alpha.
|
|
26
|
+
"@magda/semantic-indexer-framework": "^6.0.0-alpha.7",
|
|
27
27
|
"@microsoft/api-extractor": "~7.39.0",
|
|
28
28
|
"esbuild": "^0.19.10",
|
|
29
29
|
"ts-loader": "^9.5.1",
|