@magda/semantic-indexer-sdk 6.0.0-alpha.6 → 6.0.0-alpha.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { DeleteByQuery_Request } from '@opensearch-project/opensearch/api/_core/deleteByQuery.js';
2
2
  import { DeleteByQuery_Response } from '@opensearch-project/opensearch/api/_core/deleteByQuery.js';
3
- import { ResponseBody } from '@opensearch-project/opensearch/api/_types/_core.search.js';
3
+ import { Search_Response } from '@opensearch-project/opensearch/api/index.js';
4
4
  import { TransportRequestOptions } from '@opensearch-project/opensearch/lib/Transport.js';
5
5
  import { default as URI_2 } from 'urijs';
6
6
  import urijs from 'urijs';
@@ -205,14 +205,15 @@ declare class DeleteResult {
205
205
  }
206
206
 
207
207
  declare class EmbeddingApiClient extends BaseApiClient {
208
- private taskSize;
208
+ taskSize: number;
209
209
  constructor(options: EmbeddingApiClientConfig);
210
- private testConnection;
210
+ testConnection(): Promise<boolean>;
211
211
  get(text: string): Promise<number[]>;
212
212
  get(textList: string[]): Promise<number[][]>;
213
213
  }
214
214
 
215
215
  declare interface EmbeddingApiClientConfig extends BaseApiClientConfig {
216
+ taskSize?: number;
216
217
  }
217
218
 
218
219
  export declare type EmbeddingText = {
@@ -375,6 +376,7 @@ declare class OpensearchApiClient {
375
376
  private client;
376
377
  constructor(config: OpensearchConfig);
377
378
  static getOpensearchApiClient(openSearchConfig: OpensearchConfig): Promise<OpensearchApiClient>;
379
+ ping(): Promise<void>;
378
380
  createIndex(indexDefinition: {
379
381
  indexName: string;
380
382
  settings?: any;
@@ -384,9 +386,9 @@ declare class OpensearchApiClient {
384
386
  indexExists(indexName: string): Promise<boolean>;
385
387
  indexDocument(indexName: string, document: any): Promise<void>;
386
388
  bulkIndexDocument(indexName: string, documents: any[]): Promise<void>;
387
- searchDocuments(indexName: string, query: string): Promise<ResponseBody>;
388
389
  deleteDocument(indexName: string, documentId: string): Promise<void>;
389
390
  deleteByQuery(params: DeleteByQuery_Request, options?: TransportRequestOptions): Promise<DeleteByQuery_Response>;
391
+ search(indexName: string, searchBody: Record<string, unknown>): Promise<Search_Response>;
390
392
  }
391
393
 
392
394
  declare interface OpensearchConfig {
package/dist/index.js CHANGED
@@ -69051,9 +69051,9 @@ var require_Helpers = __commonJS({
69051
69051
  }
69052
69052
  });
69053
69053
 
69054
- // ../../node_modules/@opensearch-project/opensearch/node_modules/secure-json-parse/index.js
69054
+ // ../../node_modules/secure-json-parse/index.js
69055
69055
  var require_secure_json_parse = __commonJS({
69056
- "../../node_modules/@opensearch-project/opensearch/node_modules/secure-json-parse/index.js"(exports, module) {
69056
+ "../../node_modules/secure-json-parse/index.js"(exports, module) {
69057
69057
  "use strict";
69058
69058
  init_cjs_shim();
69059
69059
  var hasBuffer = typeof Buffer !== "undefined";
@@ -114030,7 +114030,7 @@ var getDefaultProjectName = () => {
114030
114030
  };
114031
114031
 
114032
114032
  // ../../magda-semantic-indexer-framework/node_modules/langsmith/dist/index.js
114033
- var __version__ = "0.3.49";
114033
+ var __version__ = "0.3.58";
114034
114034
 
114035
114035
  // ../../magda-semantic-indexer-framework/node_modules/langsmith/dist/utils/env.js
114036
114036
  var globalEnv;
@@ -115505,10 +115505,10 @@ var Client = class _Client {
115505
115505
  if (patch) {
115506
115506
  const sampled = [];
115507
115507
  for (const run of runs) {
115508
- if (!this.filteredPostUuids.has(run.id)) {
115508
+ if (!this.filteredPostUuids.has(run.trace_id)) {
115509
115509
  sampled.push(run);
115510
- } else {
115511
- this.filteredPostUuids.delete(run.id);
115510
+ } else if (run.id === run.trace_id) {
115511
+ this.filteredPostUuids.delete(run.trace_id);
115512
115512
  }
115513
115513
  }
115514
115514
  return sampled;
@@ -115880,8 +115880,8 @@ var Client = class _Client {
115880
115880
  ["patch", preparedUpdateParams]
115881
115881
  ]) {
115882
115882
  for (const originalPayload of payloads) {
115883
- const { inputs, outputs, events: events2, attachments, ...payload } = originalPayload;
115884
- const fields = { inputs, outputs, events: events2 };
115883
+ const { inputs, outputs, events: events2, extra, error, serialized, attachments, ...payload } = originalPayload;
115884
+ const fields = { inputs, outputs, events: events2, extra, error, serialized };
115885
115885
  const stringifiedPayload = serialize(payload, `Serializing for multipart ingestion of run with id: ${payload.id}`);
115886
115886
  accumulatedParts.push({
115887
115887
  name: `${method}.${payload.id}`,
@@ -118982,16 +118982,52 @@ function _getWriteReplicasFromEnv() {
118982
118982
  return [];
118983
118983
  try {
118984
118984
  const parsed = JSON.parse(envVar);
118985
- _checkEndpointEnvUnset(parsed);
118986
- return Object.entries(parsed).map(([url2, key]) => ({
118987
- apiUrl: url2.replace(/\/$/, ""),
118988
- apiKey: key
118989
- }));
118985
+ if (Array.isArray(parsed)) {
118986
+ const replicas = [];
118987
+ for (const item of parsed) {
118988
+ if (typeof item !== "object" || item === null) {
118989
+ console.warn(`Invalid item type in LANGSMITH_RUNS_ENDPOINTS: expected object, got ${typeof item}`);
118990
+ continue;
118991
+ }
118992
+ if (typeof item.api_url !== "string") {
118993
+ console.warn(`Invalid api_url type in LANGSMITH_RUNS_ENDPOINTS: expected string, got ${typeof item.api_url}`);
118994
+ continue;
118995
+ }
118996
+ if (typeof item.api_key !== "string") {
118997
+ console.warn(`Invalid api_key type in LANGSMITH_RUNS_ENDPOINTS: expected string, got ${typeof item.api_key}`);
118998
+ continue;
118999
+ }
119000
+ replicas.push({
119001
+ apiUrl: item.api_url.replace(/\/$/, ""),
119002
+ apiKey: item.api_key
119003
+ });
119004
+ }
119005
+ return replicas;
119006
+ } else if (typeof parsed === "object" && parsed !== null) {
119007
+ _checkEndpointEnvUnset(parsed);
119008
+ const replicas = [];
119009
+ for (const [url2, key] of Object.entries(parsed)) {
119010
+ const cleanUrl = url2.replace(/\/$/, "");
119011
+ if (typeof key === "string") {
119012
+ replicas.push({
119013
+ apiUrl: cleanUrl,
119014
+ apiKey: key
119015
+ });
119016
+ } else {
119017
+ console.warn(`Invalid value type in LANGSMITH_RUNS_ENDPOINTS for URL ${url2}: expected string, got ${typeof key}`);
119018
+ continue;
119019
+ }
119020
+ }
119021
+ return replicas;
119022
+ } else {
119023
+ console.warn(`Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON array of objects with api_url and api_key properties, or object mapping url->apiKey, got ${typeof parsed}`);
119024
+ return [];
119025
+ }
118990
119026
  } catch (e2) {
118991
119027
  if (isConflictingEndpointsError(e2)) {
118992
119028
  throw e2;
118993
119029
  }
118994
- console.warn("Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON mapping of url->apiKey");
119030
+ console.warn("Invalid LANGSMITH_RUNS_ENDPOINTS \u2013 must be valid JSON array of objects with api_url and api_key properties, or object mapping url->apiKey");
118995
119031
  return [];
118996
119032
  }
118997
119033
  }
@@ -120458,10 +120494,9 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
120458
120494
  };
120459
120495
  } else {
120460
120496
  const groupedToolCallChunk = fields.tool_call_chunks.reduce((acc, chunk) => {
120461
- if (!chunk.id)
120462
- return acc;
120463
- acc[chunk.id] = acc[chunk.id] ?? [];
120464
- acc[chunk.id].push(chunk);
120497
+ const chunkId = chunk.id || `fallback-${chunk.index || 0}`;
120498
+ acc[chunkId] = acc[chunkId] ?? [];
120499
+ acc[chunkId].push(chunk);
120465
120500
  return acc;
120466
120501
  }, {});
120467
120502
  const toolCalls = [];
@@ -120471,6 +120506,7 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
120471
120506
  const name = chunks[0]?.name ?? "";
120472
120507
  const joinedArgs = chunks.map((c) => c.args || "").join("");
120473
120508
  const argsStr = joinedArgs.length ? joinedArgs : "{}";
120509
+ const originalId = chunks[0]?.id || id;
120474
120510
  try {
120475
120511
  parsedArgs = parsePartialJson(argsStr);
120476
120512
  if (parsedArgs === null || typeof parsedArgs !== "object" || Array.isArray(parsedArgs)) {
@@ -120479,14 +120515,14 @@ var AIMessageChunk = class _AIMessageChunk extends BaseMessageChunk {
120479
120515
  toolCalls.push({
120480
120516
  name,
120481
120517
  args: parsedArgs,
120482
- id,
120518
+ id: originalId,
120483
120519
  type: "tool_call"
120484
120520
  });
120485
120521
  } catch (e2) {
120486
120522
  invalidToolCalls.push({
120487
120523
  name,
120488
120524
  args: argsStr,
120489
- id,
120525
+ id: originalId,
120490
120526
  error: "Malformed args.",
120491
120527
  type: "invalid_tool_call"
120492
120528
  });
@@ -134405,30 +134441,37 @@ var EmbeddingApiClient = class extends BaseApiClient {
134405
134441
  constructor(options) {
134406
134442
  options.baseApiUrl = options.baseApiUrl || "http://localhost:3000";
134407
134443
  super(options);
134444
+ this.taskSize = options.taskSize || 10;
134408
134445
  this.testConnection();
134409
134446
  }
134410
134447
  async testConnection() {
134411
134448
  try {
134412
134449
  await this.get("test");
134413
- console.log(`Successfully connected to embedding API: ${this.getBaseApiUri().toString()}`);
134450
+ return true;
134414
134451
  } catch (err) {
134415
134452
  throw new Error(`Failed to connect to embedding API: ${this.getBaseApiUri().toString()}, error: ${err}`);
134416
134453
  }
134417
134454
  }
134418
134455
  async get(input) {
134419
- if (Array.isArray(input)) {
134420
- const result = [];
134421
- for (let i2 = 0; i2 < input.length; i2 += this.taskSize) {
134422
- const chunk = input.slice(i2, i2 + this.taskSize);
134423
- const body = { input: chunk };
134424
- const response = await fetchRequest("post", this.getBaseApiUri().segmentCoded("v1").segmentCoded("embeddings").toString(), body, "application/json", false, this.addAuthHeader());
134425
- result.push(...response.data.map((d) => d.embedding));
134456
+ const url2 = this.getBaseApiUri().segmentCoded("v1").segmentCoded("embeddings").toString();
134457
+ try {
134458
+ if (Array.isArray(input)) {
134459
+ const result = [];
134460
+ for (let i2 = 0; i2 < input.length; i2 += this.taskSize) {
134461
+ const chunk = input.slice(i2, i2 + this.taskSize);
134462
+ const res = await fetchRequest("post", url2, { input: chunk }, "application/json", false, this.addAuthHeader());
134463
+ result.push(...res.data.map((d) => d.embedding));
134464
+ }
134465
+ return result;
134466
+ } else {
134467
+ const res = await fetchRequest("post", url2, { input }, "application/json", false, this.addAuthHeader());
134468
+ return res.data[0].embedding;
134426
134469
  }
134427
- return result;
134428
- } else {
134429
- const body = { input };
134430
- const response = await fetchRequest("post", this.getBaseApiUri().segmentCoded("v1").segmentCoded("embeddings").toString(), body, "application/json", false, this.addAuthHeader());
134431
- return response.data[0].embedding;
134470
+ } catch (e2) {
134471
+ if (e2 instanceof ServerError) {
134472
+ throw new Error(`Embedding API error: ${e2.statusCode}: ${e2.message}`);
134473
+ }
134474
+ throw new Error(`Embedding API request failed: ${e2 instanceof Error ? e2.message : String(e2)}`);
134432
134475
  }
134433
134476
  }
134434
134477
  };
@@ -134469,6 +134512,9 @@ var OpensearchApiClient = class _OpensearchApiClient {
134469
134512
  }
134470
134513
  return instance;
134471
134514
  }
134515
+ async ping() {
134516
+ await this.client.ping();
134517
+ }
134472
134518
  async createIndex(indexDefinition) {
134473
134519
  await this.client.indices.create({
134474
134520
  index: indexDefinition.indexName,
@@ -134512,20 +134558,6 @@ var OpensearchApiClient = class _OpensearchApiClient {
134512
134558
  }
134513
134559
  });
134514
134560
  }
134515
- async searchDocuments(indexName, query) {
134516
- const searchResult = await this.client.search({
134517
- index: indexName,
134518
- body: {
134519
- query: {
134520
- match: {
134521
- text: query
134522
- }
134523
- }
134524
- }
134525
- });
134526
- console.log(`Search query: ${query}, result: ${JSON.stringify(searchResult.body)}`);
134527
- return searchResult.body;
134528
- }
134529
134561
  async deleteDocument(indexName, documentId) {
134530
134562
  await this.client.delete({
134531
134563
  index: indexName,
@@ -134540,6 +134572,12 @@ var OpensearchApiClient = class _OpensearchApiClient {
134540
134572
  });
134541
134573
  return response;
134542
134574
  }
134575
+ async search(indexName, searchBody) {
134576
+ return this.client.search({
134577
+ index: indexName,
134578
+ body: searchBody
134579
+ });
134580
+ }
134543
134581
  };
134544
134582
 
134545
134583
  // ../../magda-semantic-indexer-framework/dist/semanticIndexerOptions.js
@@ -140043,7 +140081,7 @@ async function semanticIndexer(userConfig) {
140043
140081
  async: true,
140044
140082
  dereference: false,
140045
140083
  includeEvents: false,
140046
- includeRecords: false,
140084
+ includeRecords: true,
140047
140085
  onRecordFound,
140048
140086
  maxRetries: 3
140049
140087
  };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@magda/semantic-indexer-sdk",
3
3
  "description": "MAGDA Semantic Indexer SDK",
4
- "version": "6.0.0-alpha.6",
4
+ "version": "6.0.0-alpha.7",
5
5
  "type": "module",
6
6
  "exports": {
7
7
  ".": "./dist/index.js",
@@ -23,7 +23,7 @@
23
23
  "author": "",
24
24
  "license": "Apache-2.0",
25
25
  "devDependencies": {
26
- "@magda/semantic-indexer-framework": "^6.0.0-alpha.6",
26
+ "@magda/semantic-indexer-framework": "^6.0.0-alpha.7",
27
27
  "@microsoft/api-extractor": "~7.39.0",
28
28
  "esbuild": "^0.19.10",
29
29
  "ts-loader": "^9.5.1",