@superatomai/sdk-node 0.0.14-mds → 0.0.15-mds

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -2193,11 +2193,16 @@ interface ExternalTool {
2193
2193
  toolType?: 'source' | 'direct';
2194
2194
  /** Full untruncated schema for source agent (all columns visible) */
2195
2195
  fullSchema?: string;
2196
+ /** Schema size tier: small (≤50 tables), medium (51-200), large (201-500), very_large (>500) */
2197
+ schemaTier?: string;
2198
+ /** Schema search function for very_large tier — keyword search over entities */
2199
+ schemaSearchFn?: (keywords: string[]) => string;
2196
2200
  fn: (input: any) => Promise<any>;
2197
2201
  limit?: number;
2198
2202
  outputSchema?: any;
2199
2203
  executionType?: 'immediate' | 'deferred';
2200
2204
  userProvidedData?: any;
2205
+ params?: Record<string, any>;
2201
2206
  }
2202
2207
  /**
2203
2208
  * Executed tool tracking info
package/dist/index.d.ts CHANGED
@@ -2193,11 +2193,16 @@ interface ExternalTool {
2193
2193
  toolType?: 'source' | 'direct';
2194
2194
  /** Full untruncated schema for source agent (all columns visible) */
2195
2195
  fullSchema?: string;
2196
+ /** Schema size tier: small (≤50 tables), medium (51-200), large (201-500), very_large (>500) */
2197
+ schemaTier?: string;
2198
+ /** Schema search function for very_large tier — keyword search over entities */
2199
+ schemaSearchFn?: (keywords: string[]) => string;
2196
2200
  fn: (input: any) => Promise<any>;
2197
2201
  limit?: number;
2198
2202
  outputSchema?: any;
2199
2203
  executionType?: 'immediate' | 'deferred';
2200
2204
  userProvidedData?: any;
2205
+ params?: Record<string, any>;
2201
2206
  }
2202
2207
  /**
2203
2208
  * Executed tool tracking info
package/dist/index.js CHANGED
@@ -6886,6 +6886,7 @@ function formatSummariesForPrompt(summaries) {
6886
6886
  }
6887
6887
  function extractEntityDetails(description) {
6888
6888
  const details = [];
6889
+ const seenNames = /* @__PURE__ */ new Set();
6889
6890
  const bulletSections = description.split(/(?=•\s)/);
6890
6891
  for (const section of bulletSections) {
6891
6892
  if (!section.trim().startsWith("\u2022")) continue;
@@ -6899,6 +6900,31 @@ function extractEntityDetails(description) {
6899
6900
  rowCount: rowCount > 0 ? rowCount : void 0,
6900
6901
  columns
6901
6902
  });
6903
+ seenNames.add(name.toLowerCase());
6904
+ }
6905
+ const catalogMatch = description.match(/(?:All \d+ tables|Other tables)[^:]*:\n?([\s\S]*?)(?:\n\n|$)/i);
6906
+ if (catalogMatch) {
6907
+ const catalogText = catalogMatch[1];
6908
+ const entryPattern = /([\w.]+)\(([\d.]+[KMB]?)\s*rows?\)/gi;
6909
+ let match;
6910
+ while ((match = entryPattern.exec(catalogText)) !== null) {
6911
+ const fullName = match[1];
6912
+ const name = fullName.includes(".") ? fullName.split(".").pop() : fullName;
6913
+ if (seenNames.has(name.toLowerCase())) continue;
6914
+ const rowStr = match[2];
6915
+ let rowCount = 0;
6916
+ if (rowStr.endsWith("B")) rowCount = parseFloat(rowStr) * 1e9;
6917
+ else if (rowStr.endsWith("M")) rowCount = parseFloat(rowStr) * 1e6;
6918
+ else if (rowStr.endsWith("K")) rowCount = parseFloat(rowStr) * 1e3;
6919
+ else rowCount = parseInt(rowStr, 10);
6920
+ details.push({
6921
+ name,
6922
+ rowCount: rowCount > 0 ? Math.round(rowCount) : void 0,
6923
+ columns: []
6924
+ // Catalog entries have no column details
6925
+ });
6926
+ seenNames.add(name.toLowerCase());
6927
+ }
6902
6928
  }
6903
6929
  if (details.length === 0) {
6904
6930
  const endpointPattern = /Endpoint:\s*(\S+)/g;
@@ -6976,6 +7002,9 @@ var SourceAgent = class {
6976
7002
  * 2. Source agent's OWN LLM generates query via tool calling
6977
7003
  * 3. Execute with retry — all handled internally
6978
7004
  * 4. Return SourceAgentResult with data + isLimited metadata
7005
+ *
7006
+ * For very_large tier: LLM first calls search_schema to discover tables,
7007
+ * then writes SQL with correct table/column names.
6979
7008
  */
6980
7009
  async execute(input) {
6981
7010
  const startTime = Date.now();
@@ -6995,12 +7024,33 @@ var SourceAgent = class {
6995
7024
  const prompts = await this.buildPrompt(intent, aggregation);
6996
7025
  logger.logLLMPrompt(`sourceAgent:${this.tool.name}`, "system", extractPromptText(prompts.system));
6997
7026
  logger.logLLMPrompt(`sourceAgent:${this.tool.name}`, "user", prompts.user);
6998
- const llmTool = this.buildLLMToolDefinition();
7027
+ const tools = this.buildToolDefinitions();
6999
7028
  let executedTool = null;
7000
7029
  let resultData = [];
7001
7030
  let queryExecuted;
7002
7031
  let totalRowsMatched = 0;
7003
- const toolHandler = async (_toolName, toolInput) => {
7032
+ const schemaSearchFn = this.tool.schemaSearchFn;
7033
+ const toolHandler = async (toolName, toolInput) => {
7034
+ if (toolName.endsWith("_search_schema") && schemaSearchFn) {
7035
+ const keywords = toolInput.keywords || [];
7036
+ logger.info(`[SourceAgent:${this.tool.name}] Schema search: ${keywords.join(", ")}`);
7037
+ if (this.streamBuffer.hasCallback()) {
7038
+ this.streamBuffer.write(`\u{1F50D} **Searching schema for:** ${keywords.join(", ")}
7039
+
7040
+ `);
7041
+ await streamDelay();
7042
+ }
7043
+ const result = schemaSearchFn(keywords);
7044
+ if (this.streamBuffer.hasCallback()) {
7045
+ const matchCount = (result.match(/^• /gm) || []).length;
7046
+ this.streamBuffer.write(`\u{1F4CB} Found ${matchCount} matching table(s)
7047
+
7048
+ `);
7049
+ }
7050
+ return `Schema search results:
7051
+
7052
+ ${result}`;
7053
+ }
7004
7054
  this.attempts++;
7005
7055
  if (this.attempts > this.config.maxRetries) {
7006
7056
  throw new Error(`Max retry attempts (${this.config.maxRetries}) reached for ${this.tool.name}`);
@@ -7089,10 +7139,11 @@ ${formatted}`;
7089
7139
  Analyze the error and try again with a corrected query.`;
7090
7140
  }
7091
7141
  };
7092
- const maxIterations = this.config.maxRetries + 2;
7142
+ const hasSchemaSearch = !!schemaSearchFn;
7143
+ const maxIterations = this.config.maxRetries + 2 + (hasSchemaSearch ? 2 : 0);
7093
7144
  await LLM.streamWithTools(
7094
7145
  { sys: prompts.system, user: prompts.user },
7095
- [llmTool],
7146
+ tools,
7096
7147
  toolHandler,
7097
7148
  {
7098
7149
  model: this.config.sourceAgentModel || void 0,
@@ -7173,10 +7224,19 @@ Analyze the error and try again with a corrected query.`;
7173
7224
  const sourceName = this.tool.name;
7174
7225
  const sourceType = this.extractSourceType();
7175
7226
  const fullSchema = this.tool.fullSchema || this.tool.description || "No schema available";
7227
+ const hasSchemaSearch = !!this.tool.schemaSearchFn;
7228
+ const schemaSearchInstructions = hasSchemaSearch ? `## Schema Search
7229
+ This source has a large schema. The schema above shows a catalog of table names only.
7230
+ Before writing any query, you MUST use the search_schema tool to find exact table and column names.
7231
+ 1. Search with keywords related to the requested data
7232
+ 2. Review the returned column details carefully
7233
+ 3. Then write your SQL query using the exact names from the search results
7234
+ You may search multiple times with different keywords if the first search doesn't find what you need.` : "";
7176
7235
  const prompts = await promptLoader.loadPrompts("agent-source-query", {
7177
7236
  SOURCE_NAME: sourceName,
7178
7237
  SOURCE_TYPE: sourceType,
7179
7238
  FULL_SCHEMA: fullSchema,
7239
+ SCHEMA_SEARCH_INSTRUCTIONS: schemaSearchInstructions,
7180
7240
  MAX_ROWS: String(this.config.maxRowsPerSource),
7181
7241
  AGGREGATION_MODE: aggregation,
7182
7242
  GLOBAL_KNOWLEDGE_BASE: this.config.globalKnowledgeBase || "No global knowledge base available.",
@@ -7187,7 +7247,18 @@ Analyze the error and try again with a corrected query.`;
7187
7247
  return { system: prompts.system, user: prompts.user };
7188
7248
  }
7189
7249
  /**
7190
- * Build the LLM tool definition from the external tool.
7250
+ * Build all LLM tool definitions for this source agent.
7251
+ * Returns: [queryTool] by default, or [queryTool, searchSchemaTool] when the tool provides schemaSearchFn (intended for the very_large tier).
7252
+ */
7253
+ buildToolDefinitions() {
7254
+ const tools = [this.buildLLMToolDefinition()];
7255
+ if (this.tool.schemaSearchFn) {
7256
+ tools.push(this.buildSchemaSearchToolDefinition());
7257
+ }
7258
+ return tools;
7259
+ }
7260
+ /**
7261
+ * Build the query tool definition from the external tool.
7191
7262
  * Parses param descriptions like "string - Sheet name" or "array (optional) - Columns"
7192
7263
  * to extract the correct JSON schema type and required/optional status.
7193
7264
  */
@@ -7229,6 +7300,27 @@ Analyze the error and try again with a corrected query.`;
7229
7300
  }
7230
7301
  };
7231
7302
  }
7303
+ /**
7304
+ * Build the search_schema tool definition for very_large tier sources.
7305
+ * Allows the LLM to discover table/column names before writing SQL.
7306
+ */
7307
+ buildSchemaSearchToolDefinition() {
7308
+ return {
7309
+ name: `${this.tool.id}_search_schema`,
7310
+ description: "Search the database schema for tables and columns matching keywords. Returns full column details for matching tables. Use this BEFORE writing a query to find exact table and column names.",
7311
+ input_schema: {
7312
+ type: "object",
7313
+ properties: {
7314
+ keywords: {
7315
+ type: "array",
7316
+ items: { type: "string" },
7317
+ description: 'Keywords to search for in table names and column names (e.g., ["quotation", "amount", "customer"]). Use multiple keywords for broader results.'
7318
+ }
7319
+ },
7320
+ required: ["keywords"]
7321
+ }
7322
+ };
7323
+ }
7232
7324
  /**
7233
7325
  * Extract source type from tool ID.
7234
7326
  */
@@ -10177,6 +10269,8 @@ var get_agent_user_response = async (prompt, components, anthropicApiKey, groqAp
10177
10269
  description: t.description,
10178
10270
  toolType: t.toolType,
10179
10271
  fullSchema: t.fullSchema,
10272
+ schemaTier: t.schemaTier,
10273
+ schemaSearchFn: t.schemaSearchFn,
10180
10274
  fn: t.fn,
10181
10275
  limit: t.limit,
10182
10276
  outputSchema: t.outputSchema,