npm - @superatomai/sdk-node - Versions diffs - 0.0.14-mds → 0.0.15-mds - Mend

@superatomai/sdk-node 0.0.14-mds → 0.0.15-mds

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/index.d.mts CHANGED

@@ -2193,11 +2193,16 @@ interface ExternalTool {
     toolType?: 'source' | 'direct';
     /** Full untruncated schema for source agent (all columns visible) */
     fullSchema?: string;
+    /** Schema size tier: small (≤50 tables), medium (51-200), large (201-500), very_large (500+) */
+    schemaTier?: string;
+    /** Schema search function for very_large tier — keyword search over entities */
+    schemaSearchFn?: (keywords: string[]) => string;
     fn: (input: any) => Promise<any>;
     limit?: number;
     outputSchema?: any;
     executionType?: 'immediate' | 'deferred';
     userProvidedData?: any;
+    params?: Record<string, any>;
 }
 /**
  * Executed tool tracking info

package/dist/index.d.ts CHANGED

@@ -2193,11 +2193,16 @@ interface ExternalTool {
     toolType?: 'source' | 'direct';
     /** Full untruncated schema for source agent (all columns visible) */
     fullSchema?: string;
+    /** Schema size tier: small (≤50 tables), medium (51-200), large (201-500), very_large (500+) */
+    schemaTier?: string;
+    /** Schema search function for very_large tier — keyword search over entities */
+    schemaSearchFn?: (keywords: string[]) => string;
     fn: (input: any) => Promise<any>;
     limit?: number;
     outputSchema?: any;
     executionType?: 'immediate' | 'deferred';
     userProvidedData?: any;
+    params?: Record<string, any>;
 }
 /**
  * Executed tool tracking info

package/dist/index.js CHANGED

@@ -6886,6 +6886,7 @@ function formatSummariesForPrompt(summaries) {
 }
 function extractEntityDetails(description) {
   const details = [];
+  const seenNames = /* @__PURE__ */ new Set();
   const bulletSections = description.split(/(?=•\s)/);
   for (const section of bulletSections) {
     if (!section.trim().startsWith("\u2022")) continue;
@@ -6899,6 +6900,31 @@ function extractEntityDetails(description) {
       rowCount: rowCount > 0 ? rowCount : void 0,
       columns
     });
+    seenNames.add(name.toLowerCase());
+  }
+  const catalogMatch = description.match(/(?:All \d+ tables|Other tables)[^:]*:\n?([\s\S]*?)(?:\n\n|$)/i);
+  if (catalogMatch) {
+    const catalogText = catalogMatch[1];
+    const entryPattern = /([\w.]+)\(([\d.]+[KMB]?)\s*rows?\)/gi;
+    let match;
+    while ((match = entryPattern.exec(catalogText)) !== null) {
+      const fullName = match[1];
+      const name = fullName.includes(".") ? fullName.split(".").pop() : fullName;
+      if (seenNames.has(name.toLowerCase())) continue;
+      const rowStr = match[2];
+      let rowCount = 0;
+      if (rowStr.endsWith("B")) rowCount = parseFloat(rowStr) * 1e9;
+      else if (rowStr.endsWith("M")) rowCount = parseFloat(rowStr) * 1e6;
+      else if (rowStr.endsWith("K")) rowCount = parseFloat(rowStr) * 1e3;
+      else rowCount = parseInt(rowStr, 10);
+      details.push({
+        name,
+        rowCount: rowCount > 0 ? Math.round(rowCount) : void 0,
+        columns: []
+        // Catalog entries have no column details
+      });
+      seenNames.add(name.toLowerCase());
+    }
   }
   if (details.length === 0) {
     const endpointPattern = /Endpoint:\s*(\S+)/g;
@@ -6976,6 +7002,9 @@ var SourceAgent = class {
    * 2. Source agent's OWN LLM generates query via tool calling
    * 3. Execute with retry — all handled internally
    * 4. Return SourceAgentResult with data + isLimited metadata
+   *
+   * For very_large tier: LLM first calls search_schema to discover tables,
+   * then writes SQL with correct table/column names.
    */
   async execute(input) {
     const startTime = Date.now();
@@ -6995,12 +7024,33 @@ var SourceAgent = class {
       const prompts = await this.buildPrompt(intent, aggregation);
       logger.logLLMPrompt(`sourceAgent:${this.tool.name}`, "system", extractPromptText(prompts.system));
       logger.logLLMPrompt(`sourceAgent:${this.tool.name}`, "user", prompts.user);
-      const llmTool = this.buildLLMToolDefinition();
+      const tools = this.buildToolDefinitions();
       let executedTool = null;
       let resultData = [];
       let queryExecuted;
       let totalRowsMatched = 0;
-      const toolHandler = async (_toolName, toolInput) => {
+      const schemaSearchFn = this.tool.schemaSearchFn;
+      const toolHandler = async (toolName, toolInput) => {
+        if (toolName.endsWith("_search_schema") && schemaSearchFn) {
+          const keywords = toolInput.keywords || [];
+          logger.info(`[SourceAgent:${this.tool.name}] Schema search: ${keywords.join(", ")}`);
+          if (this.streamBuffer.hasCallback()) {
+            this.streamBuffer.write(`\u{1F50D} **Searching schema for:** ${keywords.join(", ")}
+`);
+            await streamDelay();
+          }
+          const result = schemaSearchFn(keywords);
+          if (this.streamBuffer.hasCallback()) {
+            const matchCount = (result.match(/^• /gm) || []).length;
+            this.streamBuffer.write(`\u{1F4CB} Found ${matchCount} matching table(s)
+`);
+          }
+          return `Schema search results:
+${result}`;
+        }
         this.attempts++;
         if (this.attempts > this.config.maxRetries) {
           throw new Error(`Max retry attempts (${this.config.maxRetries}) reached for ${this.tool.name}`);
@@ -7089,10 +7139,11 @@ ${formatted}`;
 Analyze the error and try again with a corrected query.`;
         }
       };
-      const maxIterations = this.config.maxRetries + 2;
+      const hasSchemaSearch = !!schemaSearchFn;
+      const maxIterations = this.config.maxRetries + 2 + (hasSchemaSearch ? 2 : 0);
       await LLM.streamWithTools(
         { sys: prompts.system, user: prompts.user },
-        [llmTool],
+        tools,
         toolHandler,
         {
           model: this.config.sourceAgentModel || void 0,
@@ -7173,10 +7224,19 @@ Analyze the error and try again with a corrected query.`;
     const sourceName = this.tool.name;
     const sourceType = this.extractSourceType();
     const fullSchema = this.tool.fullSchema || this.tool.description || "No schema available";
+    const hasSchemaSearch = !!this.tool.schemaSearchFn;
+    const schemaSearchInstructions = hasSchemaSearch ? `## Schema Search
+This source has a large schema. The schema above shows a catalog of table names only.
+Before writing any query, you MUST use the search_schema tool to find exact table and column names.
+1. Search with keywords related to the requested data
+2. Review the returned column details carefully
+3. Then write your SQL query using the exact names from the search results
+You may search multiple times with different keywords if the first search doesn't find what you need.` : "";
     const prompts = await promptLoader.loadPrompts("agent-source-query", {
       SOURCE_NAME: sourceName,
       SOURCE_TYPE: sourceType,
       FULL_SCHEMA: fullSchema,
+      SCHEMA_SEARCH_INSTRUCTIONS: schemaSearchInstructions,
       MAX_ROWS: String(this.config.maxRowsPerSource),
       AGGREGATION_MODE: aggregation,
       GLOBAL_KNOWLEDGE_BASE: this.config.globalKnowledgeBase || "No global knowledge base available.",
@@ -7187,7 +7247,18 @@ Analyze the error and try again with a corrected query.`;
     return { system: prompts.system, user: prompts.user };
   }
   /**
-   * Build the LLM tool definition from the external tool.
+   * Build all LLM tool definitions for this source agent.
+   * Returns: [queryTool] for most tiers, [queryTool, searchSchemaTool] for very_large.
+   */
+  buildToolDefinitions() {
+    const tools = [this.buildLLMToolDefinition()];
+    if (this.tool.schemaSearchFn) {
+      tools.push(this.buildSchemaSearchToolDefinition());
+    }
+    return tools;
+  }
+  /**
+   * Build the query tool definition from the external tool.
    * Parses param descriptions like "string - Sheet name" or "array (optional) - Columns"
    * to extract the correct JSON schema type and required/optional status.
    */
@@ -7229,6 +7300,27 @@ Analyze the error and try again with a corrected query.`;
       }
     };
   }
+  /**
+   * Build the search_schema tool definition for very_large tier sources.
+   * Allows the LLM to discover table/column names before writing SQL.
+   */
+  buildSchemaSearchToolDefinition() {
+    return {
+      name: `${this.tool.id}_search_schema`,
+      description: "Search the database schema for tables and columns matching keywords. Returns full column details for matching tables. Use this BEFORE writing a query to find exact table and column names.",
+      input_schema: {
+        type: "object",
+        properties: {
+          keywords: {
+            type: "array",
+            items: { type: "string" },
+            description: 'Keywords to search for in table names and column names (e.g., ["quotation", "amount", "customer"]). Use multiple keywords for broader results.'
+          }
+        },
+        required: ["keywords"]
+      }
+    };
+  }
   /**
    * Extract source type from tool ID.
    */
@@ -10177,6 +10269,8 @@ var get_agent_user_response = async (prompt, components, anthropicApiKey, groqAp
       description: t.description,
       toolType: t.toolType,
       fullSchema: t.fullSchema,
+      schemaTier: t.schemaTier,
+      schemaSearchFn: t.schemaSearchFn,
       fn: t.fn,
       limit: t.limit,
       outputSchema: t.outputSchema,