@exulu/backend 1.53.1 → 1.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ import { generateText, stepCountIs } from "ai";
2
+ import type { LanguageModel, Tool as AITool, ModelMessage } from "ai";
3
+ import { withRetry } from "@SRC/utils/with-retry";
4
+ import { harvestChunks } from "./tools";
5
+ import type { AgenticRetrievalOutput, ChunkResult } from "./types";
6
+
7
+ const MAX_STEPS = 10;
8
+
9
+ /**
10
+ * Observe → Infer → Act loop for V4 agentic retrieval.
11
+ *
12
+ * Unlike V3 (which pre-classifies, routes to strategies, and forces tool calls),
13
+ * this loop simply:
14
+ * 1. Calls the model with toolChoice "auto"
15
+ * 2. Executes whatever tools the model picks
16
+ * 3. Harvests any chunk-shaped rows from query results
17
+ * 4. Repeats until the model produces a text response (no tool calls) or
18
+ * the MAX_STEPS budget is exhausted
19
+ *
20
+ * The model decides when it has enough information — no finish_retrieval tool needed.
21
+ */
22
+ export async function* runAgentLoop(params: {
23
+ query: string;
24
+ systemPrompt: string;
25
+ tools: Record<string, AITool>;
26
+ model: LanguageModel;
27
+ onStepComplete?: (step: AgenticRetrievalOutput["steps"][0]) => void;
28
+ }): AsyncGenerator<AgenticRetrievalOutput> {
29
+ const { query, systemPrompt, tools, model, onStepComplete } = params;
30
+
31
+ const output: AgenticRetrievalOutput = {
32
+ steps: [],
33
+ reasoning: [],
34
+ chunks: [],
35
+ usage: [],
36
+ totalTokens: 0,
37
+ };
38
+
39
+ // Deduplicate chunks by chunk_id across all steps
40
+ const seenChunkIds = new Set<string>();
41
+
42
+ const messages: ModelMessage[] = [{ role: "user", content: query }];
43
+
44
+ for (let step = 0; step < MAX_STEPS; step++) {
45
+ console.log(`[EXULU] v4 agent loop — step ${step + 1}/${MAX_STEPS}`);
46
+
47
+ let result: Awaited<ReturnType<typeof generateText>>;
48
+ try {
49
+ result = await withRetry(() =>
50
+ generateText({
51
+ model,
52
+ temperature: 0,
53
+ system: systemPrompt,
54
+ messages,
55
+ tools,
56
+ toolChoice: "auto",
57
+ stopWhen: stepCountIs(1),
58
+ }),
59
+ );
60
+ } catch (err) {
61
+ console.error("[EXULU] v4 generateText failed:", err);
62
+ throw err;
63
+ }
64
+
65
+ // Append assistant turn + tool results to conversation history
66
+ messages.push(...(result.response.messages as ModelMessage[]));
67
+
68
+ // Harvest chunks from any execute_query tool results
69
+ const rawToolResults = (result.toolResults as any[]) ?? [];
70
+ const stepChunks: ChunkResult[] = [];
71
+ for (const chunk of harvestChunks(rawToolResults)) {
72
+ if (!chunk.chunk_id || !seenChunkIds.has(chunk.chunk_id)) {
73
+ if (chunk.chunk_id) seenChunkIds.add(chunk.chunk_id);
74
+ stepChunks.push(chunk);
75
+ }
76
+ }
77
+
78
+ // Record step
79
+ const stepRecord: AgenticRetrievalOutput["steps"][0] = {
80
+ stepNumber: step + 1,
81
+ text: result.text ?? "",
82
+ toolCalls:
83
+ (result.toolCalls as any[])?.map((tc) => ({
84
+ name: tc.toolName,
85
+ id: tc.toolCallId,
86
+ input: tc.input,
87
+ })) ?? [],
88
+ chunks: stepChunks,
89
+ tokens: result.usage?.totalTokens ?? 0,
90
+ };
91
+
92
+ output.steps.push(stepRecord);
93
+ output.reasoning.push({
94
+ text: result.text ?? "",
95
+ tools:
96
+ (result.toolCalls as any[])?.map((tc) => ({
97
+ name: tc.toolName,
98
+ id: tc.toolCallId,
99
+ input: tc.input,
100
+ output: rawToolResults.find(
101
+ (r: any) => (r.toolCallId ?? r.id) === tc.toolCallId,
102
+ )?.output,
103
+ })) ?? [],
104
+ });
105
+ output.chunks.push(...stepChunks);
106
+ output.usage.push(result.usage);
107
+
108
+ onStepComplete?.(stepRecord);
109
+
110
+ yield { ...output };
111
+
112
+ // Stop when the model wrote a text response without calling any tools
113
+ const calledTools = (result.toolCalls as any[])?.length > 0;
114
+ if (!calledTools) {
115
+ console.log(`[EXULU] v4 — model finished after step ${step + 1} (no tool calls)`);
116
+ break;
117
+ }
118
+ }
119
+
120
+ output.totalTokens = output.usage.reduce((sum, u) => sum + (u?.totalTokens ?? 0), 0);
121
+ }
@@ -0,0 +1,76 @@
1
+ import type { ExuluContext } from "@SRC/exulu/context";
2
+ import type { User } from "@EXULU_TYPES/models/user";
3
+
4
+ /**
5
+ * Finds embed('text') or embed('text', 'contextId') calls in a SQL string,
6
+ * generates the embedding vectors using the appropriate context's embedder,
7
+ * and substitutes them with ARRAY[...]::vector literals so db.raw() can execute it.
8
+ *
9
+ * Examples:
10
+ * embed('machine learning') → uses first context that has an embedder
11
+ * embed('machine learning', 'ctx1') → uses the embedder from context 'ctx1'
12
+ */
13
+ export async function preprocessEmbedCalls(
14
+ sql: string,
15
+ contexts: ExuluContext[],
16
+ user?: User,
17
+ role?: string,
18
+ ): Promise<string> {
19
+ // Match embed('...') or embed('...', 'contextId')
20
+ // We use a global regex but process matches manually so we can await async calls
21
+ const EMBED_RE = /embed\('((?:[^'\\]|\\.)*)'\s*(?:,\s*'((?:[^'\\]|\\.)*)')?\)/gi;
22
+
23
+ const matches: { fullMatch: string; text: string; contextId?: string; index: number }[] = [];
24
+
25
+ let m: RegExpExecArray | null;
26
+ while ((m = EMBED_RE.exec(sql)) !== null) {
27
+ matches.push({
28
+ fullMatch: m[0],
29
+ text: m[1],
30
+ contextId: m[2] || undefined,
31
+ index: m.index,
32
+ });
33
+ }
34
+
35
+ if (matches.length === 0) return sql;
36
+
37
+ // Generate all embeddings in parallel
38
+ const substitutions = await Promise.all(
39
+ matches.map(async ({ text, contextId }) => {
40
+ const context = contextId
41
+ ? contexts.find((c) => c.id === contextId)
42
+ : contexts.find((c) => c.embedder != null);
43
+
44
+ if (!context?.embedder) {
45
+ throw new Error(
46
+ `No embedder available${contextId ? ` for context "${contextId}"` : ""}. ` +
47
+ `Available contexts with embedders: [${contexts.filter((c) => c.embedder).map((c) => c.id).join(", ")}]`,
48
+ );
49
+ }
50
+
51
+ const result = await context.embedder.generateFromQuery(
52
+ context.id,
53
+ text,
54
+ undefined,
55
+ (user as any)?.id,
56
+ role,
57
+ );
58
+
59
+ const vector = result?.chunks?.[0]?.vector;
60
+ if (!vector?.length) {
61
+ throw new Error(`Embedder returned no vector for text: "${text}"`);
62
+ }
63
+
64
+ return `ARRAY[${vector.join(",")}]::vector`;
65
+ }),
66
+ );
67
+
68
+ // Replace in reverse order so indices stay valid
69
+ let result = sql;
70
+ for (let i = matches.length - 1; i >= 0; i--) {
71
+ const { fullMatch, index } = matches[i];
72
+ result = result.slice(0, index) + substitutions[i] + result.slice(index + fullMatch.length);
73
+ }
74
+
75
+ return result;
76
+ }
@@ -0,0 +1,181 @@
1
+ import * as os from "os";
2
+ import * as path from "path";
3
+ import * as fs from "fs/promises";
4
+ import { z } from "zod";
5
+ import { randomUUID } from "crypto";
6
+ import type { LanguageModel } from "ai";
7
+ import type { ExuluContext } from "@SRC/exulu/context";
8
+ import type { ExuluReranker } from "@SRC/exulu/reranker";
9
+ import { ExuluTool } from "@SRC/exulu/tool";
10
+ import type { User } from "@EXULU_TYPES/models/user";
11
+ import { checkLicense } from "@EE/entitlements";
12
+ import { createTools } from "./tools";
13
+ import { buildSystemPrompt } from "./system-prompt";
14
+ import { runAgentLoop } from "./agent-loop";
15
+ import type { AgenticRetrievalOutput } from "./types";
16
+
17
+ async function* executeV4({
18
+ query,
19
+ contexts,
20
+ model,
21
+ user,
22
+ role,
23
+ customInstructions,
24
+ }: {
25
+ query: string;
26
+ contexts: ExuluContext[];
27
+ model: LanguageModel;
28
+ user?: User;
29
+ role?: string;
30
+ customInstructions?: string;
31
+ }): AsyncGenerator<AgenticRetrievalOutput> {
32
+ // Per-call temp directory — cleaned up after the loop finishes
33
+ const sessionId = randomUUID();
34
+ const sessionDir = path.join(os.tmpdir(), `exulu-v4-${sessionId}`);
35
+
36
+ console.log("[EXULU] v4 — starting observe-infer-act retrieval");
37
+
38
+ const tools = createTools({ contexts, user, role, sessionDir });
39
+ const systemPrompt = buildSystemPrompt(contexts, customInstructions);
40
+
41
+ let finalOutput: AgenticRetrievalOutput | undefined;
42
+
43
+ try {
44
+ for await (const output of runAgentLoop({
45
+ query,
46
+ systemPrompt,
47
+ tools,
48
+ model,
49
+ })) {
50
+ finalOutput = output;
51
+ yield output;
52
+ }
53
+ } finally {
54
+ // Best-effort cleanup of temp files
55
+ fs.rm(sessionDir, { recursive: true, force: true }).catch(() => {});
56
+ }
57
+
58
+ if (finalOutput) {
59
+ console.log(
60
+ `[EXULU] v4 — done. steps=${finalOutput.steps.length} chunks=${finalOutput.chunks.length} tokens=${finalOutput.totalTokens}`,
61
+ );
62
+ }
63
+ }
64
+
65
+ /**
66
+ * Creates the V4 ExuluTool for agentic context retrieval.
67
+ *
68
+ * V4 uses an observe-infer-act loop with two primitive tools:
69
+ * - execute_query: raw PostgreSQL SELECT via db.raw (with embed() helper for semantic search)
70
+ * - grep: iterative search on large result files
71
+ *
72
+ * Unlike V3, there is no upfront query classification or strategy routing.
73
+ * The agent writes its own SQL and decides when it has found enough information.
74
+ */
75
+ export function createAgenticRetrievalToolV4({
76
+ contexts,
77
+ instructions: adminInstructions,
78
+ rerankers,
79
+ user,
80
+ role,
81
+ model,
82
+ }: {
83
+ contexts: ExuluContext[];
84
+ rerankers: ExuluReranker[];
85
+ user?: User;
86
+ role?: string;
87
+ model?: LanguageModel;
88
+ instructions?: string;
89
+ }): ExuluTool | undefined {
90
+ const license = checkLicense();
91
+ if (!license["agentic-retrieval"]) {
92
+ console.warn("[EXULU] Not licensed for agentic retrieval");
93
+ return undefined;
94
+ }
95
+
96
+ const contextNames = contexts.map((c) => c.id).join(", ");
97
+
98
+ return new ExuluTool({
99
+ id: "agentic_context_search_v4",
100
+ name: "Agentic Context Search (V4)",
101
+ description: `Observe-infer-act retrieval using raw SQL. Searches: ${contextNames}`,
102
+ category: "contexts",
103
+ needsApproval: false,
104
+ type: "context",
105
+ config: [
106
+ {
107
+ name: "instructions",
108
+ description: "Custom instructions for the retrieval agent",
109
+ type: "string",
110
+ default: "",
111
+ },
112
+ {
113
+ name: "reasoning_model",
114
+ description:
115
+ "Override the model used by the retrieval agent (default: inherits from calling agent)",
116
+ type: "string",
117
+ default: "",
118
+ },
119
+ ...contexts.map((ctx) => ({
120
+ name: ctx.id,
121
+ description: `Enable search in "${ctx.name}". ${ctx.description}`,
122
+ type: "boolean" as const,
123
+ default: true,
124
+ })),
125
+ ],
126
+ inputSchema: z.object({
127
+ query: z.string().describe("The question or query to answer"),
128
+ userInstructions: z
129
+ .string()
130
+ .optional()
131
+ .describe("Additional instructions from the user to guide retrieval"),
132
+ }),
133
+ execute: async function* ({
134
+ query,
135
+ userInstructions,
136
+ toolVariablesConfig,
137
+ }: {
138
+ query: string;
139
+ userInstructions?: string;
140
+ toolVariablesConfig?: Record<string, any>;
141
+ }) {
142
+ if (!model) {
143
+ throw new Error("Model is required for executing the agentic retrieval tool");
144
+ }
145
+
146
+ let activeContexts = contexts;
147
+ let configInstructions = "";
148
+
149
+ if (toolVariablesConfig) {
150
+ configInstructions = toolVariablesConfig["instructions"] ?? "";
151
+
152
+ activeContexts = contexts.filter(
153
+ (ctx) =>
154
+ toolVariablesConfig[ctx.id] === true ||
155
+ toolVariablesConfig[ctx.id] === "true" ||
156
+ toolVariablesConfig[ctx.id] === 1,
157
+ );
158
+ if (activeContexts.length === 0) activeContexts = contexts;
159
+ }
160
+
161
+ const combinedInstructions = [
162
+ configInstructions ? `Configuration instructions: ${configInstructions}` : "",
163
+ adminInstructions ? `Admin instructions: ${adminInstructions}` : "",
164
+ userInstructions ? `User instructions: ${userInstructions}` : "",
165
+ ]
166
+ .filter(Boolean)
167
+ .join("\n");
168
+
169
+ for await (const output of executeV4({
170
+ query,
171
+ contexts: activeContexts,
172
+ model,
173
+ user,
174
+ role,
175
+ customInstructions: combinedInstructions || undefined,
176
+ })) {
177
+ yield { result: JSON.stringify(output) };
178
+ }
179
+ },
180
+ });
181
+ }
@@ -0,0 +1,248 @@
1
+ import { getTableName, getChunksTableName, type ExuluContext } from "@SRC/exulu/context";
2
+
3
/**
 * Builds the system prompt for the V4 observe-infer-act retrieval agent.
 *
 * The prompt includes:
 * 1. The observe-infer-act loop philosophy
 * 2. The full database schema for every available context
 * 3. Common SQL query patterns (keyword, semantic, hybrid, aggregation)
 * 4. Instructions on when/how to use grep for large result sets
 * 5. The standard column alias convention the agent should follow
 *
 * @param contexts           Contexts whose schemas are embedded in the prompt.
 * @param customInstructions Optional extra instructions inserted near the end
 *                           of the prompt (before the closing directive).
 * @returns The complete system prompt string.
 */
export function buildSystemPrompt(
  contexts: ExuluContext[],
  customInstructions?: string,
): string {
  // Per-context markdown schema sections (see buildSchemaBlock below).
  const schemaBlock = buildSchemaBlock(contexts);
  // Semantic + hybrid SQL examples are only included when at least one
  // context can actually produce embeddings; otherwise the prompt says so.
  const hasEmbedder = contexts.some((c) => c.embedder != null);

  // NOTE: template content is left-aligned on purpose — every line below
  // becomes part of the prompt verbatim (`\` suppresses the leading newline).
  return `\
You are a knowledge base retrieval agent. Your job is to find all information relevant to the user's query.

## Approach: Observe → Infer → Act

Work iteratively:
1. **Observe** — examine what data you have and what the query asks for
2. **Infer** — decide what SQL query will best surface relevant information
3. **Act** — execute the query and study the results
4. Repeat until you have found sufficient information, then write your final answer.

Do NOT guess or hallucinate. If results are empty, try alternative queries (different keywords,
broader filters, semantic search). Exhaust the available search strategies before concluding
that no relevant data exists.

---

## Database Schema

${schemaBlock}

---

## Query Patterns

### Keyword / Full-Text Search
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE c.fts @@ plainto_tsquery('english', 'your search terms')
  AND (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY ts_rank(c.fts, plainto_tsquery('english', 'your search terms')) DESC
LIMIT 20;
\`\`\`

For German text use \`'german'\` instead of \`'english'\`.
For multi-language, use \`websearch_to_tsquery\` or UNION both languages.
${
  hasEmbedder
    ? `
### Semantic Search (use embed() helper)
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context,
  c.embedding <=> embed('your concept here') AS distance
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY distance ASC
LIMIT 20;
\`\`\`

### Hybrid Search (keyword + semantic combined via RRF)
\`\`\`sql
WITH fts AS (
  SELECT id, ROW_NUMBER() OVER (ORDER BY ts_rank(fts, q) DESC) AS rank
  FROM <context_id>_chunks, plainto_tsquery('english', 'your query') q
  WHERE fts @@ q
  LIMIT 500
),
sem AS (
  SELECT id, ROW_NUMBER() OVER (ORDER BY embedding <=> embed('your query') ASC) AS rank
  FROM <context_id>_chunks
  LIMIT 500
),
rrf AS (
  SELECT
    COALESCE(fts.id, sem.id) AS id,
    (COALESCE(1.0 / (50 + fts.rank), 0) * 2 + COALESCE(1.0 / (50 + sem.rank), 0)) AS score
  FROM fts FULL OUTER JOIN sem ON fts.id = sem.id
)
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context,
  rrf.score
FROM rrf
JOIN <context_id>_chunks c ON c.id = rrf.id
JOIN <context_id>_items i ON c.source = i.id
WHERE (i.archived IS FALSE OR i.archived IS NULL)
ORDER BY rrf.score DESC
LIMIT 20;
\`\`\`
`
    : `
Note: No embedder is configured for these contexts. Use keyword/full-text search only.
`
}
### Browse all chunks of a specific document (in order)
\`\`\`sql
SELECT
  c.id AS chunk_id,
  c.chunk_index,
  c.content AS chunk_content,
  c.metadata,
  c.source AS item_id,
  i.name AS item_name,
  '<context_id>' AS context
FROM <context_id>_chunks c
JOIN <context_id>_items i ON c.source = i.id
WHERE c.source = '<item_id>'
ORDER BY c.chunk_index;
\`\`\`

### Count / aggregate
\`\`\`sql
SELECT COUNT(*) FROM <context_id>_items WHERE archived IS FALSE;
SELECT COUNT(*) FROM <context_id>_chunks;
\`\`\`

### Explore item names (when query is about a specific document)
\`\`\`sql
SELECT id, name, external_id, "createdAt"
FROM <context_id>_items
WHERE (archived IS FALSE OR archived IS NULL)
  AND LOWER(name) LIKE '%keyword%'
LIMIT 50;
\`\`\`

### Filter by custom metadata on chunks
\`\`\`sql
SELECT chunk_id, chunk_content, item_name, context
FROM ...
WHERE c.metadata->>'page' = '5'
   OR c.metadata @> '{"category": "finance"}'
\`\`\`

---

## Column Alias Convention

**Always use these aliases** in queries that return chunks so results are collected correctly:

| Alias | Source column |
|----------------|-------------------------|
| \`chunk_id\` | \`c.id\` |
| \`chunk_index\` | \`c.chunk_index\` |
| \`chunk_content\`| \`c.content\` |
| \`item_id\` | \`c.source\` |
| \`item_name\` | \`i.name\` |
| \`context\` | literal context id string |
| \`metadata\` | \`c.metadata\` |

---

## Handling Large Results

When execute_query returns a file path (results > 20k chars):
1. Use \`grep\` with a specific pattern to find relevant sections
2. Multiple grep calls are fine — narrow down iteratively
3. Once you know specific \`item_id\` or \`chunk_id\` values, run a targeted SELECT to get full content

---

## Search Strategy

- **Start broad**: use keyword or hybrid search with your main terms, LIMIT 30–50
- **Go deeper**: if results are sparse, try alternative phrasings, synonyms, or semantic search
- **Drill into documents**: once you find a relevant item, fetch its chunks in order to get full context
- **Cross-context**: search multiple contexts when the query could span knowledge bases
- **Aggregate last**: use COUNT queries only for "how many" questions

---
${customInstructions ? `## Additional Instructions\n\n${customInstructions}\n\n---\n` : ""}
When you have gathered sufficient information, write a clear answer. Do not call any more tools once you have what you need.`;
}
204
+
205
/**
 * Renders one markdown schema section per context: the items table, the
 * chunks table, any custom fields, and a note on whether an embedder (and
 * thus the embed() SQL helper) is available for that context.
 *
 * Consumed verbatim by buildSystemPrompt's "Database Schema" section.
 *
 * @param contexts Contexts to describe.
 * @returns All per-context sections joined by blank lines.
 */
function buildSchemaBlock(contexts: ExuluContext[]): string {
  return contexts
    .map((ctx) => {
      // Physical table names are derived from the context id.
      const itemsTable = getTableName(ctx.id);
      const chunksTable = getChunksTableName(ctx.id);

      // Two-space indent matches the fixed column list in the template below.
      const customFields =
        ctx.fields.length > 0
          ? ctx.fields.map((f) => `  ${f.name} (${f.type})`).join("\n")
          : "  (no custom fields)";

      const embedderNote = ctx.embedder
        ? `Embedder: ${ctx.embedder.name} — semantic search and embed() are available`
        : "No embedder — use keyword search only";

      return `### Context: "${ctx.name}" (id: \`${ctx.id}\`)
${ctx.description || ""}
${embedderNote}

**${itemsTable}** — documents / items
  id (uuid, primary key)
  name (text)
  external_id (text, nullable)
  archived (boolean, nullable)
  created_by (integer, nullable)
  rights_mode (text, nullable)
  "createdAt" (timestamp)
  "updatedAt" (timestamp)
  -- Custom fields:
${customFields}

**${chunksTable}** — text chunks (source FK → ${itemsTable}.id)
  id (uuid, primary key)
  source (uuid, FK → ${itemsTable}.id)
  content (text)
  chunk_index (integer)
  fts (tsvector — full-text search index)
  embedding (vector — pgvector, nullable)
  metadata (jsonb, nullable)
  "createdAt" (timestamp)
  "updatedAt" (timestamp)`;
    })
    .join("\n\n");
}