@ema.co/mcp-toolkit 2026.1.26-4 → 2026.1.27

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @ema.co/mcp-toolkit might be problematic. More details are linked from the original diff page.

@@ -1,11 +1,27 @@
1
1
  /**
2
- * Auto Builder Knowledge Base
2
+ * Ema Platform Knowledge Base
3
3
  *
4
- * Structured knowledge about Ema Auto Builder and AI Employee platform for MCP exposure.
5
- * Sources:
6
- * - Ema User Guide documentation
7
- * - Platform best practices
8
- * - Validated patterns from production workflows
4
+ * This file contains TWO types of content:
5
+ *
6
+ * 1. AUTO-GENERATED (API Fallbacks)
7
+ * - AGENT_CATALOG, WIDGET_CATALOG, TYPE_COMPATIBILITY
8
+ * - Generated from source repos via scheduled workflows
9
+ * - Used as fallbacks when API unavailable
10
+ * - TODO: Move to src/sdk/generated/ and import here
11
+ *
12
+ * 2. CURATED KNOWLEDGE (Human-Maintained)
13
+ * - PLATFORM_CONCEPTS, WORKFLOW_PATTERNS, GUIDANCE_TOPICS, etc.
14
+ * - Source repos are INPUT, not law
15
+ * - Optimized for customer-facing MCP users
16
+ * - Updated via knowledge scanning workflow + human review
17
+ *
18
+ * Input Sources (for curated content):
19
+ * - ema-repos/ema/docs/ - Platform documentation
20
+ * - ema-repos/ema/workflow_actions/documentation/ - Agent docs
21
+ * - ema-repos/protos/ - Type definitions
22
+ * - Support cases and real-world usage patterns
23
+ *
24
+ * See: .ctx/docs/source-repos.md for full architecture
9
25
  */
10
26
  // ─────────────────────────────────────────────────────────────────────────────
11
27
  // Platform Concepts (from Ema User Guide)
@@ -718,19 +734,66 @@ export const AGENT_CATALOG = [
718
734
  // ─────────────────────────────────────────────────────────────────────────────
719
735
  // Widget Reference
720
736
  // ─────────────────────────────────────────────────────────────────────────────
737
+ // ═══════════════════════════════════════════════════════════════════════════════════
738
+ // WIDGET_CATALOG (NO API ALTERNATIVE - Intentionally Static)
739
+ // ═══════════════════════════════════════════════════════════════════════════════════
740
+ //
741
+ // WHY STATIC: There is no API endpoint to list widget definitions. Widget metadata
742
+ // lives in persona template YAML files and is not exposed via the API.
743
+ //
744
+ // RUNTIME ALTERNATIVE: For a specific persona, fetch widgets dynamically via:
745
+ // persona(id="...", include_workflow=true) → proto_config.widgets[].name
746
+ //
747
+ // SOURCE: ema-repos/ema/ema_backend/db/system_values/persona_templates/prod/*.yaml
748
+ // SYNC: Manual - run knowledge scan workflow to detect template changes
749
+ //
750
+ // TECHNICAL NOTES:
751
+ // - Widget 'id' is the WidgetType enum value from protos
752
+ // - Widget 'name' is assigned per-template (e.g., "upload", "upload1", "upload2")
753
+ // - Proto 'case' values represent TYPE discriminators, not widget names
754
+ //
755
+ // See .ctx/docs/source-repos.md for full sync instructions
756
+ // ═══════════════════════════════════════════════════════════════════════════════════
721
757
  export const WIDGET_CATALOG = [
758
+ // ═══════════════════════════════════════════════════════════════════════════
722
759
  // Voice AI Widgets
723
- { id: 38, name: "voiceSettings", description: "Language hints, voice model selection", requiredFor: ["voice"], fields: ["languageHints", "voiceModel"] },
724
- { id: 39, name: "conversationSettings", description: "Identity, purpose, action instructions, hangup rules - core persona configuration", requiredFor: ["voice"], fields: ["welcomeMessage", "identityAndPurpose", "takeActionInstructions", "hangupInstructions", "transferCallInstructions", "speechCharacteristics", "systemPrompt", "formFillingInstructions", "waitMessage"] },
760
+ // ═══════════════════════════════════════════════════════════════════════════
761
+ // SOURCE: voicebot_ai_employee.yaml
762
+ // LAST VERIFIED: 2026-01-27
763
+ { id: 38, name: "voiceSettings", description: "Language hints, voice model selection (title: 'Voice and language')", requiredFor: ["voice"], fields: ["languageHints", "voiceModel"] },
764
+ { id: 39, name: "conversationSettings", description: "Identity, purpose, action instructions, hangup rules (title: 'Conversational behavior')", requiredFor: ["voice"], fields: ["welcomeMessage", "identityAndPurpose", "takeActionInstructions", "hangupInstructions", "transferCallInstructions", "speechCharacteristics", "systemPrompt", "formFillingInstructions", "waitMessage"] },
725
765
  { id: 43, name: "vadSettings", description: "Voice activity detection settings", requiredFor: ["voice"], fields: ["turnTimeout", "silenceEndCallTimeout", "maxConversationDuration"] },
726
766
  { id: 42, name: "dataStorageSettings", description: "Audio/transcript recording settings", requiredFor: ["voice"], fields: ["storeAudioRecording", "storeTranscripts", "storeAgentTranscript"] },
767
+ { id: 41, name: "callSettings", description: "Call forwarding, spam prevention settings", requiredFor: ["voice"], fields: ["enableCallForwarding", "callForwardingNumber", "enableSpamCallPrevention"] },
768
+ { id: 44, name: "voicebotPhoneNumber", description: "Phone number configuration (title: 'Phone numbers')", requiredFor: ["voice"], fields: ["phoneNumber"] },
769
+ { id: 40, name: "voicebotFeedbackCollection", description: "Post-call feedback collection (title: 'Feedback collection')", requiredFor: ["voice"], fields: [] },
770
+ // ═══════════════════════════════════════════════════════════════════════════
727
771
  // Chat AI Widgets
772
+ // ═══════════════════════════════════════════════════════════════════════════
773
+ // SOURCE: TBD - need to verify from chat template
728
774
  { id: 28, name: "chatbotSdkConfig", description: "Chat widget configuration and theming", requiredFor: ["chat"], fields: ["theme", "position", "initialMessage"] },
729
775
  { id: 33, name: "feedbackMessage", description: "Feedback collection settings", requiredFor: ["chat"], fields: ["enabled", "prompt"] },
776
+ // ═══════════════════════════════════════════════════════════════════════════
777
+ // Document Generation Widgets
778
+ // ═══════════════════════════════════════════════════════════════════════════
779
+ // SOURCE: ema-repos/ema/ema_backend/db/system_values/persona_templates/prod/document_proposal_manager.yaml
780
+ // NOTE: Widget names are defined per-template. These are from Document Proposal Manager.
781
+ // To sync: see .ctx/docs/source-repos.md
782
+ // LAST VERIFIED: 2026-01-27 from document_proposal_manager.yaml
783
+ { id: 3, name: "upload", description: "Content Repository - gold standard company docs (title: 'Content Repository')", requiredFor: ["document"], fields: ["tags"], uploadTarget: true },
784
+ { id: 3, name: "upload1", description: "Service Line Documents - business unit specific content (title: 'Service Line Documents')", requiredFor: ["document"], fields: ["tags"], uploadTarget: true },
785
+ { id: 3, name: "upload2", description: "Style Guide - formatting and tone guide (title: 'Style Guide')", requiredFor: ["document"], fields: ["tags"], uploadTarget: true },
786
+ { id: 29, name: "fileTagging0", description: "File tagging configuration (title: 'Set Tags')", requiredFor: ["document"], fields: ["tagTypes", "fileTagMappings"] },
787
+ { id: 5, name: "answerFormat0", description: "Proposal instructions - format, tone, language (title: 'Proposal Instructions')", requiredFor: ["document"], fields: ["textValue"] },
788
+ { id: 16, name: "sectionConfigWidget", description: "Section categories with instructions (title: 'Section Categories')", requiredFor: ["document"], fields: ["sections"] },
789
+ // ═══════════════════════════════════════════════════════════════════════════
730
790
  // Common Widgets (all types)
731
- { id: 3, name: "fileUpload", description: "Document upload configuration", requiredFor: ["voice", "chat", "dashboard"], fields: ["allowedTypes", "maxSize"] },
732
- { id: 6, name: "fusionModel", description: "EmaFusion model selection (GPT-4, Claude, etc.)", requiredFor: ["voice", "chat", "dashboard"], fields: ["allModels", "selectedModels"] },
733
- { id: 8, name: "dataProtection", description: "PII redaction settings", requiredFor: ["voice", "chat", "dashboard"], fields: ["protectedClasses"] },
791
+ // ═══════════════════════════════════════════════════════════════════════════
792
+ // These appear across multiple template types
793
+ { id: 3, name: "fileUpload", description: "Default document upload (KB files, title: 'Data sources')", requiredFor: ["voice", "chat", "dashboard"], fields: ["allowedTypes", "maxSize"], uploadTarget: true },
794
+ { id: 6, name: "fusionModel", description: "EmaFusion model selection (GPT-4, Claude, etc., title: 'EmaFusion™ model')", requiredFor: ["voice", "chat", "dashboard", "document"], fields: ["allModels", "selectedModels"] },
795
+ { id: 8, name: "dataProtection", description: "PII redaction settings", requiredFor: ["voice", "chat", "dashboard", "document"], fields: ["protectedClasses"] },
796
+ { id: 9, name: "copyrightCheck", description: "Copyright infringement checker (title: 'Copyright Checker')", requiredFor: ["document"], fields: [] },
734
797
  ];
735
798
  // Project type mapping
736
799
  export const PROJECT_TYPES = {
@@ -740,7 +803,18 @@ export const PROJECT_TYPES = {
740
803
  document: 3,
741
804
  };
742
805
  // ─────────────────────────────────────────────────────────────────────────────
743
- // Type Compatibility
806
+ // Type Compatibility (CANONICAL SOURCE)
807
+ // ─────────────────────────────────────────────────────────────────────────────
808
+ // This is the canonical, user-facing documentation of type compatibility.
809
+ //
810
+ // SOURCE: ema-repos/protos/service/workflows/v1/well_known.proto
811
+ // SYNC: TODO - Add to catalog-sync.yml workflow for automatic updates
812
+ //
813
+ // Other type compatibility definitions serve different purposes:
814
+ // - INTENT_TYPE_ROUTING (workflow-intent.ts) - routing logic during intent processing
815
+ // - SCHEMA_TYPE_COMPATIBILITY (action-schema-parser.ts) - input name matching for validation
816
+ //
817
+ // See .ctx/docs/source-repos.md for full architecture
744
818
  // ─────────────────────────────────────────────────────────────────────────────
745
819
  export const TYPE_COMPATIBILITY = [
746
820
  // Chat conversation compatibility
@@ -1461,6 +1535,13 @@ export function getAgentByName(actionName) {
1461
1535
  export function getWidgetsForPersonaType(type) {
1462
1536
  return WIDGET_CATALOG.filter(w => w.requiredFor.includes(type));
1463
1537
  }
1538
+ /**
1539
+ * Get widgets that accept file uploads for a persona type.
1540
+ * Use these widget names with data(method="upload", widget_name="...").
1541
+ */
1542
+ export function getUploadWidgetsForPersonaType(type) {
1543
+ return WIDGET_CATALOG.filter(w => w.requiredFor.includes(type) && w.uploadTarget);
1544
+ }
1464
1545
  export function checkTypeCompatibility(sourceType, targetType) {
1465
1546
  return TYPE_COMPATIBILITY.find(t => t.sourceType === sourceType && t.targetType === targetType);
1466
1547
  }
@@ -1702,570 +1783,6 @@ export function parseWorkflowDef(workflowDef) {
1702
1783
  }
1703
1784
  return nodes;
1704
1785
  }
1705
- /**
1706
- * Build an adjacency list from workflow nodes
1707
- */
1708
- function buildAdjacencyList(nodes) {
1709
- const adj = new Map();
1710
- // Initialize all nodes
1711
- for (const node of nodes) {
1712
- if (!adj.has(node.id)) {
1713
- adj.set(node.id, new Set());
1714
- }
1715
- }
1716
- // Build edges from incoming_edges (reverse direction for adjacency)
1717
- for (const node of nodes) {
1718
- if (node.incoming_edges) {
1719
- for (const edge of node.incoming_edges) {
1720
- const sourceId = edge.source_node_id;
1721
- if (!adj.has(sourceId)) {
1722
- adj.set(sourceId, new Set());
1723
- }
1724
- adj.get(sourceId).add(node.id);
1725
- }
1726
- }
1727
- }
1728
- return adj;
1729
- }
1730
- /**
1731
- * Detect cycles in the workflow graph using DFS
1732
- */
1733
- function detectCycles(nodes) {
1734
- const issues = [];
1735
- const adj = buildAdjacencyList(nodes);
1736
- const visited = new Set();
1737
- const recStack = new Set();
1738
- const cycleNodes = [];
1739
- function dfs(nodeId, path) {
1740
- visited.add(nodeId);
1741
- recStack.add(nodeId);
1742
- const neighbors = adj.get(nodeId) ?? new Set();
1743
- for (const neighbor of neighbors) {
1744
- if (!visited.has(neighbor)) {
1745
- if (dfs(neighbor, [...path, nodeId])) {
1746
- return true;
1747
- }
1748
- }
1749
- else if (recStack.has(neighbor)) {
1750
- // Found cycle
1751
- const cycleStart = path.indexOf(neighbor);
1752
- if (cycleStart >= 0) {
1753
- cycleNodes.push(...path.slice(cycleStart), nodeId);
1754
- }
1755
- else {
1756
- cycleNodes.push(neighbor, nodeId);
1757
- }
1758
- return true;
1759
- }
1760
- }
1761
- recStack.delete(nodeId);
1762
- return false;
1763
- }
1764
- for (const node of nodes) {
1765
- if (!visited.has(node.id)) {
1766
- if (dfs(node.id, [])) {
1767
- issues.push({
1768
- type: "cycle",
1769
- severity: "critical",
1770
- nodes: [...new Set(cycleNodes)],
1771
- auto_fixable: false, // Cycles require manual restructuring
1772
- reason: `Circular dependency detected: ${cycleNodes.join(" → ")}`,
1773
- });
1774
- break; // One cycle is enough to report
1775
- }
1776
- }
1777
- }
1778
- return issues;
1779
- }
1780
- /**
1781
- * Find nodes that consume outputs from a given node (have incoming_edges from it)
1782
- */
1783
- function findNodeConsumers(nodeId, nodes) {
1784
- const consumers = [];
1785
- for (const n of nodes) {
1786
- if (n.incoming_edges) {
1787
- for (const edge of n.incoming_edges) {
1788
- if (edge.source_node_id === nodeId) {
1789
- consumers.push(n.id);
1790
- break; // Only count each consumer once
1791
- }
1792
- }
1793
- }
1794
- }
1795
- return consumers;
1796
- }
1797
- /**
1798
- * Detect orphan nodes (not reachable from trigger)
1799
- * Also tracks dependent nodes for cascading fix
1800
- */
1801
- function detectOrphanNodes(nodes) {
1802
- const issues = [];
1803
- const adj = buildAdjacencyList(nodes);
1804
- // Find trigger node
1805
- const triggerNode = nodes.find(n => n.action_name?.includes("trigger") ||
1806
- n.id === "trigger" ||
1807
- (n.id && n.id.includes("trigger")));
1808
- if (!triggerNode) {
1809
- return []; // Can't detect orphans without trigger
1810
- }
1811
- // BFS from trigger
1812
- const reachable = new Set();
1813
- const queue = [triggerNode.id];
1814
- reachable.add(triggerNode.id);
1815
- while (queue.length > 0) {
1816
- const current = queue.shift();
1817
- const neighbors = adj.get(current) ?? new Set();
1818
- for (const neighbor of neighbors) {
1819
- if (!reachable.has(neighbor)) {
1820
- reachable.add(neighbor);
1821
- queue.push(neighbor);
1822
- }
1823
- }
1824
- }
1825
- // Find unreachable nodes and their consumers
1826
- for (const node of nodes) {
1827
- if (!reachable.has(node.id) && node.id !== "WORKFLOW_OUTPUT") {
1828
- const consumers = findNodeConsumers(node.id, nodes);
1829
- // Filter to only reachable consumers (we'll fix their dangling refs)
1830
- const reachableConsumers = consumers.filter(c => reachable.has(c));
1831
- issues.push({
1832
- type: "orphan",
1833
- severity: "warning",
1834
- node: node.id,
1835
- dependent_nodes: reachableConsumers.length > 0 ? reachableConsumers : undefined,
1836
- auto_fixable: true, // Always fixable - we remove node AND clean up dangling refs
1837
- reason: reachableConsumers.length > 0
1838
- ? `Node "${node.id}" is not reachable from trigger. Removing will also clean dangling refs in: ${reachableConsumers.join(", ")}`
1839
- : `Node "${node.id}" is not reachable from trigger`,
1840
- });
1841
- }
1842
- }
1843
- return issues;
1844
- }
1845
- /**
1846
- * Detect dead-end nodes (nodes that don't lead to WORKFLOW_OUTPUT)
1847
- */
1848
- function detectDeadEnds(nodes) {
1849
- const issues = [];
1850
- // Build reverse adjacency (target -> sources)
1851
- const reverseAdj = new Map();
1852
- for (const node of nodes) {
1853
- if (!reverseAdj.has(node.id)) {
1854
- reverseAdj.set(node.id, new Set());
1855
- }
1856
- if (node.incoming_edges) {
1857
- for (const edge of node.incoming_edges) {
1858
- if (!reverseAdj.has(edge.source_node_id)) {
1859
- reverseAdj.set(edge.source_node_id, new Set());
1860
- }
1861
- reverseAdj.get(node.id).add(edge.source_node_id);
1862
- }
1863
- }
1864
- }
1865
- // Find WORKFLOW_OUTPUT or nodes that connect to it
1866
- const outputNode = nodes.find(n => n.id === "WORKFLOW_OUTPUT" ||
1867
- n.action_name === "WorkflowOutputSink");
1868
- if (!outputNode) {
1869
- issues.push({
1870
- type: "missing_workflow_output",
1871
- severity: "critical",
1872
- auto_fixable: true, // Can add results mapping for Voice AI
1873
- reason: "No WORKFLOW_OUTPUT node found - workflow responses won't reach user",
1874
- });
1875
- return issues;
1876
- }
1877
- // BFS backwards from WORKFLOW_OUTPUT to find all nodes that can reach it
1878
- const canReachOutput = new Set();
1879
- const queue = [outputNode.id];
1880
- canReachOutput.add(outputNode.id);
1881
- // Also add nodes that have edges TO WORKFLOW_OUTPUT
1882
- for (const node of nodes) {
1883
- if (node.incoming_edges) {
1884
- for (const edge of node.incoming_edges) {
1885
- if (node.id === outputNode.id) {
1886
- canReachOutput.add(edge.source_node_id);
1887
- queue.push(edge.source_node_id);
1888
- }
1889
- }
1890
- }
1891
- }
1892
- // Continue BFS
1893
- while (queue.length > 0) {
1894
- const current = queue.shift();
1895
- const sources = reverseAdj.get(current) ?? new Set();
1896
- for (const source of sources) {
1897
- if (!canReachOutput.has(source)) {
1898
- canReachOutput.add(source);
1899
- queue.push(source);
1900
- }
1901
- }
1902
- }
1903
- // Find leaf nodes (no outgoing edges) that can't reach output
1904
- const adj = buildAdjacencyList(nodes);
1905
- for (const node of nodes) {
1906
- const neighbors = adj.get(node.id) ?? new Set();
1907
- if (neighbors.size === 0 &&
1908
- node.id !== outputNode.id &&
1909
- !node.id?.includes("trigger") &&
1910
- !canReachOutput.has(node.id)) {
1911
- issues.push({
1912
- type: "dead_end",
1913
- severity: "critical",
1914
- node: node.id,
1915
- missing: "WORKFLOW_OUTPUT connection",
1916
- auto_fixable: true, // Can add results mapping or edge
1917
- reason: `Node "${node.id}" doesn't lead to WORKFLOW_OUTPUT - responses won't reach user`,
1918
- });
1919
- }
1920
- }
1921
- return issues;
1922
- }
1923
- /**
1924
- * Detect nodes whose outputs are not consumed by any other node
1925
- * This catches anti-patterns like "combine_search_results but output not used"
1926
- */
1927
- function detectUnusedOutputs(nodes, workflowDef) {
1928
- const issues = [];
1929
- // Build set of all consumed node outputs
1930
- const consumedOutputs = new Set(); // Format: "nodeId.outputName"
1931
- for (const node of nodes) {
1932
- if (node.incoming_edges) {
1933
- for (const edge of node.incoming_edges) {
1934
- consumedOutputs.add(`${edge.source_node_id}.${edge.source_output}`);
1935
- // Also mark node as having at least one output consumed
1936
- consumedOutputs.add(`${edge.source_node_id}.*`);
1937
- }
1938
- }
1939
- }
1940
- // Check results mapping - outputs that go to WORKFLOW_OUTPUT are consumed
1941
- const def = workflowDef;
1942
- const results = def?.results;
1943
- if (results) {
1944
- for (const [, result] of Object.entries(results)) {
1945
- if (result?.actionName) {
1946
- consumedOutputs.add(`${result.actionName}.${result.outputName ?? "*"}`);
1947
- consumedOutputs.add(`${result.actionName}.*`);
1948
- }
1949
- }
1950
- }
1951
- // Nodes that produce output but should have downstream consumers
1952
- // Especially combiners, generators, and transformers
1953
- const NODES_THAT_PRODUCE_OUTPUT = [
1954
- "combine_search_results",
1955
- "personalized_content_generator",
1956
- "generate_document",
1957
- "json_mapper",
1958
- "entity_extraction",
1959
- "entity_extraction_with_documents",
1960
- "conversation_summarizer",
1961
- "custom_agent",
1962
- "creative_ideation_agent",
1963
- "response_validator",
1964
- ];
1965
- for (const node of nodes) {
1966
- const actionName = node.action_name?.toLowerCase()?.replace(/_/g, "") ?? "";
1967
- const nodeId = node.id ?? "";
1968
- const nodeIdNormalized = nodeId.toLowerCase().replace(/_/g, "");
1969
- // Skip trigger and output nodes
1970
- if (nodeId.includes("trigger") || nodeId === "WORKFLOW_OUTPUT") {
1971
- continue;
1972
- }
1973
- // Check if this is a node that should have its output consumed
1974
- // Normalize both sides by removing underscores for comparison
1975
- const shouldHaveOutputConsumed = NODES_THAT_PRODUCE_OUTPUT.some(name => {
1976
- const normalized = name.replace(/_/g, "");
1977
- return actionName.includes(normalized) || nodeIdNormalized.includes(normalized);
1978
- });
1979
- if (shouldHaveOutputConsumed) {
1980
- // Check if any output from this node is consumed
1981
- const isConsumed = consumedOutputs.has(`${nodeId}.*`);
1982
- if (!isConsumed) {
1983
- // Determine what the expected output would be
1984
- let expectedOutput = "output";
1985
- if (actionName.includes("combine")) {
1986
- expectedOutput = "combined_results";
1987
- }
1988
- else if (actionName.includes("generate_document") || nodeId.toLowerCase().includes("generate_document")) {
1989
- expectedOutput = "document_link";
1990
- }
1991
- else if (actionName.includes("entity_extraction") || nodeId.toLowerCase().includes("entity")) {
1992
- expectedOutput = "extracted_entities";
1993
- }
1994
- else if (actionName.includes("json_mapper") || nodeId.toLowerCase().includes("mapper")) {
1995
- expectedOutput = "mapped_output";
1996
- }
1997
- else if (actionName.includes("content_generator") || nodeId.toLowerCase().includes("content")) {
1998
- expectedOutput = "generated_content";
1999
- }
2000
- issues.push({
2001
- type: "unused_output",
2002
- severity: "warning",
2003
- node: nodeId,
2004
- current: expectedOutput,
2005
- auto_fixable: false, // Requires understanding intent to wire correctly
2006
- reason: `Node "${nodeId}" produces output "${expectedOutput}" but it's not consumed by any downstream node. This node is doing work that goes nowhere.`,
2007
- });
2008
- }
2009
- }
2010
- }
2011
- return issues;
2012
- }
2013
- /**
2014
- * Detect categorizer issues (missing fallback, missing category edges)
2015
- */
2016
- function detectCategorizerIssues(nodes) {
2017
- const issues = [];
2018
- const categorizers = nodes.filter(n => n.action_name?.includes("categorizer") ||
2019
- n.id?.includes("categorizer") ||
2020
- n.id?.includes("classifier"));
2021
- for (const categorizer of categorizers) {
2022
- // Check if there are any outgoing edges (category routes)
2023
- const adj = buildAdjacencyList(nodes);
2024
- const outgoing = adj.get(categorizer.id) ?? new Set();
2025
- if (outgoing.size === 0) {
2026
- issues.push({
2027
- type: "missing_category_edge",
2028
- severity: "critical",
2029
- node: categorizer.id,
2030
- auto_fixable: false, // Requires defining categories and handlers
2031
- reason: `Categorizer "${categorizer.id}" has no outgoing category edges - routing won't work`,
2032
- });
2033
- }
2034
- // Check for Fallback - multiple ways to configure:
2035
- // 1. default_category input set to a fallback-like value
2036
- // 2. Outgoing edges with "fallback" in the source_output
2037
- // 3. Category handlers wired to fallback output
2038
- let hasFallback = false;
2039
- // Check default_category input (most common way to set fallback)
2040
- const params = categorizer.parameters;
2041
- if (params?.default_category) {
2042
- const defaultCat = params.default_category;
2043
- const defaultValue = defaultCat?.inline?.enumValue?.toLowerCase() || "";
2044
- if (defaultValue && ["fallback", "other", "general", "unknown", "default"].includes(defaultValue)) {
2045
- hasFallback = true;
2046
- }
2047
- }
2048
- // Also check for edges with fallback in the name
2049
- if (!hasFallback) {
2050
- for (const node of nodes) {
2051
- if (node.incoming_edges) {
2052
- for (const edge of node.incoming_edges) {
2053
- if (edge.source_node_id === categorizer.id &&
2054
- (edge.source_output?.toLowerCase().includes("fallback"))) {
2055
- hasFallback = true;
2056
- break;
2057
- }
2058
- }
2059
- }
2060
- if (hasFallback)
2061
- break;
2062
- }
2063
- }
2064
- if (!hasFallback && outgoing.size > 0) {
2065
- issues.push({
2066
- type: "missing_fallback",
2067
- severity: "critical",
2068
- node: categorizer.id,
2069
- auto_fixable: true, // Can add Fallback category to enumType
2070
- reason: `Categorizer "${categorizer.id}" appears to be missing a Fallback category`,
2071
- });
2072
- }
2073
- }
2074
- return issues;
2075
- }
2076
- /**
2077
- * Detect HITL issues (missing success or failure paths)
2078
- */
2079
- function detectHitlIssues(nodes) {
2080
- const issues = [];
2081
- const hitlNodes = nodes.filter(n => n.action_name?.includes("hitl") ||
2082
- n.action_name?.includes("human_collaboration") ||
2083
- n.id?.includes("hitl") ||
2084
- n.id?.includes("approval"));
2085
- for (const hitl of hitlNodes) {
2086
- let hasSuccess = false;
2087
- let hasFailure = false;
2088
- // Look for edges from this HITL node (direct input connections)
2089
- for (const node of nodes) {
2090
- if (node.incoming_edges) {
2091
- for (const edge of node.incoming_edges) {
2092
- if (edge.source_node_id === hitl.id) {
2093
- const output = edge.source_output?.toLowerCase() ?? "";
2094
- if (output.includes("success")) {
2095
- hasSuccess = true;
2096
- }
2097
- if (output.includes("fail")) {
2098
- hasFailure = true;
2099
- }
2100
- }
2101
- }
2102
- }
2103
- // Also check runIf conditions (conditional execution pattern)
2104
- // This handles workflows that use runIf instead of direct edges
2105
- // Pattern: runIf.lhs.actionOutput.actionName === hitl.id && runIf.rhs.enumValue contains "Success"/"Failure"
2106
- if (node.runIf && typeof node.runIf === "object") {
2107
- const runIf = node.runIf;
2108
- const lhs = runIf.lhs;
2109
- const rhs = runIf.rhs;
2110
- if (lhs?.actionOutput) {
2111
- const actionOutput = lhs.actionOutput;
2112
- if (actionOutput.actionName === hitl.id) {
2113
- // This node has a runIf condition referencing the HITL node
2114
- const rhsValue = rhs?.inline?.enumValue ?? "";
2115
- const rhsLower = rhsValue.toLowerCase();
2116
- if (rhsLower.includes("success")) {
2117
- hasSuccess = true;
2118
- }
2119
- if (rhsLower.includes("fail")) {
2120
- hasFailure = true;
2121
- }
2122
- }
2123
- }
2124
- }
2125
- }
2126
- if (!hasSuccess && !hasFailure) {
2127
- issues.push({
2128
- type: "incomplete_hitl",
2129
- severity: "critical",
2130
- node: hitl.id,
2131
- missing: "both success and failure paths",
2132
- auto_fixable: false, // Requires defining response handlers
2133
- reason: `HITL node "${hitl.id}" missing both success AND failure paths`,
2134
- });
2135
- }
2136
- else if (!hasSuccess) {
2137
- issues.push({
2138
- type: "incomplete_hitl",
2139
- severity: "critical",
2140
- node: hitl.id,
2141
- missing: "success_path",
2142
- auto_fixable: false, // Requires defining success response
2143
- reason: `HITL node "${hitl.id}" missing success path`,
2144
- });
2145
- }
2146
- else if (!hasFailure) {
2147
- issues.push({
2148
- type: "incomplete_hitl",
2149
- severity: "critical",
2150
- node: hitl.id,
2151
- missing: "failure_path",
2152
- auto_fixable: false, // Requires defining failure response
2153
- reason: `HITL node "${hitl.id}" missing failure path`,
2154
- });
2155
- }
2156
- }
2157
- return issues;
2158
- }
2159
- /**
2160
- * Detect wrong input source issues (e.g., user_query for categorizer instead of chat_conversation)
2161
- */
2162
- function detectWrongInputSource(nodes) {
2163
- const issues = [];
2164
- for (const node of nodes) {
2165
- const actionName = node.action_name ?? node.id;
2166
- // Use the shared validation rules (single source of truth)
2167
- const rule = findInputSourceRule(actionName);
2168
- if (!rule)
2169
- continue;
2170
- // For send_email nodes, the rule only applies to email_to/recipient inputs
2171
- // Other inputs (subject, body) can legitimately use LLM/text outputs
2172
- const isEmailNode = actionName.includes("email") || actionName.includes("send_email");
2173
- if (node.incoming_edges) {
2174
- for (const edge of node.incoming_edges) {
2175
- const sourceOutput = edge.source_output?.toLowerCase() ?? "";
2176
- const targetInput = edge.target_input?.toLowerCase() ?? "";
2177
- // For email nodes, only check the email_to/recipient input, not all inputs
2178
- if (isEmailNode) {
2179
- const isRecipientInput = targetInput.includes("email_to") ||
2180
- targetInput.includes("to_email") ||
2181
- targetInput.includes("recipient") ||
2182
- targetInput.includes("to_address");
2183
- if (!isRecipientInput)
2184
- continue; // Skip non-recipient inputs for email nodes
2185
- }
2186
- // Check if source is in the allowlist (skip if explicitly allowed)
2187
- const allowlist = rule.allowlist ?? [];
2188
- if (allowlist.length > 0) {
2189
- const isAllowed = allowlist.some(allowed => sourceOutput.includes(allowed.toLowerCase().replace(/_/g, "")) ||
2190
- sourceOutput.replace(/_/g, "").includes(allowed.toLowerCase().replace(/_/g, "")));
2191
- if (isAllowed)
2192
- continue;
2193
- }
2194
- // Check if using an avoided input
2195
- for (const avoid of rule.avoid) {
2196
- if (sourceOutput.includes(avoid.toLowerCase()) ||
2197
- (targetInput.includes("query") && sourceOutput.includes("chat_conversation"))) {
2198
- issues.push({
2199
- type: "wrong_input_source",
2200
- severity: rule.severity,
2201
- node: node.id,
2202
- current: sourceOutput,
2203
- recommended: rule.recommended,
2204
- reason: rule.reason,
2205
- recommendation: rule.fix,
2206
- auto_fixable: true, // Can rebind to correct source
2207
- });
2208
- }
2209
- }
2210
- }
2211
- }
2212
- }
2213
- return issues;
2214
- }
2215
- /**
2216
- * Detect email-specific issues:
2217
- * 1. Email recipient from text output (should be entity_extraction)
2218
- * 2. Email without HITL confirmation
2219
- * 3. Missing entity_extraction before email
2220
- */
2221
- function detectEmailIssues(nodes) {
2222
- const issues = [];
2223
- // NOTE: We don't flag email input sources as errors since the backend accepts
2224
- // TEXT_WITH_SOURCES for email_to. The UI type system is the source of truth.
2225
- // Common valid pattern: entity_extraction → fixed_response('{{email}}') → send_email
2226
- // Find email sending nodes
2227
- const emailNodes = nodes.filter(n => n.action_name?.includes("send_email") ||
2228
- n.action_name?.includes("email_agent") ||
2229
- n.id?.toLowerCase().includes("send_email"));
2230
- // Find HITL nodes
2231
- const hitlNodes = nodes.filter(n => n.action_name?.includes("hitl") ||
2232
- n.action_name?.includes("human_collaboration") ||
2233
- n.id?.toLowerCase().includes("hitl") ||
2234
- n.id?.toLowerCase().includes("approval"));
2235
- for (const emailNode of emailNodes) {
2236
- // Email input validation removed - backend accepts what UI allows
2237
- // The UI type system enforces compatibility at connection time
2238
- // Check 3: Is there HITL confirmation before email?
2239
- // Check if any HITL node's output leads to this email node
2240
- let hasHitlUpstream = false;
2241
- if (emailNode.incoming_edges) {
2242
- for (const edge of emailNode.incoming_edges) {
2243
- const sourceNode = nodes.find(n => n.id === edge.source_node_id);
2244
- if (sourceNode && (sourceNode.action_name?.includes("hitl") ||
2245
- sourceNode.id?.toLowerCase().includes("hitl"))) {
2246
- hasHitlUpstream = true;
2247
- break;
2248
- }
2249
- }
2250
- }
2251
- // Also check runIf for HITL Success
2252
- const hasHitlRunIf = emailNode.runIf &&
2253
- typeof emailNode.runIf === "object" &&
2254
- JSON.stringify(emailNode.runIf).toLowerCase().includes("hitl");
2255
- if (!hasHitlUpstream && !hasHitlRunIf && hitlNodes.length === 0) {
2256
- issues.push({
2257
- type: "side_effect_without_hitl",
2258
- severity: "info", // Changed from warning - HITL is optional, not required
2259
- node: emailNode.id,
2260
- auto_fixable: false,
2261
- reason: `NOTE: Email node "${emailNode.id}" has no HITL confirmation. ` +
2262
- `Consider adding HITL if approval is desired before sending. ` +
2263
- `(HITL can be added later via workflow(mode="extend") with "add approval before email".)`,
2264
- });
2265
- }
2266
- }
2267
- return issues;
2268
- }
2269
1786
  /**
2270
1787
  * Validate all edge connections for type compatibility
2271
1788
  */
@@ -2336,1368 +1853,6 @@ export function validateWorkflowConnections(workflowDef) {
2336
1853
  }
2337
1854
  return validations;
2338
1855
  }
2339
- /**
2340
- * Detect performance issues (redundant searches, consolidation opportunities)
2341
- *
2342
- * This checks for:
2343
- * 1. Multiple search nodes using the same query source - should consolidate
2344
- * 2. Conditional searches that could be a single search with filtered results
2345
- * 3. Sequential operations that could run in parallel
2346
- */
2347
- function detectPerformanceIssues(nodes) {
2348
- const issues = [];
2349
- // Find all search nodes
2350
- const searchNodes = nodes.filter(n => n.action_name?.includes("search") ||
2351
- n.id?.includes("search"));
2352
- if (searchNodes.length <= 1) {
2353
- return issues; // No consolidation opportunity with 0-1 search nodes
2354
- }
2355
- // Group search nodes by their query source
2356
- const searchesByQuerySource = new Map();
2357
- for (const search of searchNodes) {
2358
- // Find the query input edge
2359
- const queryEdge = search.incoming_edges?.find(e => e.target_input?.toLowerCase().includes("query"));
2360
- if (queryEdge) {
2361
- const querySource = `${queryEdge.source_node_id}.${queryEdge.source_output}`;
2362
- const existing = searchesByQuerySource.get(querySource) ?? [];
2363
- existing.push(search);
2364
- searchesByQuerySource.set(querySource, existing);
2365
- }
2366
- }
2367
- // Check for redundant searches (same query source)
2368
- for (const [querySource, searches] of searchesByQuerySource) {
2369
- if (searches.length > 1) {
2370
- // Check if these are conditional (runIf) searches
2371
- const conditionalSearches = searches.filter(s =>
2372
- // Check if node has runIf condition (indicates branch-specific execution)
2373
- s.id?.includes("client_update") ||
2374
- s.id?.includes("client_review") ||
2375
- s.id?.includes("market_impact") ||
2376
- s.id?.includes("_1") || s.id?.includes("_2") || s.id?.includes("_3"));
2377
- if (conditionalSearches.length > 1) {
2378
- // Multiple conditional searches with same query = consolidation opportunity
2379
- issues.push({
2380
- type: "redundant_search",
2381
- severity: "warning",
2382
- nodes: searches.map(s => s.id),
2383
- query_source: querySource,
2384
- reason: `${searches.length} search nodes all use the same query source (${querySource}). Consider consolidating into a single search - only one branch executes at a time, so multiple searches add complexity without benefit.`,
2385
- recommendation: "Replace multiple conditional searches with a single search node. Pass results to all response nodes via named_inputs. This reduces workflow complexity and maintenance burden.",
2386
- });
2387
- }
2388
- else {
2389
- // Non-conditional redundant searches (actual duplicates)
2390
- issues.push({
2391
- type: "redundant_search",
2392
- severity: "info",
2393
- nodes: searches.map(s => s.id),
2394
- query_source: querySource,
2395
- reason: `${searches.length} search nodes share query source (${querySource}). Verify these are intentionally different (e.g., different file filters) or consolidate.`,
2396
- recommendation: "If searches have different file filters, consider using a single search with broader filters and letting the LLM filter relevant results.",
2397
- });
2398
- }
2399
- }
2400
- }
2401
- // Check for sequential searches that could parallelize
2402
- for (const search of searchNodes) {
2403
- // Find nodes that depend on this search
2404
- const dependentSearches = searchNodes.filter(s => s.id !== search.id &&
2405
- s.incoming_edges?.some(e => e.source_node_id === search.id));
2406
- if (dependentSearches.length > 0) {
2407
- issues.push({
2408
- type: "sequential_search",
2409
- severity: "info",
2410
- node: search.id,
2411
- dependent_nodes: dependentSearches.map(s => s.id),
2412
- reason: `Search "${search.id}" has dependent searches that run sequentially. If searches are independent, they could run in parallel.`,
2413
- recommendation: "Review if sequential dependency is necessary. Independent searches should branch from the same source to enable parallel execution.",
2414
- });
2415
- }
2416
- }
2417
- // Check for multiple LLM calls that could consolidate
2418
- const llmNodes = nodes.filter(n => n.action_name?.includes("call_llm") ||
2419
- n.action_name?.includes("respond") ||
2420
- n.id?.includes("respond"));
2421
- // Group LLM nodes by their search input source
2422
- const llmsBySearchSource = new Map();
2423
- for (const llm of llmNodes) {
2424
- const searchInputEdge = llm.incoming_edges?.find(e => e.source_output?.toLowerCase().includes("search_result"));
2425
- if (searchInputEdge) {
2426
- const source = searchInputEdge.source_node_id;
2427
- const existing = llmsBySearchSource.get(source) ?? [];
2428
- existing.push(llm);
2429
- llmsBySearchSource.set(source, existing);
2430
- }
2431
- }
2432
- // Report if multiple LLM nodes use same search results
2433
- for (const [searchSource, llms] of llmsBySearchSource) {
2434
- if (llms.length > 1) {
2435
- // This is often OK (conditional responses based on categorizer)
2436
- // Only flag if they're not conditional
2437
- const nonConditionalCount = llms.filter(l => !l.id?.includes("update") &&
2438
- !l.id?.includes("review") &&
2439
- !l.id?.includes("impact") &&
2440
- !l.id?.includes("fallback")).length;
2441
- if (nonConditionalCount > 1) {
2442
- issues.push({
2443
- type: "duplicate_llm_processing",
2444
- severity: "info",
2445
- nodes: llms.map(l => l.id),
2446
- search_source: searchSource,
2447
- reason: `Multiple LLM nodes process results from "${searchSource}". Consider consolidating if they produce similar outputs.`,
2448
- recommendation: "Use a single call_llm with comprehensive instructions instead of multiple calls. This produces more coherent responses.",
2449
- });
2450
- }
2451
- }
2452
- }
2453
- return issues;
2454
- }
2455
- /**
2456
- * Detect all workflow issues
2457
- */
2458
- /**
2459
- * Detect malformed runIf conditions
2460
- *
2461
- * Common mistake: using "category_<Name>" as output and comparing to "true"
2462
- * Correct format: output="category", compare to enumValue="<Name>"
2463
- */
2464
- function detectMalformedRunIf(workflowDef) {
2465
- const issues = [];
2466
- const def = workflowDef;
2467
- if (!def)
2468
- return issues;
2469
- const actions = def.actions;
2470
- if (!actions)
2471
- return issues;
2472
- for (const action of actions) {
2473
- const runIf = action.runIf;
2474
- if (!runIf)
2475
- continue;
2476
- const lhs = runIf.lhs;
2477
- const rhs = runIf.rhs;
2478
- if (!lhs || !rhs)
2479
- continue;
2480
- const actionOutput = lhs.actionOutput;
2481
- if (!actionOutput)
2482
- continue;
2483
- const output = String(actionOutput.output ?? "");
2484
- const inlineRhs = rhs.inline;
2485
- const enumValue = String(inlineRhs?.enumValue ?? "");
2486
- // Detect malformed pattern: output="category_<Name>" compared to enumValue="true"
2487
- if (output.startsWith("category_") && (enumValue === "true" || enumValue === "false")) {
2488
- // Extract the category name from the malformed output
2489
- const categoryName = output.replace(/^category_/, "");
2490
- issues.push({
2491
- type: "malformed_runif",
2492
- severity: "critical",
2493
- node: String(action.name ?? ""),
2494
- current: `output="${output}" compared to enumValue="${enumValue}"`,
2495
- recommended: `output="category" compared to enumValue="${categoryName}"`,
2496
- reason: `Malformed runIf condition: comparing "${output}" to "${enumValue}" won't work. The categorizer output is "category", not "category_<Name>".`,
2497
- auto_fixable: true,
2498
- });
2499
- }
2500
- }
2501
- return issues;
2502
- }
2503
- /**
2504
- * Detect unused categories - categories defined in enumType but with no handler
2505
- * This causes SILENT FAILURES where requests match the category but nothing executes
2506
- */
2507
- function detectUnusedCategories(nodes, workflowDef) {
2508
- const issues = [];
2509
- const def = workflowDef;
2510
- if (!def)
2511
- return issues;
2512
- const enumTypes = def.enumTypes;
2513
- const actions = def.actions;
2514
- if (!enumTypes || !actions)
2515
- return issues;
2516
- // Find all categorizer nodes
2517
- const categorizers = nodes.filter(n => n.action_name === "chat_categorizer" ||
2518
- n.action_name === "intent_classifier");
2519
- for (const categorizer of categorizers) {
2520
- // Find the enumType for this categorizer using typeArguments reference (robust)
2521
- // The action's typeArguments.categories.enumType points to the correct enumType
2522
- const categorizerAction = actions.find(a => a.name === categorizer.id);
2523
- const typeArgs = categorizerAction?.typeArguments;
2524
- const enumTypeRefName = typeArgs?.categories?.enumType?.name?.name;
2525
- const enumType = enumTypes.find(e => {
2526
- const name = e.name;
2527
- const innerName = name?.name;
2528
- const nameStr = String(innerName?.name ?? name?.name ?? "");
2529
- // Match by typeArguments reference (preferred) or by categorizer ID (fallback)
2530
- // DO NOT use generic "enumType" fallback - that causes cross-categorizer pollution
2531
- return (enumTypeRefName && nameStr === enumTypeRefName) || nameStr.includes(categorizer.id);
2532
- });
2533
- if (!enumType)
2534
- continue;
2535
- const options = enumType.options;
2536
- if (!options)
2537
- continue;
2538
- // Get all defined categories
2539
- const definedCategories = options.map(o => String(o.name ?? "")).filter(n => n.length > 0);
2540
- // Find which categories have handlers (via runIf conditions)
2541
- const handledCategories = new Set();
2542
- for (const action of actions) {
2543
- const runIf = action.runIf;
2544
- if (!runIf)
2545
- continue;
2546
- const lhs = runIf.lhs;
2547
- const actionOutput = lhs?.actionOutput;
2548
- // Check if this runIf references our categorizer
2549
- if (actionOutput?.actionName === categorizer.id) {
2550
- const rhs = runIf.rhs;
2551
- const inline = rhs?.inline;
2552
- const enumValue = String(inline?.enumValue ?? "");
2553
- if (enumValue) {
2554
- handledCategories.add(enumValue);
2555
- }
2556
- }
2557
- }
2558
- // Check for config-based routing (document_synthesis with Config.tasks pattern)
2559
- // Find Config.tasks keys from fixed_response nodes
2560
- const configTaskKeys = new Set();
2561
- for (const action of actions) {
2562
- const actionInfo = action.action;
2563
- const actionName = actionInfo?.name;
2564
- if (actionName?.name !== "fixed_response")
2565
- continue;
2566
- const inputs = action.inputs;
2567
- const template = inputs?.template;
2568
- const inline = template?.inline;
2569
- const wellKnown = inline?.wellKnown;
2570
- const textWithSources = wellKnown?.textWithSources;
2571
- const text = String(textWithSources?.text ?? "");
2572
- try {
2573
- if (text.includes('"tasks"')) {
2574
- const config = JSON.parse(text);
2575
- const tasks = config.tasks;
2576
- if (tasks) {
2577
- for (const taskKey of Object.keys(tasks)) {
2578
- configTaskKeys.add(taskKey);
2579
- }
2580
- }
2581
- }
2582
- }
2583
- catch {
2584
- // Not JSON, skip
2585
- }
2586
- }
2587
- // Check if category is passed to any downstream node (including via named_inputs)
2588
- // Use flexible string matching to handle any JSON formatting
2589
- let usesConfigBasedRouting = false;
2590
- for (const action of actions) {
2591
- const inputs = action.inputs;
2592
- if (!inputs)
2593
- continue;
2594
- // Check regular inputs and named_inputs for reference to categorizer's category output
2595
- const inputStr = JSON.stringify(inputs).replace(/\s+/g, "");
2596
- const categorizerRef = `"actionName":"${categorizer.id}"`.replace(/\s+/g, "");
2597
- const categoryOutput = `"output":"category"`.replace(/\s+/g, "");
2598
- if (inputStr.includes(categorizerRef) && inputStr.includes(categoryOutput)) {
2599
- // This action references the categorizer's category output
2600
- const actionInfo = action.action;
2601
- const actionName = actionInfo?.name;
2602
- const actionType = String(actionName?.name ?? "");
2603
- // If it's a synthesis/generation node and we have config tasks, it's config-based routing
2604
- if (["document_synthesis", "call_llm", "custom_agent"].includes(actionType) && configTaskKeys.size > 0) {
2605
- usesConfigBasedRouting = true;
2606
- }
2607
- }
2608
- }
2609
- // If we have config tasks defined, assume config-based routing even if not explicitly detected
2610
- // This handles cases where the config might be in an orphan node but still used at runtime
2611
- if (configTaskKeys.size > 0 && !usesConfigBasedRouting) {
2612
- // Check if any downstream node is a document_synthesis or custom_agent
2613
- for (const node of nodes) {
2614
- if (node.action_name === "document_synthesis" || node.action_name === "custom_agent") {
2615
- usesConfigBasedRouting = true;
2616
- break;
2617
- }
2618
- }
2619
- }
2620
- // Find unused categories
2621
- const unusedCategories = definedCategories.filter(c => !handledCategories.has(c));
2622
- for (const category of unusedCategories) {
2623
- // Skip Fallback - already detected by missing_fallback
2624
- if (category === "Fallback")
2625
- continue;
2626
- // If using config-based routing, check if category has a task
2627
- if (usesConfigBasedRouting) {
2628
- if (!configTaskKeys.has(category)) {
2629
- issues.push({
2630
- type: "unused_category",
2631
- severity: "critical",
2632
- node: categorizer.id,
2633
- category,
2634
- categories: Array.from(configTaskKeys),
2635
- auto_fixable: false,
2636
- reason: `Category "${category}" is defined but has NO TASK in Config.tasks. ` +
2637
- `Requests matching this category will SILENTLY FAIL - the workflow looks for Config.tasks["${category}"] which doesn't exist. ` +
2638
- `Config has tasks for: ${Array.from(configTaskKeys).join(", ")}. ` +
2639
- `Either add "${category}" to Config.tasks, or remove the category from the enum.`,
2640
- });
2641
- }
2642
- }
2643
- else {
2644
- // No config-based routing - need explicit runIf handler
2645
- issues.push({
2646
- type: "unused_category",
2647
- severity: "critical",
2648
- node: categorizer.id,
2649
- category,
2650
- categories: unusedCategories,
2651
- auto_fixable: false,
2652
- reason: `Category "${category}" is defined in ${categorizer.display_name || categorizer.id} but has NO HANDLER. ` +
2653
- `Requests matching this category will SILENTLY FAIL - no response sent to user. ` +
2654
- `Either add a node with runIf condition for "${category}", add to Config.tasks, or remove the category.`,
2655
- });
2656
- }
2657
- }
2658
- }
2659
- return issues;
2660
- }
2661
- /**
2662
- * Detect category name mismatches - e.g., SEND_EMAIL in enum vs SEND_COMMUNICATION in config
2663
- */
2664
- function detectCategoryNameMismatches(nodes, workflowDef) {
2665
- const issues = [];
2666
- const def = workflowDef;
2667
- if (!def)
2668
- return issues;
2669
- const enumTypes = def.enumTypes;
2670
- const actions = def.actions;
2671
- if (!enumTypes || !actions)
2672
- return issues;
2673
- // Find fixed_response nodes that might contain config with tasks
2674
- const configNodes = actions.filter(a => {
2675
- const actionInfo = a.action;
2676
- const actionName = actionInfo?.name;
2677
- return actionName?.name === "fixed_response";
2678
- });
2679
- for (const configNode of configNodes) {
2680
- // Try to extract config content
2681
- const inputs = configNode.inputs;
2682
- const template = inputs?.template;
2683
- const inline = template?.inline;
2684
- const wellKnown = inline?.wellKnown;
2685
- const textWithSources = wellKnown?.textWithSources;
2686
- const text = String(textWithSources?.text ?? "");
2687
- // Try to parse as JSON to find tasks object
2688
- try {
2689
- if (text.includes('"tasks"')) {
2690
- const config = JSON.parse(text);
2691
- const tasks = config.tasks;
2692
- if (!tasks)
2693
- continue;
2694
- const taskNames = new Set(Object.keys(tasks));
2695
- // Compare with enum categories
2696
- for (const enumType of enumTypes) {
2697
- const options = enumType.options;
2698
- if (!options)
2699
- continue;
2700
- const enumCategories = options.map(o => String(o.name ?? "")).filter(n => n.length > 0);
2701
- // Find similar but not exact matches
2702
- for (const enumCat of enumCategories) {
2703
- if (enumCat === "Fallback")
2704
- continue;
2705
- // Check for name mismatches (similar but different)
2706
- const normalizedEnum = enumCat.toLowerCase().replace(/_/g, "");
2707
- for (const taskName of taskNames) {
2708
- const normalizedTask = taskName.toLowerCase().replace(/_/g, "");
2709
- // Similar names but not exact match
2710
- if (normalizedEnum !== normalizedTask &&
2711
- (normalizedEnum.includes(normalizedTask.slice(0, 4)) ||
2712
- normalizedTask.includes(normalizedEnum.slice(0, 4)))) {
2713
- // Check if it's actually a mismatch (enum not in tasks)
2714
- if (!taskNames.has(enumCat)) {
2715
- issues.push({
2716
- type: "category_name_mismatch",
2717
- severity: "critical",
2718
- node: String(configNode.name ?? "config"),
2719
- category: enumCat,
2720
- config_key: taskName,
2721
- auto_fixable: true,
2722
- reason: `Category name mismatch: Enum has "${enumCat}" but Config.tasks has "${taskName}". ` +
2723
- `This routing will FAIL because Config.tasks["${enumCat}"] doesn't exist. ` +
2724
- `Either rename the enum category to "${taskName}" or add "${enumCat}" to Config.tasks.`,
2725
- });
2726
- }
2727
- }
2728
- }
2729
- }
2730
- }
2731
- }
2732
- }
2733
- catch {
2734
- // Not JSON config, skip
2735
- }
2736
- }
2737
- return issues;
2738
- }
2739
- /**
2740
- * Detect late categorizer placement - categorizer should be early to avoid wasted processing
2741
- */
2742
- function detectLateCategorizer(nodes) {
2743
- const issues = [];
2744
- const categorizers = nodes.filter(n => n.action_name === "chat_categorizer" ||
2745
- n.action_name === "intent_classifier");
2746
- // Heavy processing nodes
2747
- const heavyNodes = new Set(["call_llm", "custom_agent", "document_synthesis", "search", "knowledge_search"]);
2748
- for (const categorizer of categorizers) {
2749
- // Find all nodes upstream of this categorizer
2750
- const upstreamHeavy = [];
2751
- const visited = new Set();
2752
- function findUpstream(nodeId) {
2753
- if (visited.has(nodeId))
2754
- return;
2755
- visited.add(nodeId);
2756
- const node = nodes.find(n => n.id === nodeId);
2757
- if (!node?.incoming_edges)
2758
- return;
2759
- for (const edge of node.incoming_edges) {
2760
- const sourceNode = nodes.find(n => n.id === edge.source_node_id);
2761
- if (sourceNode) {
2762
- if (heavyNodes.has(sourceNode.action_name || "")) {
2763
- upstreamHeavy.push(sourceNode.display_name || sourceNode.id);
2764
- }
2765
- findUpstream(sourceNode.id);
2766
- }
2767
- }
2768
- }
2769
- findUpstream(categorizer.id);
2770
- if (upstreamHeavy.length > 0) {
2771
- issues.push({
2772
- type: "late_categorizer",
2773
- severity: "warning",
2774
- node: categorizer.id,
2775
- nodes: upstreamHeavy,
2776
- auto_fixable: false,
2777
- reason: `Categorizer "${categorizer.display_name || categorizer.id}" runs AFTER heavy processing: ${upstreamHeavy.join(", ")}. ` +
2778
- `This wastes compute - all that processing happens even for simple requests. ` +
2779
- `RECOMMENDATION: Move categorizer earlier in the flow. Categorize FIRST, then process only the relevant branch.`,
2780
- });
2781
- }
2782
- }
2783
- return issues;
2784
- }
2785
- /**
2786
- * Detect excessive LLM calls per execution path
2787
- */
2788
- function detectExcessiveLLMCalls(nodes) {
2789
- const issues = [];
2790
- const trigger = nodes.find(n => n.action_name === "trigger" || n.id === "trigger");
2791
- if (!trigger)
2792
- return issues;
2793
- const llmNodes = new Set(["call_llm", "custom_agent", "document_synthesis"]);
2794
- // Build adjacency list
2795
- const adj = new Map();
2796
- for (const node of nodes) {
2797
- adj.set(node.id, new Set());
2798
- }
2799
- for (const node of nodes) {
2800
- if (node.incoming_edges) {
2801
- for (const edge of node.incoming_edges) {
2802
- adj.get(edge.source_node_id)?.add(node.id);
2803
- }
2804
- }
2805
- }
2806
- // Find all paths from trigger and count LLMs
2807
- const pathsWithHighLLMCount = [];
2808
- function dfs(nodeId, path, llmCount) {
2809
- const node = nodes.find(n => n.id === nodeId);
2810
- const newPath = [...path, nodeId];
2811
- const newLLMCount = llmCount + (node && llmNodes.has(node.action_name || "") ? 1 : 0);
2812
- const neighbors = adj.get(nodeId) || new Set();
2813
- if (neighbors.size === 0) {
2814
- // End of path
2815
- if (newLLMCount > 3) {
2816
- pathsWithHighLLMCount.push({ path: newPath, llm_count: newLLMCount });
2817
- }
2818
- return;
2819
- }
2820
- // Limit depth to avoid infinite loops
2821
- if (newPath.length > 15)
2822
- return;
2823
- for (const neighbor of neighbors) {
2824
- if (!path.includes(neighbor)) {
2825
- dfs(neighbor, newPath, newLLMCount);
2826
- }
2827
- }
2828
- }
2829
- dfs(trigger.id, [], 0);
2830
- // Report worst paths
2831
- pathsWithHighLLMCount.sort((a, b) => b.llm_count - a.llm_count);
2832
- for (const { path, llm_count } of pathsWithHighLLMCount.slice(0, 3)) {
2833
- const llmNodesInPath = path.filter(p => {
2834
- const node = nodes.find(n => n.id === p);
2835
- return node && llmNodes.has(node.action_name || "");
2836
- });
2837
- issues.push({
2838
- type: "excessive_llm_calls",
2839
- severity: llm_count > 5 ? "warning" : "info",
2840
- nodes: llmNodesInPath,
2841
- llm_count,
2842
- path,
2843
- auto_fixable: false,
2844
- reason: `Execution path has ${llm_count} LLM calls: ${llmNodesInPath.join(" → ")}. ` +
2845
- `Each LLM call adds 1-3 seconds latency. Users typically wait max 5-8 seconds. ` +
2846
- `RECOMMENDATION: Consolidate LLM calls, parallelize where possible, or cache repeated lookups.`,
2847
- });
2848
- }
2849
- return issues;
2850
- }
2851
- /**
2852
- * Detect suboptimal node choices - using heavy nodes where lighter ones suffice
2853
- */
2854
- function detectSuboptimalNodeChoices(nodes) {
2855
- const issues = [];
2856
- for (const node of nodes) {
2857
- const actionName = node.action_name || "";
2858
- const displayName = node.display_name || node.id;
2859
- // Check for document_synthesis when respond_with_sources might suffice
2860
- if (actionName === "document_synthesis") {
2861
- // Check if it has simple inputs (not complex multi-source)
2862
- const hasSearchInput = node.incoming_edges?.some(e => e.source_output?.includes("search_results"));
2863
- const hasMultipleSearchInputs = (node.incoming_edges?.filter(e => e.source_output?.includes("search_results")) || []).length > 1;
2864
- if (hasSearchInput && !hasMultipleSearchInputs) {
2865
- issues.push({
2866
- type: "suboptimal_node_choice",
2867
- severity: "info",
2868
- node: node.id,
2869
- actual_node: "document_synthesis",
2870
- recommended_node: "respond_with_sources or call_llm",
2871
- use_case: "single search source",
2872
- reason: `"${displayName}" uses document_synthesis (2-5 LLM calls) but only has one search source. ` +
2873
- `Consider using respond_with_sources (1 call) for simple Q&A, or call_llm with named_inputs for custom generation. ` +
2874
- `document_synthesis is best for multi-source research with search-plan-search patterns.`,
2875
- });
2876
- }
2877
- }
2878
- // Check for call_llm/custom_agent that might be doing extraction
2879
- if (["call_llm", "custom_agent"].includes(actionName)) {
2880
- // Look for JSON extraction patterns in instructions
2881
- const inputStr = JSON.stringify(node.incoming_edges || []);
2882
- const nodeId = node.id.toLowerCase();
2883
- const display = displayName.toLowerCase();
2884
- // Check if this looks like an extraction node
2885
- if (nodeId.includes("extract") ||
2886
- display.includes("extract") ||
2887
- nodeId.includes("parse") ||
2888
- display.includes("parse")) {
2889
- issues.push({
2890
- type: "suboptimal_node_choice",
2891
- severity: "warning",
2892
- node: node.id,
2893
- actual_node: actionName,
2894
- recommended_node: "entity_extraction",
2895
- use_case: "structured data extraction",
2896
- reason: `"${displayName}" appears to be doing extraction but uses ${actionName}. ` +
2897
- `Use entity_extraction instead - it's optimized for structured data, provides typed output, ` +
2898
- `and is more reliable for extracting specific fields (names, emails, dates, amounts).`,
2899
- });
2900
- }
2901
- }
2902
- // Check for LLM nodes that might be doing simple transforms
2903
- if (["call_llm", "custom_agent", "document_synthesis"].includes(actionName)) {
2904
- const display = displayName.toLowerCase();
2905
- const nodeId = node.id.toLowerCase();
2906
- // Check if it looks like a simple formatter/transformer
2907
- if ((display.includes("format") && !display.includes("information")) ||
2908
- display.includes("convert") ||
2909
- display.includes("template") ||
2910
- nodeId.includes("formatter") ||
2911
- nodeId.includes("converter")) {
2912
- issues.push({
2913
- type: "suboptimal_node_choice",
2914
- severity: "info",
2915
- node: node.id,
2916
- actual_node: actionName,
2917
- recommended_node: "json_mapper or fixed_response",
2918
- use_case: "formatting/transformation",
2919
- reason: `"${displayName}" might be doing simple formatting with ${actionName} (LLM-based). ` +
2920
- `If this is just data transformation without reasoning, consider json_mapper (no LLM, <100ms) ` +
2921
- `or fixed_response with template variables (no LLM, <50ms).`,
2922
- });
2923
- }
2924
- }
2925
- }
2926
- return issues;
2927
- }
2928
- /**
2929
- * Detect unnecessary search nodes (search before simple tasks that don't need KB)
2930
- */
2931
- function detectUnnecessarySearch(nodes) {
2932
- const issues = [];
2933
- // Find search nodes
2934
- const searchNodes = nodes.filter(n => n.action_name === "search" ||
2935
- n.action_name === "knowledge_search");
2936
- // Check what consumes each search
2937
- for (const searchNode of searchNodes) {
2938
- // Find downstream consumers
2939
- const consumers = nodes.filter(n => n.incoming_edges?.some(e => e.source_node_id === searchNode.id));
2940
- // Check if downstream is just fallback/greeting
2941
- for (const consumer of consumers) {
2942
- const consumerName = (consumer.display_name || consumer.id).toLowerCase();
2943
- if (consumerName.includes("fallback") ||
2944
- consumerName.includes("greeting") ||
2945
- consumerName.includes("clarif")) {
2946
- issues.push({
2947
- type: "unnecessary_search",
2948
- severity: "info",
2949
- node: searchNode.id,
2950
- reason: `Search "${searchNode.display_name || searchNode.id}" feeds into "${consumer.display_name || consumer.id}" (fallback/greeting). ` +
2951
- `Fallback responses typically don't need KB search - they're about clarification, not retrieval. ` +
2952
- `Consider removing search from fallback path to reduce latency.`,
2953
- });
2954
- }
2955
- }
2956
- }
2957
- return issues;
2958
- }
2959
- export function detectWorkflowIssues(workflowDef) {
2960
- const nodes = parseWorkflowDef(workflowDef);
2961
- if (nodes.length === 0) {
2962
- return [{
2963
- type: "orphan",
2964
- severity: "critical",
2965
- reason: "No workflow nodes found - workflow definition may be empty or invalid format",
2966
- }];
2967
- }
2968
- let issues = [
2969
- ...detectCycles(nodes),
2970
- ...detectOrphanNodes(nodes),
2971
- ...detectDeadEnds(nodes),
2972
- ...detectUnusedOutputs(nodes, workflowDef),
2973
- ...detectCategorizerIssues(nodes),
2974
- ...detectHitlIssues(nodes),
2975
- ...detectWrongInputSource(nodes),
2976
- ...detectEmailIssues(nodes),
2977
- ...detectPerformanceIssues(nodes),
2978
- ...detectMalformedRunIf(workflowDef),
2979
- // NEW: Category/routing structure issues
2980
- ...detectUnusedCategories(nodes, workflowDef),
2981
- ...detectCategoryNameMismatches(nodes, workflowDef),
2982
- ...detectLateCategorizer(nodes),
2983
- ...detectExcessiveLLMCalls(nodes),
2984
- // NEW: Node selection issues
2985
- ...detectSuboptimalNodeChoices(nodes),
2986
- ...detectUnnecessarySearch(nodes),
2987
- ];
2988
- // Add type mismatch issues from connection validation
2989
- const connections = validateWorkflowConnections(workflowDef);
2990
- for (const conn of connections) {
2991
- if (!conn.compatible) {
2992
- issues.push({
2993
- type: "type_mismatch",
2994
- severity: "critical",
2995
- source: `${conn.source_node}.${conn.source_output}`,
2996
- target: `${conn.target_node}.${conn.target_input}`,
2997
- expected: conn.target_type,
2998
- got: conn.source_type,
2999
- reason: conn.note ?? `Type mismatch: ${conn.source_type} → ${conn.target_type}`,
3000
- });
3001
- }
3002
- }
3003
- // Filter out false positives for Voice AI workflows
3004
- const def = workflowDef;
3005
- if (def) {
3006
- const results = def.results;
3007
- const actions = def.actions;
3008
- // If workflow has results mapping, it's Voice AI - WORKFLOW_OUTPUT is not required
3009
- if (results && Object.keys(results).length > 0) {
3010
- issues = issues.filter(i => i.type !== "missing_workflow_output");
3011
- }
3012
- // Check if categorizers use runIf pattern (valid alternative to explicit edges)
3013
- if (actions) {
3014
- const categorizerIssues = issues.filter(i => i.type === "missing_category_edge");
3015
- for (const catIssue of categorizerIssues) {
3016
- const categorizerName = catIssue.node;
3017
- // Count nodes that have runIf referencing this categorizer
3018
- const nodesUsingCategorizer = actions.filter(a => {
3019
- const runIf = a.runIf;
3020
- if (!runIf?.lhs)
3021
- return false;
3022
- const lhs = runIf.lhs;
3023
- const actionOutput = lhs.actionOutput;
3024
- return actionOutput?.actionName === categorizerName;
3025
- });
3026
- // If there are nodes with runIf conditions using this categorizer, it's valid
3027
- if (nodesUsingCategorizer.length > 0) {
3028
- issues = issues.filter(i => !(i.type === "missing_category_edge" && i.node === categorizerName));
3029
- }
3030
- }
3031
- }
3032
- }
3033
- return issues;
3034
- }
3035
- /**
3036
- * Analyze a workflow comprehensively
3037
- */
3038
- export function analyzeWorkflow(workflowDef, metadata) {
3039
- const nodes = parseWorkflowDef(workflowDef);
3040
- const issues = detectWorkflowIssues(workflowDef);
3041
- // Count edges
3042
- let totalEdges = 0;
3043
- for (const node of nodes) {
3044
- totalEdges += node.incoming_edges?.length ?? 0;
3045
- }
3046
- // Check for trigger
3047
- const hasTrigger = nodes.some(n => n.action_name?.includes("trigger") ||
3048
- n.id === "trigger" ||
3049
- n.id?.includes("trigger"));
3050
- // Check for WORKFLOW_OUTPUT
3051
- const hasWorkflowOutput = nodes.some(n => n.id === "WORKFLOW_OUTPUT" ||
3052
- n.action_name === "WorkflowOutputSink");
3053
- // Count categorizers
3054
- const categorizersCount = nodes.filter(n => n.action_name?.includes("categorizer") ||
3055
- n.id?.includes("categorizer") ||
3056
- n.id?.includes("classifier")).length;
3057
- // Count HITL nodes
3058
- const hitlNodesCount = nodes.filter(n => n.action_name?.includes("hitl") ||
3059
- n.action_name?.includes("human_collaboration") ||
3060
- n.id?.includes("hitl") ||
3061
- n.id?.includes("approval")).length;
3062
- // Summarize issues by severity
3063
- const issueSummary = {
3064
- critical: issues.filter(i => i.severity === "critical").length,
3065
- warning: issues.filter(i => i.severity === "warning").length,
3066
- info: issues.filter(i => i.severity === "info").length,
3067
- };
3068
- return {
3069
- ...metadata,
3070
- summary: {
3071
- total_nodes: nodes.length,
3072
- total_edges: totalEdges,
3073
- has_trigger: hasTrigger,
3074
- has_workflow_output: hasWorkflowOutput,
3075
- categorizers_count: categorizersCount,
3076
- hitl_nodes_count: hitlNodesCount,
3077
- },
3078
- issues,
3079
- issue_summary: issueSummary,
3080
- validation_passed: issueSummary.critical === 0,
3081
- };
3082
- }
3083
/**
 * Suggest fixes for detected workflow issues.
 *
 * Each issue whose `type` is recognised yields one fix object containing
 * `issue_type`, a human-readable `description`, an `after` snippet showing the
 * suggested change, a `validation` hint, and (for some types) a `before`
 * snippet. Issues with an unrecognised `type` are skipped.
 *
 * NOTE(review): the snippet text below is reproduced exactly as it appears in
 * the reviewed listing, where leading/internal whitespace looks collapsed -
 * confirm snippet indentation against the upstream file.
 *
 * @param {Array<object>|null|undefined} issues - Detected issues; a nullish
 *   value is treated as an empty list.
 * @returns {Array<object>} Fix suggestions, in the same order as the issues
 *   that produced them.
 */
export function suggestWorkflowFixes(issues) {
    const fixes = [];
    for (const issue of issues ?? []) {
        const fix = buildWorkflowFix(issue);
        if (fix) {
            fixes.push(fix);
        }
    }
    return fixes;
}

/**
 * Turn a category label into a lowercase token usable inside a node name.
 * Runs of characters other than letters, digits and underscores become a
 * single "_" (the previous code replaced "_" with "_", which did nothing, so
 * labels such as "Product Info" leaked spaces into the suggested node name).
 * Labels already made of letters, digits and underscores are unchanged.
 */
function categoryHandlerSlug(category) {
    return String(category ?? "category")
        .toLowerCase()
        .replace(/[^\p{L}\p{N}_]+/gu, "_");
}

/**
 * Render the handler snippet for one HITL outcome.
 * `suffix` is "success" or "failure"; `enumValue` is the matching status label.
 */
function hitlHandlerSnippet(node, suffix, enumValue) {
    return `
- name: "${node}_${suffix}_response"
runIf:
lhs:
actionOutput:
actionName: "${node}"
output: "hitl_status"
autoDetectedBinding: false
operator: 1 # OPERATOR_EQ
rhs:
inline:
enumValue: "${enumValue}"
autoDetectedBinding: false
action:
name:
namespaces: ["actions", "emainternal"]
name: "call_llm"
# Connect to WORKFLOW_OUTPUT`;
}

/** Build the fix object for a single issue, or return null for unknown types. */
function buildWorkflowFix(issue) {
    switch (issue.type) {
        case "missing_fallback":
            return {
                issue_type: issue.type,
                description: `Add Fallback category to categorizer "${issue.node}"`,
                after: `# Add to enumType.options for ${issue.node}:
- name: "Fallback"
description: "Default for unclear or ambiguous requests"
examples:
- "Hello"
- "Help"
- "I'm not sure"

# Add edge for Fallback category:
- source_node: ${issue.node}
source_output: category_Fallback
target_node: fallback_handler
target_input: trigger_when`,
                validation: "Verify categorizer has Fallback in enumType.options AND has outgoing edge for category_Fallback",
            };
        case "incomplete_hitl": {
            const missing = issue.missing;
            const handlerLabel = missing === "success_path"
                ? "success"
                : missing === "failure_path"
                    ? "failure"
                    : "both";
            // "both" (or any value mentioning it) requests both handlers.
            const needsSuccess = missing?.includes("success") || missing?.includes("both");
            const needsFailure = missing?.includes("failure") || missing?.includes("both");
            return {
                issue_type: issue.type,
                description: `Add ${issue.missing} for HITL node "${issue.node}"`,
                after: `# Add ${handlerLabel} handler(s):
${needsSuccess ? hitlHandlerSnippet(issue.node, "success", "HITL Success") : ""}
${needsFailure ? hitlHandlerSnippet(issue.node, "failure", "HITL Failure") : ""}`,
                validation: "Verify both 'hitl_status_HITL Success' and 'hitl_status_HITL Failure' (with space, not underscore) have handler nodes and connect to WORKFLOW_OUTPUT",
            };
        }
        case "dead_end":
            return {
                issue_type: issue.type,
                description: `Connect "${issue.node}" to WORKFLOW_OUTPUT`,
                after: `# Add edge from ${issue.node} to WORKFLOW_OUTPUT:
- source_node: ${issue.node}
source_output: response_with_sources # or appropriate output
target_node: WORKFLOW_OUTPUT
target_input: ${issue.node}.response_with_sources`,
                validation: "Verify node's output is mapped to WORKFLOW_OUTPUT",
            };
        case "type_mismatch":
            return {
                issue_type: issue.type,
                description: `Fix type mismatch: ${issue.source} → ${issue.target}`,
                before: `# Current: ${issue.source} (${issue.got}) → ${issue.target} (${issue.expected})`,
                after: issue.got === "WELL_KNOWN_TYPE_CHAT_CONVERSATION" && issue.expected === "WELL_KNOWN_TYPE_TEXT_WITH_SOURCES"
                    ? `# Insert conversation_to_search_query between source and target:
- name: "summarizer"
action:
name:
namespaces: ["actions", "emainternal"]
name: "conversation_to_search_query"
inputs:
conversation:
actionOutput:
actionName: "[source_trigger]"
output: "chat_conversation"

# Then update target to use summarizer output:
- target input should reference summarizer.summarized_conversation`
                    : `# Either:
# 1. Use an intermediate conversion node
# 2. Connect to a named_inputs_* (accepts ANY type)
# 3. Use a different source output that matches the expected type`,
                validation: `Verify source output type (${issue.got}) matches target input type (${issue.expected})`,
            };
        case "wrong_input_source":
            return {
                issue_type: issue.type,
                description: `Change input for "${issue.node}" from ${issue.current} to ${issue.recommended}`,
                before: `# Current: using ${issue.current}`,
                after: `# Update incoming edge to use ${issue.recommended}:
- source_node: trigger # or appropriate source
source_output: ${issue.recommended}
target_node: ${issue.node}
target_input: ${issue.recommended === "chat_conversation" ? "conversation" : "query"}`,
                validation: `Verify ${issue.node} receives ${issue.recommended} - ${issue.reason}`,
            };
        case "missing_category_edge":
            return {
                issue_type: issue.type,
                description: `Add outgoing category edges for categorizer "${issue.node}"`,
                after: `# Add edges for each category in the categorizer:
# For each category in enumType.options, add:
- source_node: ${issue.node}
source_output: category_<CategoryName>
target_node: <handler_for_category>
target_input: trigger_when

# Example categories: Product_Info, Support, Sales, Fallback`,
                validation: "Verify each category in enumType.options has a corresponding outgoing edge",
            };
        case "orphan":
            return {
                issue_type: issue.type,
                description: `Connect orphan node "${issue.node}" to workflow`,
                after: `# Option 1: Connect as target of a categorizer:
- source_node: <categorizer>
source_output: category_<SomeCategory>
target_node: ${issue.node}
target_input: trigger_when

# Option 2: Remove the node if no longer needed`,
                validation: "Verify node is reachable from trigger via edges",
            };
        case "unused_output":
            return {
                issue_type: issue.type,
                description: `Connect output of "${issue.node}" to downstream consumer or remove the node`,
                after: `# The node "${issue.node}" produces "${issue.current}" but nothing uses it.
#
# Option 1: Connect output to a downstream node that needs it:
- node: <downstream_node>
inputs:
<input_name>:
actionOutput:
actionName: ${issue.node}
output: ${issue.current}

# Option 2: If using combine_search_results, connect to respond_with_sources:
# NOTE: respond_with_sources.search_results expects SEARCH_RESULT type
# combined_results is TEXT_WITH_SOURCES - use original search output OR named_inputs
- node: respond_with_sources
inputs:
search_results:
actionOutput:
actionName: <original_search_node> # NOT ${issue.node}
output: search_results
named_inputs: # Add combined for context
multiBinding:
elements:
- namedBinding:
name: combined_context
value:
actionOutput:
actionName: ${issue.node}
output: ${issue.current}

# Option 3: Remove the node if not needed:
# Delete the "${issue.node}" action from the workflow`,
                validation: `Verify "${issue.node}.${issue.current}" is consumed by a downstream node or WORKFLOW_OUTPUT`,
            };
        case "unsafe_email_recipient":
            return {
                issue_type: issue.type,
                description: `Use entity_extraction to get email address instead of text content`,
                before: `# WRONG: Connecting text output to email recipient
- node: ${issue.node}
inputs:
email_to:
actionOutput:
actionName: ${issue.source}
output: ${issue.current} # ❌ This is TEXT, not an email address!`,
                after: `# CORRECT: Extract email address via entity_extraction
# Step 1: Add entity_extraction node to extract email from conversation
- name: extract_email
action:
name:
namespaces: ["agents"]
name: entity_extraction
inputs:
text:
actionOutput:
actionName: trigger
output: chat_conversation
displaySettings:
displayName: "Extract Email Address"
# Configure extraction schema for email
parameters:
extraction_schema:
email_address:
type: string
required: true
recipient_name:
type: string
required: false

# Step 2: Add HITL to confirm before sending
- name: confirm_email
action:
name:
namespaces: ["agents"]
name: hitl
inputs:
request_text:
actionOutput:
actionName: extract_email
output: extracted_entities
displaySettings:
displayName: "Confirm Email Recipient"

# Step 3: Send email ONLY after HITL approval with extracted email
- name: ${issue.node}
action:
name:
namespaces: ["agents"]
name: send_email_agent
inputs:
email_to:
actionOutput:
actionName: extract_email
output: email_address # ✅ Structured email from extraction
email_body:
actionOutput:
actionName: ${issue.source}
output: ${issue.current} # Text content is fine for body
runIf:
enum:
enumType: HITL_STATUS
enumValue: "HITL Success"`,
                validation: "Verify email_to receives output from entity_extraction.email_address, not text content",
            };
        case "missing_entity_extraction":
            return {
                issue_type: issue.type,
                description: `Add entity_extraction upstream of email node to extract structured data`,
                after: `# Add entity_extraction before email to extract required fields
- name: extract_email_data
action:
name:
namespaces: ["agents"]
name: entity_extraction
inputs:
text:
actionOutput:
actionName: trigger
output: chat_conversation
displaySettings:
displayName: "Extract Email Data"
# Define what to extract
parameters:
extraction_schema:
email_address:
type: string
required: true
description: "Recipient email address"
subject:
type: string
required: false
description: "Email subject line"
recipient_name:
type: string
required: false

# Then connect extracted data to email node:
- node: ${issue.node}
inputs:
email_to:
actionOutput:
actionName: extract_email_data
output: email_address`,
                validation: "Verify entity_extraction exists upstream of email node",
            };
        case "side_effect_without_hitl":
            return {
                issue_type: issue.type,
                description: `Add HITL confirmation before executing action with side effects`,
                after: `# Add HITL node before ${issue.node} to confirm with user
- name: confirm_before_send
action:
name:
namespaces: ["agents"]
name: hitl
inputs:
request_text:
constant:
value: "Please confirm you want to proceed with this action."
displaySettings:
displayName: "Confirm Before Sending"

# Update ${issue.node} to only run after HITL approval
- name: ${issue.node}
# ... existing config ...
runIf:
enum:
enumType: HITL_STATUS
enumValue: "HITL Success"

# Add failure handling (IMPORTANT: don't leave HITL incomplete)
- name: cancelled_response
action:
name:
namespaces: ["generation"]
name: call_llm
inputs:
query:
constant:
value: "The action was cancelled by the user."
runIf:
enum:
enumType: HITL_STATUS
enumValue: "HITL Failure"`,
                validation: "Verify HITL node exists before action and both success/failure paths are handled",
            };
        case "cycle":
            return {
                issue_type: issue.type,
                description: `Remove circular dependency in: ${issue.nodes?.join(" → ")}`,
                after: `# Identify and remove the edge creating the cycle.
# The cycle involves: ${issue.nodes?.join(", ")}
#
# Common solutions:
# 1. Remove the back-edge (edge pointing to earlier node)
# 2. Restructure to use proper exit conditions
# 3. Use a separate node for the "loop back" case that terminates properly`,
                validation: "Verify no circular paths exist in the workflow graph",
            };
        case "missing_workflow_output":
            return {
                issue_type: issue.type,
                description: "Add WORKFLOW_OUTPUT node and connect all response paths",
                after: `# Add WORKFLOW_OUTPUT node:
- id: WORKFLOW_OUTPUT
action_name: WorkflowOutputSink
incoming_edges: []

# Connect all response nodes to it:
# For each response/call_llm node that produces final output:
- source_node: <response_node>
source_output: response_with_sources
target_node: WORKFLOW_OUTPUT
target_input: <response_node>.response_with_sources`,
                validation: "Verify WORKFLOW_OUTPUT exists and all terminal response nodes connect to it",
            };
        // Performance optimization fixes
        case "redundant_search":
            return {
                issue_type: issue.type,
                description: `Consolidate ${issue.nodes?.length ?? 0} redundant search nodes into a single search`,
                before: `# Current: ${issue.nodes?.length ?? 0} separate search nodes, all using ${issue.query_source}
# Nodes: ${issue.nodes?.join(", ")}`,
                after: `# RECOMMENDED: Replace multiple searches with a single consolidated search:

- name: "knowledge_search"
action:
name:
namespaces: ["actions", "emainternal"]
name: "search"
inputs:
query:
actionOutput:
actionName: "${issue.query_source?.split(".")[0] ?? "conversation_summarizer"}"
output: "${issue.query_source?.split(".")[1] ?? "summarized_conversation"}"
# Remove file_name_filters to search all files
# OR use broad filters that cover all needed data
max_extractive_segment_count:
inline:
wellKnown:
int64Value: "25" # Increase to get comprehensive results

# Then update ALL response nodes to use this single search:
- name: "respond_client_update" # and other response nodes
inputs:
named_inputs:
multiBinding:
elements:
- namedBinding:
name: "Search Results"
value:
actionOutput:
actionName: "knowledge_search"
output: "search_results"`,
                validation: "Verify single search provides sufficient results for all response branches",
            };
        case "sequential_search":
            return {
                issue_type: issue.type,
                description: `Parallelize sequential searches starting from "${issue.node}"`,
                before: `# Current: Sequential execution
# ${issue.node} → ${issue.dependent_nodes?.join(" → ")}`,
                after: `# RECOMMENDED: Run independent searches in parallel by branching from same source:

# Instead of:
# search_1 → search_2 → search_3
#
# Use:
# ┌─> search_1 ─┐
# │ │
# source ─> search_2 ─> combine_search_results
# │ │
# └─> search_3 ─┘

# If searches need different inputs, ensure they branch from the categorizer
# or conversation_summarizer rather than depending on each other.`,
                validation: "Verify searches don't actually depend on each other's results",
            };
        case "duplicate_llm_processing":
            return {
                issue_type: issue.type,
                description: `Consider consolidating ${issue.nodes?.length ?? 0} LLM nodes processing "${issue.search_source}"`,
                before: `# Current: ${issue.nodes?.length ?? 0} LLM nodes all process results from ${issue.search_source}
# Nodes: ${issue.nodes?.join(", ")}`,
                after: `# OPTION 1: If these are conditional responses (different intents), keep as-is
# This is the CORRECT pattern for categorizer-based routing

# OPTION 2: If these produce similar outputs, consolidate into one:
- name: "unified_response"
action:
name:
namespaces: ["actions", "emainternal"]
name: "call_llm"
inputs:
query:
actionOutput:
actionName: "trigger"
output: "user_query"
named_inputs:
multiBinding:
elements:
- namedBinding:
name: "Search Results"
value:
actionOutput:
actionName: "${issue.search_source}"
output: "search_results"
instructions:
inline:
wellKnown:
stringValue: "Comprehensive instructions that handle all response types..."`,
                validation: "Verify consolidation doesn't lose intent-specific response quality",
            };
        // NEW: Category/routing structure fixes
        case "unused_category":
            return {
                issue_type: issue.type,
                description: `Add handler for unused category "${issue.category}"`,
                after: `# CRITICAL: Category "${issue.category}" has NO HANDLER - requests will SILENTLY FAIL!
#
# OPTION 1: Add a handler node with runIf condition:
- name: "${categoryHandlerSlug(issue.category)}_handler"
action:
name:
namespaces: ["actions", "emainternal"]
name: "call_llm"
runIf:
lhs:
actionOutput:
actionName: "${issue.node}"
output: "category"
operator: 1 # EQUALS
rhs:
inline:
enumValue: "${issue.category}"
inputs:
# ... configure LLM for this category ...
displaySettings:
displayName: "${issue.category} Handler"

# OPTION 2: Add to Config.tasks if using config-based routing:
# In your fixed_response config node, add:
"tasks": {
"${issue.category}": "Task instructions for handling ${issue.category} requests..."
}

# OPTION 3: Remove the category if not needed:
# Delete "${issue.category}" from enumType.options`,
                validation: `Verify "${issue.category}" has a handler (runIf condition or Config.tasks entry)`,
            };
        case "category_name_mismatch":
            return {
                issue_type: issue.type,
                description: `Fix category name mismatch: "${issue.category}" vs "${issue.config_key}"`,
                before: `# PROBLEM: Enum defines "${issue.category}" but Config.tasks has "${issue.config_key}"
# The workflow looks for Config.tasks["${issue.category}"] which doesn't exist!`,
                after: `# OPTION 1: Rename the enum category to match config:
# In enumType.options, change:
- name: "${issue.category}" # OLD
+ name: "${issue.config_key}" # NEW (matches Config.tasks)

# OPTION 2: Add the missing key to Config.tasks:
# In your fixed_response config, add:
"tasks": {
"${issue.category}": "... task instructions ..." # ADD THIS
}

# OPTION 3: If both names should exist, add both to Config.tasks:
"tasks": {
"${issue.config_key}": "...", # existing
"${issue.category}": "..." # add alias or separate task
}`,
                validation: `Verify enum category name matches Config.tasks key exactly`,
            };
        case "late_categorizer":
            return {
                issue_type: issue.type,
                description: `Move categorizer "${issue.node}" earlier in the workflow`,
                before: `# PROBLEM: Heavy processing runs BEFORE categorization
# Nodes before categorizer: ${issue.nodes?.join(", ")}
# This wastes compute for ALL requests, not just the branch that needs it.`,
                after: `# RECOMMENDED STRUCTURE: Categorize FIRST, process LATE
#
# ✅ EFFICIENT:
# trigger → categorizer → [branch A: search + LLM]
# → [branch B: search + LLM]
# → [branch C: just LLM]
#
# ❌ WASTEFUL:
# trigger → summarize → search → categorizer → [branches]
# (ALL requests pay this cost!)
#
# TO FIX:
# 1. Move summarization/search to AFTER categorizer, inside each branch
# 2. OR use a lightweight "pre-categorizer" that only needs trigger.user_query
#
# Minimal categorizer pattern:
- name: "intent_classifier"
action:
name:
namespaces: ["actions", "emainternal"]
name: "chat_categorizer"
inputs:
conversation:
actionOutput:
actionName: "trigger"
output: "chat_conversation" # Light input, no heavy processing
# Optional: provide examples in custom_data for better accuracy`,
                validation: `Verify categorizer runs early with minimal upstream processing`,
            };
        case "excessive_llm_calls":
            return {
                issue_type: issue.type,
                description: `Reduce LLM calls from ${issue.llm_count} to ≤3 per request`,
                before: `# Current path has ${issue.llm_count} LLM calls: ${issue.nodes?.join(" → ")}
# Each call adds 1-3 seconds. Total: ${(issue.llm_count ?? 0) * 2}+ seconds worst case.
# Users typically abandon after 5-8 seconds.`,
                after: `# STRATEGIES TO REDUCE LLM CALLS:
#
# 1. CONSOLIDATE: Merge multiple LLM tasks into one
# Instead of: summarize → extract → respond (3 calls)
# Use: single call_llm with combined instructions
#
# 2. PARALLELIZE: Run independent LLMs simultaneously
# Instead of: A → B → C (sequential)
# Use: [A, B, C in parallel] → combine (1 effective wait)
#
# 3. ELIMINATE: Remove unnecessary LLM steps
# - Use json_mapper instead of LLM for simple transformations
# - Use fixed_response for static content
# - Use entity_extraction (1 call) instead of custom_agent (1 call)
#
# 4. CACHE: Reuse results across requests
# - Pre-compute common responses
# - Store client summaries in knowledge base
#
# RECOMMENDED: Max 3 LLM calls per request
# - 1x categorizer/extraction (optional, can use rules)
# - 1x main processing (search + reasoning)
# - 1x response formatting (if needed)`,
                validation: `Verify critical paths have ≤3 LLM calls total`,
            };
        default:
            // Unknown issue types have no canned fix.
            return null;
    }
}
3701
1856
  // ─────────────────────────────────────────────────────────────────────────────
3702
1857
  // Deprecated Actions Registry (GENERATED - DO NOT EDIT)
3703
1858
  //
@@ -3712,6 +1867,12 @@ export { DEPRECATED_ACTIONS_WITH_REPLACEMENT, DEPRECATED_ACTIONS_NO_REPLACEMENT,
3712
1867
  /**
3713
1868
  * Constraints that must be satisfied for a workflow to be enabled.
3714
1869
  * These are checked by the Ema backend when attempting to enable a persona.
1870
+ *
1871
+ * SOURCE: ema-repos/ema/docs/workflow-architecture-handoff.md (lines 85-200, constraint checklist 153-167)
1872
+ * STATUS: Can be auto-generated from source doc
1873
+ * TODO: Add to catalog-sync.yml workflow for automatic updates
1874
+ *
1875
+ * See .ctx/docs/source-repos.md for sync details
3715
1876
  */
3716
1877
  export const WORKFLOW_ENABLING_CONSTRAINTS = [
3717
1878
  {