npm - @sparkleideas/agentic-flow - Versions diffs - 2.0.2-alpha-patch.1 - Mend

@sparkleideas/agentic-flow 2.0.2-alpha-patch.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (641) hide show

package/agentic-flow/dist/reasoningbank/config/reasoningbank.yaml ADDED Viewed

@@ -0,0 +1,145 @@
+reasoningbank:
+  version: "1.0.0"
+  enabled: true
+  # ============================================================================
+  # Retrieval Configuration (Algorithm 1)
+  # ============================================================================
+  retrieve:
+    k: 3                           # Top-k memories to inject into system prompt
+    alpha: 0.65                    # Weight: semantic similarity (cosine); alpha + beta + gamma = 1.00
+    beta: 0.15                     # Weight: recency (exponential decay, see recency_half_life_days)
+    gamma: 0.20                    # Weight: reliability (confidence * usage)
+    delta: 0.10                    # Weight: diversity penalty (MMR); not part of the alpha/beta/gamma sum
+    recency_half_life_days: 45     # Exponential decay half-life for age, in days
+    duplicate_threshold: 0.87      # Cosine similarity threshold for deduplication
+    min_score: 0.3                 # Don't inject memories scoring below this
+    max_age_days: 365              # Ignore memories older than this many days
+  # ============================================================================
+  # Embedding Configuration
+  # ============================================================================
+  embeddings:
+    provider: "local"              # "claude" | "openai" | "huggingface" | "local"
+    model: "Xenova/all-MiniLM-L6-v2"  # local transformers.js model
+    # model: "claude-sonnet-4-5-20250929"  # for Claude provider — NOTE(review): same ID as judge.model; confirm this provider can return embeddings for it
+    # model: "text-embedding-3-large"    # for OpenAI provider
+    dimensions: 384                # vector dimensions; keep in sync with model (local: 384, OpenAI: 1536/3072)
+    cache_ttl_seconds: 3600        # cache embeddings for 1 hour
+    batch_size: 16                 # batch embeddings for efficiency
+  # ============================================================================
+  # Judge Configuration (Algorithm 2)
+  # ============================================================================
+  judge:
+    model: "claude-sonnet-4-5-20250929"  # NOTE(review): prompts/judge.json declares "deepseek/deepseek-chat"; confirm which one takes precedence
+    temperature: 0                 # deterministic evaluation
+    max_tokens: 512                # same value as prompts/judge.json
+    timeout_ms: 10000              # 10 s; twice performance.max_judge_latency_ms
+    cache_verdicts: true           # cache judgments by trajectory hash
+    retry_on_parse_error: true     # NOTE(review): presumably retries when the judge reply is not parseable JSON — confirm
+    fallback_label: "Failure"      # conservative fallback on judge error
+    fallback_confidence: 0.5       # NOTE(review): presumably the confidence reported with fallback_label — confirm
+  # ============================================================================
+  # Distillation Configuration (Algorithm 3)
+  # ============================================================================
+  distill:
+    max_items_per_trajectory: 3    # extract up to N memories per trajectory
+    success_confidence_prior: 0.75 # initial confidence for success-derived memories
+    failure_confidence_prior: 0.60 # initial confidence for failure-derived guardrails
+    redact_pii: true               # scrub PII before storing
+    redact_patterns:               # regexes; single-quoted so backslashes stay literal
+      - '\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'   # emails (TLD class is letters only; no literal '|')
+      - '\b(?:\d{3}-\d{2}-\d{4}|\d{9})\b'                       # SSN (also matches any bare 9-digit number)
+      - '\b(?:sk-[a-zA-Z0-9]{48}|sk-ant-[a-zA-Z0-9_\-]{20,}|ghp_[a-zA-Z0-9]{36})\b'  # API keys (sk-<48 alnum>, Anthropic sk-ant-..., GitHub ghp_)
+      - '\b(?:xoxb-[a-zA-Z0-9\-]+)\b'                           # Slack tokens
+      - '\b(?:\d{13,19})\b'                                     # Credit card numbers (any unseparated 13-19 digit run)
+    min_content_length: 20         # reject memories with content too short
+    max_content_length: 2000       # truncate if too long
+  # ============================================================================
+  # Consolidation Configuration (Algorithm 4)
+  # ============================================================================
+  consolidate:
+    enabled: true
+    run_every_new_items: 20        # consolidate after N new memories
+    contradiction_threshold: 0.60  # NLI probability threshold for contradictions
+    prune_age_days: 180            # hard delete if unused and older than this many days
+    min_confidence_keep: 0.30      # prune if confidence drops below this
+    max_contradictions_allowed: 5  # quarantine if contradicts > N high-usage items
+    dedup_similarity_threshold: 0.87  # merge if cosine > this (same value as retrieve.duplicate_threshold)
+    merge_strategy: "keep_highest_usage"  # "keep_highest_usage" | "keep_most_recent"
+  # ============================================================================
+  # MaTTS Configuration (Algorithm 5)
+  # ============================================================================
+  matts:
+    enabled: true
+    # Parallel mode: k independent rollouts with self-contrast aggregation
+    parallel:
+      k: 6                         # number of parallel rollouts
+      diversity_temperature: 0.9   # sampling temperature for diversity
+      max_concurrent: 3            # concurrent executions (rate limiting); half of k
+      aggregation_model: "claude-sonnet-4-5-20250929"  # NOTE(review): prompts/matts-aggregate.json declares "deepseek/deepseek-chat"; confirm precedence
+      aggregation_max_tokens: 2048  # NOTE(review): prompts/matts-aggregate.json declares max_tokens 3072; confirm precedence
+    # Sequential mode: r iterative refinements with check-and-correct
+    sequential:
+      r: 3                         # number of refinement iterations
+      check_instruction: "Review your previous attempt. Identify errors or missing steps. Correct and continue."
+      max_iterations: 5            # hard cap to prevent infinite loops (r is currently below it)
+      stop_on_success: true        # stop if judge labels Success before r iterations
+  # ============================================================================
+  # Governance and Compliance
+  # ============================================================================
+  governance:
+    pii_scrubber: true             # enable PII redaction — NOTE(review): distill.redact_pii is a second flag; confirm how they interact
+    tenant_scoped: false           # set true for multi-tenant deployments
+    audit_trail: true              # log all memory operations to events table
+    max_memory_age_days: 365       # absolute max age before forced deletion (same value as retrieve.max_age_days)
+    require_approval_for_high_impact: false  # flag high-impact memories for review
+  # ============================================================================
+  # Performance and Observability
+  # ============================================================================
+  performance:
+    log_metrics: true              # write to performance_metrics table
+    export_csv_interval_days: 7    # export metrics CSV weekly
+    alert_on_degradation: true     # alert if success rate drops
+    success_rate_threshold: 0.70   # baseline threshold for alerting
+    max_retrieve_latency_ms: 500   # alert if retrieval takes longer
+    max_judge_latency_ms: 5000     # alert if judge takes longer (half of judge.timeout_ms)
+  # ============================================================================
+  # Learning Rate and Confidence Updates
+  # ============================================================================
+  learning:
+    eta: 0.05                      # learning rate for confidence updates
+    success_boost: 1.0             # confidence += eta * success_boost when used in success (+0.05 at these values)
+    failure_penalty: -0.5          # confidence += eta * failure_penalty when used in failure (-0.025 at these values)
+    usage_boost_sigmoid: true      # apply sigmoid to usage_count for reliability score
+  # ============================================================================
+  # Feature Flags
+  # ============================================================================
+  features:
+    enable_pre_task_hook: true     # retrieve and inject memories before task
+    enable_post_task_hook: true    # judge, distill, consolidate after task
+    enable_matts_parallel: true    # allow parallel MaTTS
+    enable_matts_sequential: true  # allow sequential MaTTS
+    enable_contradiction_detection: true  # NOTE(review): presumably gates the consolidate contradiction check — confirm
+    enable_auto_pruning: true             # NOTE(review): presumably gates consolidate pruning — confirm
+    enable_memory_merging: true           # NOTE(review): presumably gates consolidate dedup/merge — confirm
+  # ============================================================================
+  # Development and Debugging
+  # ============================================================================
+  debug:
+    verbose_logging: false         # off by default
+    save_trajectories: true        # persist all trajectories to task_trajectories
+    save_embeddings: true          # persist embeddings for inspection
+    log_retrieval_scores: false    # log detailed scoring breakdown
+    dry_run: false                 # don't actually upsert memories (testing only)

package/agentic-flow/dist/reasoningbank/prompts/distill-failure.json ADDED Viewed

@@ -0,0 +1,111 @@
+{
+  "name": "reasoning_bank_distill_failure",
+  "version": "1.0.0",
+  "description": "Extract failure guardrails and preventative patterns from failed trajectories. Creates counterfactual memories.",
+  "model": "deepseek/deepseek-chat",
+  "temperature": 0.3,
+  "max_tokens": 2048,
+  "system": "You are a failure analysis specialist. Your role is to analyze failed task trajectories and extract guardrails, pitfalls, and recovery strategies. Focus on preventable errors and how to detect/avoid them.",
+  "template": "Given a task and its failed trajectory, extract up to {{max_items}} failure guardrail principles.\n\nTask: {{task_query}}\n\nFailed Trajectory:\n{{trajectory}}\n\nExtract guardrail items with this schema:\n- title: Brief title describing the failure mode (5-10 words)\n- description: One-sentence summary of the pitfall\n- content: 3-8 numbered steps covering detection, avoidance, and recovery\n\nGuidelines:\n1. Focus on WHY the failure occurred and HOW to prevent it\n2. Include early warning signs and detection criteria\n3. Specify checks to perform before risky operations\n4. Provide recovery steps if failure is detected\n5. Generalize the pattern (avoid task-specific details)\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Guardrail title\",\n      \"description\": \"One-sentence summary of pitfall\",\n      \"content\": \"1) Detection: How to identify risk. 2) Prevention: Steps to avoid. 3) Recovery: What to do if encountered.\",\n      \"tags\": [\"failure\", \"tag1\", \"tag2\"],\n      \"domain\": \"optional domain hint\"\n    }\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***"
+            },
+            "note": "Missing CSRF token"
+          },
+          {
+            "action": "submit",
+            "result": "403 Forbidden"
+          },
+          {
+            "action": "retry",
+            "result": "403 Forbidden"
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Avoid 403 errors by handling CSRF tokens",
+            "description": "Missing CSRF tokens cause repeated 403 Forbidden errors on POST requests.",
+            "content": "1) Detection: 403 error on form submission despite correct credentials. 2) Prevention: Always check for CSRF token requirements (meta tag, form input, or cookie) before POST. 3) Recovery: Refresh page to get new token and retry with token included. 4) Validation: Verify token is present in request before submitting.",
+            "tags": [
+              "failure",
+              "csrf",
+              "403",
+              "web"
+            ],
+            "domain": "webarena.admin"
+          },
+          {
+            "title": "Detect infinite retry loops and stop",
+            "description": "Retrying the same failing operation without changing approach causes loops.",
+            "content": "1) Detection: Same error occurs 2+ times with identical parameters. 2) Prevention: After first failure, analyze error response and adjust approach. 3) Recovery: Stop after 2 failures, log diagnostic info, and try alternative method or request human intervention. 4) Never retry more than 3 times without changing strategy.",
+            "tags": [
+              "failure",
+              "retry",
+              "loop"
+            ],
+            "domain": "general"
+          }
+        ]
+      }
+    },
+    {
+      "task": "Extract product prices from infinite scroll page",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "action": "scroll_down",
+            "new_items": 20
+          },
+          {
+            "note": "Repeated indefinitely, never reached end"
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Prevent infinite pagination loops",
+            "description": "Infinite scroll pages can cause endless loops if end condition is not detected.",
+            "content": "1) Detection: Track number of scrolls and items loaded. If scroll_count > 50 or no new items after 3 consecutive scrolls, likely at end. 2) Prevention: Set hard limit (e.g., max 100 scrolls) and monitor for repeated DOM states. 3) Recovery: Stop scrolling, summarize partial results, and report limited dataset. 4) Use sentinel values or page metadata when available.",
+            "tags": [
+              "failure",
+              "pagination",
+              "infinite-scroll",
+              "web"
+            ],
+            "domain": "webarena.shopping"
+          }
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Failure memories are as valuable as success memories",
+    "Focus on root cause, not symptoms",
+    "Include both detection and recovery strategies",
+    "Tag with 'failure' to distinguish from success-derived memories",
+    "Lower confidence prior (0.60) reflects need for validation"
+  ]
+}

package/agentic-flow/dist/reasoningbank/prompts/distill-success.json ADDED Viewed

@@ -0,0 +1,74 @@
+{
+  "name": "reasoning_bank_distill_success",
+  "version": "1.0.0",
+  "description": "Extract reusable strategy principles from successful trajectories. Creates title/description/content memories.",
+  "model": "deepseek/deepseek-chat",
+  "temperature": 0.3,
+  "max_tokens": 2048,
+  "system": "You are a knowledge extraction specialist. Your role is to analyze successful task trajectories and extract reusable, generalizable strategy principles. Each principle should be concise, actionable, and avoid task-specific details like URLs, IDs, or PII.",
+  "template": "Given a task and its successful trajectory, extract up to {{max_items}} reusable strategy principles.\n\nTask: {{task_query}}\n\nTrajectory:\n{{trajectory}}\n\nExtract memory items with this schema:\n- title: Brief, descriptive title (5-10 words)\n- description: One-sentence summary of the strategy\n- content: 3-8 numbered steps with clear decision criteria and recovery actions\n\nGuidelines:\n1. Generalize beyond this specific task (avoid URLs, IDs, constants)\n2. Focus on transferable patterns and decision logic\n3. Include preconditions, main steps, and error recovery\n4. Use imperative voice (\"Load page\", \"Verify token\", etc.)\n5. Highlight critical checks and validation steps\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Strategy title\",\n      \"description\": \"One-sentence summary\",\n      \"content\": \"1) Step one with decision criteria. 2) Step two with validation. 3) Recovery if failure.\",\n      \"tags\": [\"tag1\", \"tag2\"],\n      \"domain\": \"optional domain hint\"\n    }\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel with CSRF protection and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "extract_csrf",
+            "selector": "meta[name=csrf-token]",
+            "value": "abc123"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***",
+              "csrf_token": "abc123"
+            }
+          },
+          {
+            "action": "submit_and_verify",
+            "success": true
+          }
+        ]
+      },
+      "expected_response": {
+        "memories": [
+          {
+            "title": "Handle login flows with CSRF tokens",
+            "description": "Always fetch and include CSRF token before POST to avoid 403 errors.",
+            "content": "1) Load login page and parse CSRF from form input, meta tag, or cookie. 2) Include token in POST request as form field or header. 3) If 403 or 419 error, refresh page and retry with new token. 4) Verify successful authentication before proceeding.",
+            "tags": [
+              "web",
+              "auth",
+              "csrf",
+              "security"
+            ],
+            "domain": "webarena.admin"
+          },
+          {
+            "title": "Verify authentication state before data extraction",
+            "description": "Check for authentication indicators before attempting protected operations.",
+            "content": "1) After login, verify presence of session cookie or auth token. 2) Check for redirect to dashboard or user-specific content. 3) Look for logout button or user menu as positive signal. 4) If still on login page or see auth error, retry login flow.",
+            "tags": [
+              "web",
+              "auth",
+              "verification"
+            ],
+            "domain": "webarena"
+          }
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0.3 for some creativity while maintaining structure",
+    "Aim for 1-3 memories per trajectory, not more unless truly distinct",
+    "Content should be 3-8 steps, not a paragraph",
+    "Tags help with retrieval filtering",
+    "Domain hints improve retrieval precision for specialized tasks"
+  ]
+}

package/agentic-flow/dist/reasoningbank/prompts/judge.json ADDED Viewed

@@ -0,0 +1,101 @@
+{
+  "name": "reasoning_bank_judge",
+  "version": "1.0.0",
+  "description": "LLM-as-judge for trajectory evaluation. Returns Success or Failure with confidence score.",
+  "model": "deepseek/deepseek-chat",
+  "temperature": 0,
+  "max_tokens": 512,
+  "system": "You are a strict evaluator for task completion. Your role is to judge whether a task trajectory achieved its goal based on the final state and outputs. Be conservative: only label Success if the acceptance criteria are clearly met. Respond with pure JSON.",
+  "template": "Task: {{task_query}}\n\nTrajectory:\n{{trajectory}}\n\nEvaluate if the final state meets the acceptance criteria for this task.\n\nConsider:\n1. Was the stated goal achieved?\n2. Are all required outputs present and correct?\n3. Did the trajectory avoid critical errors or incomplete steps?\n4. Does the final state satisfy implicit requirements (e.g., proper authentication, data consistency)?\n\nRespond with JSON:\n{\n  \"label\": \"Success\" or \"Failure\",\n  \"confidence\": 0.0 to 1.0,\n  \"reasons\": [\"reason 1\", \"reason 2\", ...]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "***"
+            }
+          },
+          {
+            "action": "click",
+            "selector": "button[type=submit]"
+          },
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/users"
+          },
+          {
+            "action": "extract",
+            "data": [
+              {
+                "id": 1,
+                "name": "Alice"
+              },
+              {
+                "id": 2,
+                "name": "Bob"
+              }
+            ]
+          }
+        ]
+      },
+      "expected_response": {
+        "label": "Success",
+        "confidence": 0.95,
+        "reasons": [
+          "Successfully authenticated as admin",
+          "Navigated to users page",
+          "Extracted user list with expected fields"
+        ]
+      }
+    },
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectory": {
+        "steps": [
+          {
+            "action": "navigate",
+            "url": "https://admin.example.com/login"
+          },
+          {
+            "action": "fill_form",
+            "fields": {
+              "username": "admin",
+              "password": "wrong"
+            }
+          },
+          {
+            "action": "click",
+            "selector": "button[type=submit]"
+          },
+          {
+            "action": "observe",
+            "content": "Invalid credentials"
+          }
+        ]
+      },
+      "expected_response": {
+        "label": "Failure",
+        "confidence": 0.98,
+        "reasons": [
+          "Authentication failed with invalid credentials",
+          "Did not reach users page",
+          "No user list extracted"
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0 for deterministic evaluation",
+    "Be conservative: prefer Failure when ambiguous",
+    "Confidence should reflect certainty of judgment based on available evidence",
+    "If trajectory is malformed or incomplete, return Failure with low confidence"
+  ]
+}

package/agentic-flow/dist/reasoningbank/prompts/matts-aggregate.json ADDED Viewed

@@ -0,0 +1,119 @@
+{
+  "name": "reasoning_bank_matts_aggregate",
+  "version": "1.0.0",
+  "description": "Self-contrast aggregation for parallel MaTTS. Compares multiple trajectories to extract high-quality, generalizable memories.",
+  "model": "deepseek/deepseek-chat",
+  "temperature": 0.2,
+  "max_tokens": 3072,
+  "system": "You are a meta-learning specialist analyzing multiple attempts at the same task. Your role is to identify patterns that distinguish successful approaches from failures, and extract robust, generalizable strategies.",
+  "template": "We have {{k}} independent trajectories for the same task. Compare and contrast them to extract high-quality memory items.\n\nTask: {{task_query}}\n\nTrajectories:\n{{trajectories}}\n\nAnalyze:\n1. Patterns present in most successful attempts but absent in failures\n2. Pitfalls present in failures but not in successes\n3. Critical decision points where trajectories diverged\n4. Common suboptimal approaches even in successes\n\nExtract 1-3 distilled memory items that:\n- Generalize across successful attempts\n- Avoid task-specific details (URLs, IDs, etc.)\n- Capture robust decision criteria\n- Include failure modes to avoid\n\nRespond with JSON:\n{\n  \"memories\": [\n    {\n      \"title\": \"Strategy title\",\n      \"description\": \"One-sentence summary\",\n      \"content\": \"1) Step with decision criteria. 2) Validation check. 3) Recovery if needed.\",\n      \"confidence_boost\": 0.0 to 0.2,\n      \"evidence\": [\"trajectory_id_1\", \"trajectory_id_2\"],\n      \"tags\": [\"tag1\", \"tag2\"]\n    }\n  ],\n  \"insights\": [\n    \"Key observation 1 from comparison\",\n    \"Key observation 2 from comparison\"\n  ]\n}",
+  "examples": [
+    {
+      "task": "Login to admin panel and extract user list",
+      "trajectories": [
+        {
+          "id": "traj_1",
+          "label": "Success",
+          "confidence": 0.95,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF token from meta tag",
+            "Fill form with token",
+            "Submit and verify redirect",
+            "Navigate to users page",
+            "Extract user list"
+          ]
+        },
+        {
+          "id": "traj_2",
+          "label": "Success",
+          "confidence": 0.92,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF token from hidden input",
+            "Fill form with token",
+            "Submit and check for auth cookie",
+            "Navigate to users page",
+            "Extract user list"
+          ]
+        },
+        {
+          "id": "traj_3",
+          "label": "Failure",
+          "confidence": 0.88,
+          "steps": [
+            "Navigate to login",
+            "Fill form without token",
+            "Submit",
+            "Receive 403 error",
+            "Retry without token",
+            "Fail again"
+          ]
+        },
+        {
+          "id": "traj_4",
+          "label": "Success",
+          "confidence": 0.90,
+          "steps": [
+            "Navigate to login",
+            "Extract CSRF from cookie",
+            "Fill form with token",
+            "Submit and wait for dashboard",
+            "Navigate to users",
+            "Extract list"
+          ]
+        }
+      ],
+      "expected_response": {
+        "memories": [
+          {
+            "title": "CSRF token extraction is critical for protected forms",
+            "description": "All successful attempts extracted and included CSRF token; failure did not.",
+            "content": "1) Before submitting protected forms, search for CSRF token in: meta tags (name=csrf-token), hidden form inputs (name=_token or csrf), or cookies (XSRF-TOKEN). 2) Include token in request as form field or X-CSRF-TOKEN header. 3) If 403/419 error, token is likely missing or stale—refresh page and retry. 4) Verify token extraction succeeded before submission.",
+            "confidence_boost": 0.15,
+            "evidence": [
+              "traj_1",
+              "traj_2",
+              "traj_4"
+            ],
+            "tags": [
+              "csrf",
+              "web",
+              "auth",
+              "critical"
+            ]
+          },
+          {
+            "title": "Multiple CSRF token locations require flexible parsing",
+            "description": "Successful attempts used different token sources (meta, input, cookie).",
+            "content": "1) CSRF tokens may appear in multiple locations: meta tags, hidden inputs, cookies, or response headers. 2) Try common locations in order: meta[name=csrf-token], input[name=_token], document.cookie XSRF-TOKEN. 3) If first location fails, check alternatives before giving up. 4) Cache token location for subsequent requests to same domain.",
+            "confidence_boost": 0.10,
+            "evidence": [
+              "traj_1",
+              "traj_2",
+              "traj_4"
+            ],
+            "tags": [
+              "csrf",
+              "parsing",
+              "flexibility"
+            ]
+          }
+        ],
+        "insights": [
+          "All successes extracted CSRF token before submission; failure did not",
+          "Token sources varied (meta, input, cookie) but all successes found it",
+          "Failure retried without changing approach, demonstrating need for error analysis",
+          "Verification step (redirect, cookie, dashboard) was present in all successes"
+        ]
+      }
+    }
+  ],
+  "notes": [
+    "Use temperature=0.2 for focused analysis with minimal creativity",
+    "Confidence boost (0-0.2) reflects strength of cross-trajectory evidence",
+    "Evidence array links memory to supporting trajectories",
+    "Insights provide debugging context for future analysis",
+    "Aim for 1-3 memories, not more—quality over quantity"
+  ]
+}