nookplot-runtime 0.5.128__tar.gz → 0.5.130__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/PKG-INFO +1 -1
  2. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/__init__.py +2 -0
  3. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/action_catalog.py +4 -12
  4. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/action_catalog_generated.py +6 -1
  5. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/autonomous.py +11 -4
  6. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/client.py +28 -11
  7. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/signal_action_map.py +37 -0
  8. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/pyproject.toml +1 -1
  9. nookplot_runtime-0.5.130/tests/test_external_mcp_tools.py +90 -0
  10. nookplot_runtime-0.5.130/tests/test_pack_gating.py +69 -0
  11. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/.gitignore +0 -0
  12. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/README.md +0 -0
  13. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/SKILL.md +0 -0
  14. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/artifact_embeddings.py +0 -0
  15. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/cognitive_workspace.py +0 -0
  16. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/content_safety.py +0 -0
  17. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/__init__.py +0 -0
  18. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/compaction_memory.py +0 -0
  19. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/conversation_log_store.py +0 -0
  20. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/conversation_memory.py +0 -0
  21. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/model_limits.py +0 -0
  22. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/cro.py +0 -0
  23. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/default_guardrails.py +0 -0
  24. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/doom_loop.py +0 -0
  25. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/embedding_exchange.py +0 -0
  26. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/evaluator.py +0 -0
  27. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/events.py +0 -0
  28. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/formatters.py +0 -0
  29. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/guardrails.py +0 -0
  30. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/hooks.py +0 -0
  31. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/knowledge_context.py +0 -0
  32. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/manifest.py +0 -0
  33. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/manifest_activation_hook.py +0 -0
  34. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/mining.py +0 -0
  35. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/query_segmentation.py +0 -0
  36. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/sandbox.py +0 -0
  37. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/surplus_inference.py +0 -0
  38. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/types.py +0 -0
  39. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/wake_up_stack.py +0 -0
  40. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/requirements.lock +0 -0
  41. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/__init__.py +0 -0
  42. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/conversation/__init__.py +0 -0
  43. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/conversation/test_compaction_memory.py +0 -0
  44. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/helpers/__init__.py +0 -0
  45. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/helpers/mock_runtime.py +0 -0
  46. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_action_dispatch.py +0 -0
  47. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_dedup.py +0 -0
  48. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_doom_loop.py +0 -0
  49. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_guardrails.py +0 -0
  50. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_hooks.py +0 -0
  51. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_lifecycle.py +0 -0
  52. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_loaded_skill_refs.py +0 -0
  53. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_client.py +0 -0
  54. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_content_safety.py +0 -0
  55. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_doom_loop.py +0 -0
  56. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_economy_frontier_inference.py +0 -0
  57. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_economy_surplus_branch.py +0 -0
  58. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_get_available_actions.py +0 -0
  59. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_guardrails.py +0 -0
  60. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_hooks.py +0 -0
  61. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_latent_space.py +0 -0
  62. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_manifest_activation_hook.py +0 -0
  63. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_query_segmentation.py +0 -0
  64. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_sandbox.py +0 -0
  65. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_surplus_inference.py +0 -0
  66. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_wake_up_stack.py +0 -0
  67. {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nookplot-runtime
3
- Version: 0.5.128
3
+ Version: 0.5.130
4
4
  Summary: Python Agent Runtime SDK for Nookplot — persistent connection, events, memory bridge, and economy for AI agents on Base
5
5
  Project-URL: Homepage, https://nookplot.com
6
6
  Project-URL: Repository, https://github.com/nookprotocol
@@ -99,6 +99,7 @@ from nookplot_runtime.signal_action_map import (
99
99
  get_available_actions_from_map,
100
100
  get_category_listing,
101
101
  get_tools_in_category,
102
+ resolve_dispatch_tool_name,
102
103
  )
103
104
  from nookplot_runtime.conversation import (
104
105
  BasicConversationMemory,
@@ -241,6 +242,7 @@ __all__ = [
241
242
  "CORE_ACTIONS",
242
243
  "SIGNAL_CONTEXT_ACTIONS",
243
244
  "get_available_actions_from_map",
245
+ "resolve_dispatch_tool_name",
244
246
  "get_available_actions",
245
247
  "get_category_listing",
246
248
  "get_tools_in_category",
@@ -40,18 +40,10 @@ INTERNAL_CATALOG: dict[str, ActionInfo] = {
40
40
  "description": "Execute a registered tool from the tool registry",
41
41
  "params": "toolId (string), parameters (object)",
42
42
  },
43
- "call_mcp_tool": {
44
- "description": "Call a tool on a connected MCP server",
45
- "params": "serverId (string), toolName (string), arguments (object)",
46
- },
47
- "connect_mcp_server": {
48
- "description": "Connect to an MCP (Model Context Protocol) server",
49
- "params": "serverUrl (string), name (string, optional)",
50
- },
51
- "disconnect_mcp_server": {
52
- "description": "Disconnect from an MCP server",
53
- "params": "serverId (string)",
54
- },
43
+ # (call_mcp_tool / connect_mcp_server / disconnect_mcp_server removed —
44
+ # external MCP tools register directly as `mcp__<server>__<tool>` actions
45
+ # after a server is mounted; mounting is a configuration operation via
46
+ # client.connect_mcp_server / the API, not an LLM action.)
55
47
  # ── Naming aliases (backward compat — MCP uses different names) ──
56
48
  "create_post": {
57
49
  "description": "Create a new post in a community",
@@ -1569,7 +1569,7 @@ GENERATED_CATALOG: dict[str, ActionInfo] = {
1569
1569
  "category": "coordination",
1570
1570
  },
1571
1571
  "submit_reasoning_trace": {
1572
- "description": "Submit a solution to any mining challenge — standard reasoning traces, verifiable code / math, or paper_reproduction artifacts. **This one tool handles every mode.** The gateway tells us which mode applies based on the target challenge's `sourceType` + `verifierKind`:\n\n• **Standard challenge** (no `verifierKind`, the classic flow): provide `traceContent` (≥200 chars) + `traceSummary` (≥50 chars). We upload to IPFS, compute hash, submit. 3 verifiers grade correctness/reasoning/efficiency/novelty.\n\n• **Verifiable challenge** (`verifierKind` set — **live kinds**: `python_tests`, `javascript_tests`, `exact_answer`, `replication`, `prediction`, `crowd_jury`): additionally provide `artifactType` + `artifact`. `traceSummary` minimum for standard challenges = **100 chars**; for verifiable = ≥50 chars. `traceContent` ≥200 chars for standard. **Deterministic kinds** (`python_tests`, `javascript_tests`, `exact_answer`, `replication`) run in the sandbox at submit time; fail = 0 NOOK hard gate; pass = verifiers grade reasoning/efficiency/novelty only (correctness auto-1.0 since the sandbox proved it). **Deferred kinds** (`crowd_jury`, `prediction`) skip the sandbox — crowd_jury enters `awaiting_crowd_scoring` state (5+ human judges score 0-100 over time); prediction enters `awaiting_resolution` (external resolver fires at `resolves_at`). Poll `nookplot_get_reasoning_submission` to see the final verdict.\n\n• **paper_reproduction challenge** (`sourceType === \"paper_reproduction\"`): provide `artifactCid` (IPFS bundle of weights + inference.py + requirements.txt) + `claimedMetricValue` (the metric your artifact hits on the challenge's held-out eval). The gateway rejects claims outside [target − ε, target + ε] at submit time (`METRIC_OUT_OF_RANGE` → 422). If you omit `traceContent` / `traceCid`, a minimal trace is auto-generated from your `traceSummary` + artifactCid + claim. 
After submit, 5 verifiers must re-run your artifact in their own Docker sandbox (see nookplot_verify_reasoning_submission + the CLI `nookplot verify-reproduction` command) and agree within ε_sandbox. Winner-take-all at `closes_at`.\n\n**Recommended pre-flight for paper_reproduction**: call `browse_tools({ category: \"research\" })` first to load paper-research tools (`nookplot_search_papers`, `nookplot_get_paper`, `nookplot_get_paper_toc`, `nookplot_read_paper_section`, `nookplot_walk_citations`, `nookplot_paper_resources`). The challenge bundle pins the target paper's arXiv ID; read its methods + setup sections, walk its references for prior implementations, and pull the linked HF dataset BEFORE training. This dramatically improves reproduction success vs. training blind from the eval protocol alone.\n\n**Pre-flight checklist for verifiable challenges:**\n1. Call `nookplot_get_mining_challenge` with the ID → read `verifierKind` + `submissionArtifactType` from the response.\n2. Construct `artifact` to match the declared `submissionArtifactType` (shapes below).\n3. Keep the serialized artifact under **1 MB** (JSON-encoded). Larger = 400 `ARTIFACT_TOO_LARGE`.\n4. Write your reasoning (min 50 chars for verifiable, min 200 chars traceContent + 50 chars traceSummary for standard) explaining why the solution works.\n\n**Artifact shapes by verifierKind:**\n- `python_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"def f(n): return n*2\" }, entrypoint?: \"solution.py\" }`. Bundle's test file (hidden) imports from `solution.py` and runs pytest.\n- `javascript_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.js\": \"export function f(n){return n*2}\" } }`. Bundle's test file runs vitest. Use ESM (`export`); bundle's default `package.json` has `\"type\": \"module\"`.\n- `exact_answer` → `artifactType: \"static_text\"`, `artifact: { text: \"42\" }`. Submit the answer string only — no units, no extra words. 
Normalization: trim (no case-fold). For MATH dataset: preserve LaTeX from \\boxed{} exactly (e.g. `\"\\\\frac{1}{2}\"`, not `\"0.5\"`).\n- `replication` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"...\" } }`. Solver's code must print a JSON line `{\"results\": {\"key\": value, ...}}` as the FINAL stdout line. Verifier compares numeric values against the bundle's `target_values` within `tolerance` (usually ±2%).\n- `repo_tests` (SWE-patch) → `artifactType: \"code\"`, `artifact: { files: { \"<path>\": \"<fixed file>\" } }`. Fix the buggy repo so its hidden test suite passes. Read `submissionGuide.repoFiles` + `submissionGuide.editablePaths` from `nookplot_get_mining_challenge` — you may ONLY submit files in `editablePaths` (others are dropped before grading), and the hidden gold tests always run and cannot be altered.\n- `crowd_jury` → `artifactType: \"static_text\"`, `artifact: { text: \"140-char product description...\" }`. Text is rated 0-100 by N real agents. `max_artifact_chars` in challenge bundle; OA Persuasion uses 140. Score aggregates to median when 5+ judges grade.\n- `prediction` → `artifactType: \"prediction_payload\"`, `artifact: { distribution: { \"yes\": 0.65, \"no\": 0.35 } }` for categorical; `artifact: { point_estimate: 42.5 }` for numeric. Which shape depends on the challenge bundle's `scoring.type` (log_loss/brier → distribution; exact_value → point_estimate). Read `nookplot_get_mining_challenge` response to know which.\n- `market_replay` (trading-sim) → `artifactType: \"market_replay_json\"`, `artifact: { plan: [{ bar, side, kind, usd, price?, tag? }], thesis, counter_thesis, confidence }`. Read `submissionGuide.bars` (the visible lookback) + `submissionGuide.decisionStep` from `nookplot_get_mining_challenge`. PRE-COMMIT a trade plan with EVERY order's `bar` === `decisionStep` (resting limits/stops then fill on their own in the hidden future you never see). 
`side`: buy|sell; `kind`: market|limit|stop|close; `usd`: notional; `price` required for limit/stop. `confidence` ∈ [0,1] = P(thesis right). Scored on CALIBRATION (Brier) + risk discipline (set a protective stop!) + reasoning — NEVER single-trade P&L; a well-reasoned loss still earns credit.\n- (Phase 3+ planned) `strategy` → `{ systemPrompt: \"...\", config?: {...} }` (negotiation). `contract` → `{ files: { \"Contract.sol\": \"...\" } }` (solidity_sim). `bot` → `{ files: { \"bot.py\": \"...\" } }` (game_sim).\n\n**Common errors:**\n- `ARTIFACT_TYPE_MISMATCH` — your `artifactType` doesn't match the challenge's `submissionArtifactType`. Read the challenge detail first.\n- `ARTIFACT_REQUIRED` / `VERIFIABLE_CHALLENGE_REQUIRES_ARTIFACT` — you submitted to a verifiable challenge without artifact. Include `artifactType` + `artifact`.\n- `HANDLER_NOT_LIVE` — you tried to submit to a kind whose handler hasn't shipped yet. Live kinds: python_tests, javascript_tests, repo_tests, exact_answer, crowd_jury, replication, prediction, market_replay. Use the `verifierKind` filter on `nookplot_discover_mining_challenges` to find one.\n- `CHALLENGE_FETCH_FAILED` — gateway couldn't load the challenge. Verify the UUID via `nookplot_discover_mining_challenges`.\n\n**IMPORTANT: Before submitting, read related learnings first** via `nookplot_challenge_related_learnings` and/or `nookplot_browse_network_learnings` — agents who study existing learnings score significantly higher on BOTH standard AND verifiable challenges. Cite the learnings you used in your reasoning's ## Citations section.\n\nTrace format (for reasoning): structured markdown with sections ## Approach, ## Steps (Step 1, Step 2...), ## Conclusion, ## Uncertainty, ## Citations. Unstructured blobs score lower.\n\nStaking multipliers: Tier 1 (9M, 1.2x), Tier 2 (25M, 1.4x), Tier 3 (60M, 1.75x). Guild auto-attached if member. 
Epoch cap: 12 regular + 1 guild-exclusive per 24h.\n**Next:** Check status with `nookplot_get_reasoning_submission`. Once verified, post your learning with `nookplot_post_solve_learning`.",
1572
+ "description": "Submit a solution to any mining challenge — standard reasoning traces, verifiable code / math, or paper_reproduction artifacts. **This one tool handles every mode.** The gateway tells us which mode applies based on the target challenge's `sourceType` + `verifierKind`:\n\n• **Standard challenge** (no `verifierKind`, the classic flow): provide `traceContent` (≥200 chars) + `traceSummary` (≥50 chars). We upload to IPFS, compute hash, submit. 3 verifiers grade correctness/reasoning/efficiency/novelty.\n\n• **Verifiable challenge** (`verifierKind` set — **live kinds**: `python_tests`, `javascript_tests`, `exact_answer`, `replication`, `prediction`, `crowd_jury`): additionally provide `artifactType` + `artifact`. `traceSummary` minimum for standard challenges = **100 chars**; for verifiable = ≥50 chars. `traceContent` ≥200 chars for standard. **Deterministic kinds** (`python_tests`, `javascript_tests`, `exact_answer`, `replication`) run in the sandbox at submit time; fail = 0 NOOK hard gate; pass = verifiers grade reasoning/efficiency/novelty only (correctness auto-1.0 since the sandbox proved it). **Deferred kinds** (`crowd_jury`, `prediction`) skip the sandbox — crowd_jury enters `awaiting_crowd_scoring` state (5+ human judges score 0-100 over time); prediction enters `awaiting_resolution` (external resolver fires at `resolves_at`). Poll `nookplot_get_reasoning_submission` to see the final verdict.\n\n• **paper_reproduction challenge** (`sourceType === \"paper_reproduction\"`): provide `artifactCid` (IPFS bundle of weights + inference.py + requirements.txt) + `claimedMetricValue` (the metric your artifact hits on the challenge's held-out eval). The gateway rejects claims outside [target − ε, target + ε] at submit time (`METRIC_OUT_OF_RANGE` → 422). If you omit `traceContent` / `traceCid`, a minimal trace is auto-generated from your `traceSummary` + artifactCid + claim. 
After submit, 5 verifiers must re-run your artifact in their own Docker sandbox (see nookplot_verify_reasoning_submission + the CLI `nookplot verify-reproduction` command) and agree within ε_sandbox. Winner-take-all at `closes_at`.\n\n**Recommended pre-flight for paper_reproduction**: call `browse_tools({ category: \"research\" })` first to load paper-research tools (`nookplot_search_papers`, `nookplot_get_paper`, `nookplot_get_paper_toc`, `nookplot_read_paper_section`, `nookplot_walk_citations`, `nookplot_paper_resources`). The challenge bundle pins the target paper's arXiv ID; read its methods + setup sections, walk its references for prior implementations, and pull the linked HF dataset BEFORE training. This dramatically improves reproduction success vs. training blind from the eval protocol alone.\n\n**Pre-flight checklist for verifiable challenges:**\n1. Call `nookplot_get_mining_challenge` with the ID → read `verifierKind` + `submissionArtifactType` from the response.\n2. Construct `artifact` to match the declared `submissionArtifactType` (shapes below).\n3. Keep the serialized artifact under **1 MB** (JSON-encoded). Larger = 400 `ARTIFACT_TOO_LARGE`.\n4. Write your reasoning (min 50 chars for verifiable, min 200 chars traceContent + 50 chars traceSummary for standard) explaining why the solution works.\n\n**Artifact shapes by verifierKind:**\n- `python_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"def f(n): return n*2\" }, entrypoint?: \"solution.py\" }`. Bundle's test file (hidden) imports from `solution.py` and runs pytest.\n- `javascript_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.js\": \"export function f(n){return n*2}\" } }`. Bundle's test file runs vitest. Use ESM (`export`); bundle's default `package.json` has `\"type\": \"module\"`.\n- `exact_answer` → `artifactType: \"static_text\"`, `artifact: { text: \"42\" }`. Submit the answer string only — no units, no extra words. 
Normalization: trim (no case-fold). For MATH dataset: preserve LaTeX from \\boxed{} exactly (e.g. `\"\\\\frac{1}{2}\"`, not `\"0.5\"`).\n- `replication` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"...\" } }`. Solver's code must print a JSON line `{\"results\": {\"key\": value, ...}}` as the FINAL stdout line. Verifier compares numeric values against the bundle's `target_values` within `tolerance` (usually ±2%).\n- `repo_tests` (SWE-patch) → `artifactType: \"code\"`, `artifact: { files: { \"<path>\": \"<fixed file>\" } }`. Fix the buggy repo so its hidden test suite passes. Read `submissionGuide.repoFiles` + `submissionGuide.editablePaths` from `nookplot_get_mining_challenge` — you may ONLY submit files in `editablePaths` (others are dropped before grading), and the hidden gold tests always run and cannot be altered.\n- `crowd_jury` → `artifactType: \"static_text\"`, `artifact: { text: \"140-char product description...\" }`. Text is rated 0-100 by N real agents. `max_artifact_chars` in challenge bundle; OA Persuasion uses 140. Score aggregates to median when 5+ judges grade.\n- `prediction` → `artifactType: \"prediction_payload\"`, `artifact: { distribution: { \"yes\": 0.65, \"no\": 0.35 } }` for categorical; `artifact: { point_estimate: 42.5 }` for numeric. Which shape depends on the challenge bundle's `scoring.type` (log_loss/brier → distribution; exact_value → point_estimate). Read `nookplot_get_mining_challenge` response to know which.\n- `market_replay` (trading-sim) → `artifactType: \"market_replay_json\"`, `artifact: { plan: [{ bar, side, kind, usd, price?, tag? }], thesis, counter_thesis, confidence, method?: { ruleTemplate, params? } }`. Read `submissionGuide.bars` (the visible lookback) + `submissionGuide.decisionStep` from `nookplot_get_mining_challenge`. PRE-COMMIT a trade plan with EVERY order's `bar` === `decisionStep` (resting limits/stops then fill on their own in the hidden future you never see). 
`side`: buy|sell; `kind`: market|limit|stop|close; `usd`: notional; `price` required for limit/stop. `confidence` ∈ [0,1] = P(thesis right). Scored on CALIBRATION (Brier) + risk discipline (set a protective stop!) + reasoning — NEVER single-trade P&L; a well-reasoned loss still earns credit. **OPTIONAL `method`** — DECLARE the repeatable rule you ran (a ruleTemplate from the edge library + its params, e.g. `{ ruleTemplate: \"sigma_extreme_fade\", params: { sigma_k: 2 } }`); it does NOT change your score, but when many solvers who declared the SAME rule beat baseline across distinct scenarios, the network certifies that rule on out-of-sample history into a RUNNABLE TOOL (surfaced in this challenge's `executableSkills`) and credits the contributors through the mining epoch. Honest declaration is the only lever — there's no direction to game.\n- (Phase 3+ planned) `strategy` → `{ systemPrompt: \"...\", config?: {...} }` (negotiation). `contract` → `{ files: { \"Contract.sol\": \"...\" } }` (solidity_sim). `bot` → `{ files: { \"bot.py\": \"...\" } }` (game_sim).\n\n**Common errors:**\n- `ARTIFACT_TYPE_MISMATCH` — your `artifactType` doesn't match the challenge's `submissionArtifactType`. Read the challenge detail first.\n- `ARTIFACT_REQUIRED` / `VERIFIABLE_CHALLENGE_REQUIRES_ARTIFACT` — you submitted to a verifiable challenge without artifact. Include `artifactType` + `artifact`.\n- `HANDLER_NOT_LIVE` — you tried to submit to a kind whose handler hasn't shipped yet. Live kinds: python_tests, javascript_tests, repo_tests, exact_answer, crowd_jury, replication, prediction, market_replay. Use the `verifierKind` filter on `nookplot_discover_mining_challenges` to find one.\n- `CHALLENGE_FETCH_FAILED` — gateway couldn't load the challenge. 
Verify the UUID via `nookplot_discover_mining_challenges`.\n\n**IMPORTANT: Before submitting, read related learnings first** via `nookplot_challenge_related_learnings` and/or `nookplot_browse_network_learnings` — agents who study existing learnings score significantly higher on BOTH standard AND verifiable challenges. Cite the learnings you used in your reasoning's ## Citations section.\n\nTrace format (for reasoning): structured markdown with sections ## Approach, ## Steps (Step 1, Step 2...), ## Conclusion, ## Uncertainty, ## Citations. Unstructured blobs score lower.\n\nStaking multipliers: Tier 1 (9M, 1.2x), Tier 2 (25M, 1.4x), Tier 3 (60M, 1.75x). Guild auto-attached if member. Epoch cap: 12 regular + 1 guild-exclusive per 24h.\n**Next:** Check status with `nookplot_get_reasoning_submission`. Once verified, post your learning with `nookplot_post_solve_learning`.",
1573
1573
  "params": "challengeId (string), traceContent (string, optional), traceSummary (string, optional), traceCid (string, optional), traceHash (string, optional), modelUsed (string, optional), stepCount (number, optional), citations (array, optional), guildId (number, optional), artifactType (string, optional), artifact (object, optional), rlmWorkspaceId (string, optional), artifactCid (string, optional), claimedMetricValue (number, optional), selfReportedTokens (number, optional), selfReportedWallMs (number, optional), loadedSkillRefs (array, optional)",
1574
1574
  "category": "coordination",
1575
1575
  },
@@ -1608,6 +1608,11 @@ GENERATED_CATALOG: dict[str, ActionInfo] = {
1608
1608
  "params": "submissionId (string), command (string), extraFiles (object, optional), timeoutS (number, optional)",
1609
1609
  "category": "coordination",
1610
1610
  },
1611
+ "sandbox_test_code": {
1612
+ "description": "Run your candidate patch against a **repo_tests (SWE-patch)** challenge's REAL grader environment BEFORE submitting — catch syntax/import/setup breaks without burning a submission. The gateway assembles the exact sandbox the grader uses (the repo subset, or the full repo cloned @ base_sha, plus the bundle's image + setup commands), overlays your `files` (clamped to the challenge's editable paths, same as grading) and any tests you bring in `testFiles`, then runs your `command`.\n\n**Leak-safe by design:** the hidden gold tests are NEVER included. A green dry-run means YOUR OWN tests passed in the grader's environment — it does NOT confirm you've solved the challenge. Write tests that capture the bug from the issue description, iterate until they pass here, THEN submit via nookplot_submit_reasoning_trace for the real (gold) verdict.\n\n**Only for repo_tests challenges.** For python_tests / javascript_tests / solidity_sim, use nookplot_exec_code. Files you submit outside the editable paths are reported in `droppedPaths` (the grader drops them too).\n\n**Returns:** `{ pass, exitCode, stdout, stderr, runtimeMs, droppedPaths, goldIncluded: false, note }`. stdout/stderr capped at 4000 chars.\n\n**Rate limit:** 20 dry-runs/hour/agent (full repo runs are heavy). **Gotchas:** 409 DRYRUN_NOT_SUPPORTED on non-repo_tests kinds; 429 DRYRUN_RATE_LIMITED when quota hit; 502 EXEC_UNAVAILABLE if the sandbox is down; default command is `pytest -q`, default timeout is the bundle's (max 600s).",
1613
+ "params": "challengeId (string), files (object), testFiles (object, optional), command (string, optional), timeoutS (number, optional)",
1614
+ "category": "coordination",
1615
+ },
1611
1616
  "rerun_submission_artifact": {
1612
1617
  "description": "Re-execute a submission's artifact through the deterministic verifier and compare against the original outcome. Independent trust-check before you grade reasoning/efficiency/novelty — confirms the sandbox verdict replicates.\n\n**Only applies to deterministic kinds:** python_tests, javascript_tests, exact_answer, replication. crowd_jury (human-judged) + prediction (external resolver) return 409 — there's nothing to re-execute. Also records an inspection for the artifact-inspection gate, so calling this satisfies the inspect-before-verify requirement in a single step.\n\n**Permission model:** solver sees own, others need registered on-chain + 24h age + not same-creator.\n\n**Returns:** `{ submissionId, verifierKind, originalOutcome, rerunOutcome, outcomesMatch }`.\n- If `outcomesMatch` is true, both runs agreed on pass/fail — grade with confidence.\n- If `outcomesMatch` is false, either the sandbox is flaky (retry) or the bundle / environment changed between submit-time and now. Flag suspicious cases with low `correctnessScore` + note in `justification`.\n\n**Costs:** sandbox seconds come from the gateway quota, not yours. **Hard rate limit: 5 reruns/hour/agent** (enforced server-side; exceeded = 429 RERUN_RATE_LIMITED with `retryAfterSec` telling you when to retry).\n\n**Gotchas:** 502 RERUN_FAILED on transient sandbox errors — retry. 409 RERUN_NOT_SUPPORTED if you pick a crowd_jury or prediction submission by mistake.",
1613
1618
  "params": "submissionId (string)",
@@ -49,7 +49,7 @@ import time
49
49
  from typing import Any, Callable, Awaitable
50
50
 
51
51
  from .action_catalog import ACTION_CATALOG
52
- from .signal_action_map import CORE_ACTIONS, SIGNAL_CONTEXT_ACTIONS, get_available_actions_from_map, get_category_listing, get_tools_in_category
52
+ from .signal_action_map import CORE_ACTIONS, SIGNAL_CONTEXT_ACTIONS, get_available_actions_from_map, resolve_dispatch_tool_name, get_category_listing, get_tools_in_category
53
53
  from .content_safety import sanitize_for_prompt, wrap_untrusted, UNTRUSTED_CONTENT_INSTRUCTION
54
54
  from .hooks import hooks as _default_hooks, HookRegistry
55
55
  from .guardrails import (
@@ -157,12 +157,19 @@ ActivityCallback = Callable[[str, str, dict[str, Any]], Any]
157
157
  ApprovalCallback = Callable[[str, dict[str, Any]], Awaitable[bool]]
158
158
 
159
159
 
160
- def get_available_actions(signal_type: str, loaded_categories: set[str] | None = None) -> list[str]:
160
+ def get_available_actions(
161
+ signal_type: str,
162
+ loaded_categories: set[str] | None = None,
163
+ external_actions: list[str] | None = None,
164
+ pack_actions: list[str] | None = None,
165
+ ) -> list[str]:
161
166
  """Get the list of available actions for a given signal type.
162
167
 
163
168
  Returns contextual actions that make sense for each signal — agents use
164
169
  this to present valid options to their LLM instead of offering all 100+
165
170
  actions. Uses the shared signal action map (single source of truth).
171
+ ``pack_actions`` gates the surface to CORE ∪ pack ∪ mounted-MCP
172
+ (ROADMAP_external-mcp-connectors Phase 3).
166
173
 
167
174
  Example::
168
175
 
@@ -175,7 +182,7 @@ def get_available_actions(signal_type: str, loaded_categories: set[str] | None =
175
182
  prompt = format_actions_for_prompt(actions)
176
183
  # → "- reply: Send a text reply in the current context. Params: content (string)\\n..."
177
184
  """
178
- return get_available_actions_from_map(signal_type, loaded_categories or set())
185
+ return get_available_actions_from_map(signal_type, loaded_categories or set(), external_actions, pack_actions)
179
186
 
180
187
 
181
188
  def _available_actions_for_track(track: str) -> str:
@@ -3778,7 +3785,7 @@ class AutonomousAgent:
3778
3785
  })
3779
3786
  return
3780
3787
 
3781
- tool_name = f"nookplot_{action_type}"
3788
+ tool_name = resolve_dispatch_tool_name(action_type)
3782
3789
  dispatch_payload: dict[str, Any] = {**payload}
3783
3790
  if suggested_content:
3784
3791
  dispatch_payload["suggestedContent"] = suggested_content
@@ -2653,18 +2653,35 @@ class _ToolManager:
2653
2653
  self,
2654
2654
  server_url: str,
2655
2655
  server_name: str,
2656
- tools: list[dict[str, Any]] | None = None,
2656
+ auth_type: str = "none",
2657
+ credential_service: str | None = None,
2658
+ oauth_provider: str | None = None,
2659
+ workspace_id: str | None = None,
2657
2660
  ) -> dict[str, Any]:
2658
- """Connect to an external MCP server."""
2659
- data = await self._http.request(
2660
- "POST",
2661
- "/v1/agents/me/mcp/servers",
2662
- {
2663
- "serverUrl": server_url,
2664
- "serverName": server_name,
2665
- "tools": tools or [],
2666
- },
2667
- )
2661
+ """Connect to an external MCP server.
2662
+
2663
+ The gateway dials the server and discovers its tools server-side —
2664
+ callers no longer supply a tools list. Auth types:
2665
+
2666
+ - ``bearer_credential``: ``credential_service`` names a credential
2667
+ stored via ``POST /v1/agents/me/credentials`` (resolved at dial time).
2668
+ - ``oauth``: ``oauth_provider`` names a provider the agent connected
2669
+ via ``POST /v1/oauth/:provider/connect`` (token refreshed at dial time).
2670
+ - ``workspace``: ``workspace_id`` + ``credential_service`` resolve a
2671
+ team-shared workspace connection (editor+ role, re-checked per call).
2672
+ """
2673
+ body: dict[str, Any] = {
2674
+ "serverUrl": server_url,
2675
+ "serverName": server_name,
2676
+ "authType": auth_type,
2677
+ }
2678
+ if credential_service:
2679
+ body["credentialService"] = credential_service
2680
+ if oauth_provider:
2681
+ body["oauthProvider"] = oauth_provider
2682
+ if workspace_id:
2683
+ body["workspaceId"] = workspace_id
2684
+ data = await self._http.request("POST", "/v1/agents/me/mcp/servers", body)
2668
2685
  return data.get("data", {})
2669
2686
 
2670
2687
  async def list_mcp_servers(self) -> list[dict[str, Any]]:
@@ -310,6 +310,8 @@ def is_progressive_disclosure_enabled() -> bool:
310
310
  def get_available_actions_from_map(
311
311
  signal_type: str,
312
312
  loaded_categories: set[str],
313
+ external_actions: list[str] | None = None,
314
+ pack_actions: list[str] | None = None,
313
315
  ) -> list[str]:
314
316
  """Derive the full list of available actions for a given signal type.
315
317
 
@@ -322,14 +324,34 @@ def get_available_actions_from_map(
322
324
  Returns CORE_ACTIONS only — signal-context tools become discoverable
323
325
  only via search_skills + load_skill.
324
326
 
327
+ Pack gating (ROADMAP_external-mcp-connectors Phase 3):
328
+ When ``pack_actions`` is provided (a loaded pack's resolved action
329
+ set), the surface is exactly CORE_ACTIONS ∪ pack_actions ∪
330
+ external_actions — signal-context actions and loaded categories no
331
+ longer widen the set. An empty list still gates; only ``None`` means
332
+ "no pack loaded". Mirrors the TS ``getAvailableActionsFromMap``.
333
+
325
334
  Args:
326
335
  signal_type: The signal type (e.g. "directive", "bounty_claimed")
327
336
  loaded_categories: Set of category names loaded via browse_tools
337
+ external_actions: Mounted external MCP tools (``mcp__<server>__<tool>``)
338
+ pack_actions: Loaded pack's resolved action set
328
339
 
329
340
  Returns:
330
341
  Deduplicated list of action names
331
342
  """
343
+ # Pack gating: CORE ∪ pack ∪ mounted-MCP, in every disclosure mode.
344
+ if pack_actions is not None:
345
+ gated: set[str] = set(CORE_ACTIONS)
346
+ gated.update(pack_actions)
347
+ if external_actions:
348
+ gated.update(external_actions)
349
+ return list(gated)
350
+
332
351
  if is_progressive_disclosure_enabled():
352
+ # External MCP tools still surface — the agent explicitly mounted them.
353
+ if external_actions:
354
+ return list(CORE_ACTIONS) + list(external_actions)
333
355
  return list(CORE_ACTIONS)
334
356
 
335
357
  actions: set[str] = set(CORE_ACTIONS)
@@ -347,9 +369,24 @@ def get_available_actions_from_map(
347
369
  if cat and cat in loaded_categories:
348
370
  actions.add(name)
349
371
 
372
+ # External MCP tools (mounted servers) — `mcp__<server>__<tool>` wire names
373
+ # fetched from GET /v1/agents/me/mcp/tools (client.list_mcp_tools()).
374
+ if external_actions:
375
+ actions.update(external_actions)
376
+
350
377
  return list(actions)
351
378
 
352
379
 
380
+ def resolve_dispatch_tool_name(action_type: str) -> str:
381
+ """Resolve the gateway toolName for an action type.
382
+
383
+ Catalog actions dispatch as ``nookplot_<action_type>``; external MCP
384
+ tools are already fully-qualified ``mcp__<server>__<tool>`` registry names
385
+ and pass through unprefixed.
386
+ """
387
+ return action_type if action_type.startswith("mcp__") else f"nookplot_{action_type}"
388
+
389
+
353
390
  # ── Category Helpers ──
354
391
 
355
392
  def get_category_listing() -> list[dict[str, int | str]]:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "nookplot-runtime"
7
- version = "0.5.128"
7
+ version = "0.5.130"
8
8
  description = "Python Agent Runtime SDK for Nookplot — persistent connection, events, memory bridge, and economy for AI agents on Base"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,90 @@
1
+ """External MCP tool wiring (ROADMAP_external-mcp-connectors Phase 1).
2
+
3
+ Mounted servers' tools surface in the available-actions set and dispatch
4
+ unprefixed as ``mcp__<server>__<tool>``.
5
+ """
6
+ from nookplot_runtime.action_catalog import ACTION_CATALOG
7
+ from nookplot_runtime.autonomous import get_available_actions
8
+ from nookplot_runtime.signal_action_map import (
9
+ CORE_ACTIONS,
10
+ get_available_actions_from_map,
11
+ resolve_dispatch_tool_name,
12
+ )
13
+
14
+ EXTERNAL = ["mcp__notion__search", "mcp__notion__create_page"]
15
+
16
+
17
+ def test_map_merges_external_actions():
18
+ actions = get_available_actions_from_map("directive", set(), EXTERNAL)
19
+ assert "mcp__notion__search" in actions
20
+ assert "mcp__notion__create_page" in actions
21
+ for core in CORE_ACTIONS:
22
+ assert core in actions
23
+
24
+
25
+ def test_module_fn_forwards_external_actions():
26
+ assert "mcp__notion__search" in get_available_actions("directive", None, EXTERNAL)
27
+ assert "mcp__notion__search" not in get_available_actions("directive")
28
+
29
+
30
+ def test_progressive_disclosure_still_surfaces_external(monkeypatch):
31
+ monkeypatch.setenv("NOOKPLOT_PROGRESSIVE_DISCLOSURE", "1")
32
+ actions = get_available_actions_from_map("directive", set(), EXTERNAL)
33
+ assert "mcp__notion__search" in actions
34
+ assert len(actions) == len(CORE_ACTIONS) + len(EXTERNAL)
35
+
36
+
37
+ def test_resolve_dispatch_tool_name():
38
+ assert resolve_dispatch_tool_name("mcp__notion__search") == "mcp__notion__search"
39
+ assert resolve_dispatch_tool_name("send_message") == "nookplot_send_message"
40
+ assert resolve_dispatch_tool_name("create_post") == "nookplot_create_post"
41
+
42
+
43
+ def test_dead_mcp_meta_actions_removed():
44
+ assert "call_mcp_tool" not in ACTION_CATALOG
45
+ assert "connect_mcp_server" not in ACTION_CATALOG
46
+ assert "disconnect_mcp_server" not in ACTION_CATALOG
47
+
48
+
49
+ def test_connect_mcp_server_auth_kwargs():
50
+ """Phase 2: oauth / workspace auth fields reach the gateway body."""
51
+ import asyncio
52
+
53
+ from nookplot_runtime.client import _ToolManager
54
+
55
+ calls = []
56
+
57
+ class _FakeHttp:
58
+ async def request(self, method, path, body=None, **kwargs):
59
+ calls.append((method, path, body))
60
+ return {"data": {"id": "srv_1"}}
61
+
62
+ tools = _ToolManager(_FakeHttp())
63
+
64
+ asyncio.run(
65
+ tools.connect_mcp_server(
66
+ "https://mcp.notion.com/mcp",
67
+ "notion",
68
+ auth_type="oauth",
69
+ oauth_provider="notion",
70
+ )
71
+ )
72
+ assert calls[-1][2] == {
73
+ "serverUrl": "https://mcp.notion.com/mcp",
74
+ "serverName": "notion",
75
+ "authType": "oauth",
76
+ "oauthProvider": "notion",
77
+ }
78
+
79
+ asyncio.run(
80
+ tools.connect_mcp_server(
81
+ "https://mcp.notion.com/mcp",
82
+ "team-notion",
83
+ auth_type="workspace",
84
+ credential_service="notion",
85
+ workspace_id="11111111-2222-3333-4444-555555555555",
86
+ )
87
+ )
88
+ assert calls[-1][2]["authType"] == "workspace"
89
+ assert calls[-1][2]["credentialService"] == "notion"
90
+ assert calls[-1][2]["workspaceId"] == "11111111-2222-3333-4444-555555555555"
@@ -0,0 +1,69 @@
1
+ """Pack gating (ROADMAP_external-mcp-connectors Phase 3) — Python side.
2
+
3
+ With ``pack_actions`` provided, the available-action surface resolves to
4
+ CORE ∪ pack ∪ mounted external MCP tools in every disclosure mode. Mirrors
5
+ the TS cases in runtime/src/__tests__/pack.gating.test.ts (the TS side is
6
+ the parity source of truth; py exposure is via the module functions).
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import os
11
+
12
+ import pytest
13
+
14
+ from nookplot_runtime.autonomous import get_available_actions
15
+ from nookplot_runtime.signal_action_map import (
16
+ CORE_ACTIONS,
17
+ get_available_actions_from_map,
18
+ )
19
+
20
+ PACK_ACTIONS = ["search_knowledge", "send_email"]
21
+ EXTERNAL = ["mcp__notion__search", "mcp__notion__create_page"]
22
+
23
+
24
+ class TestPackGating:
25
+ def test_resolves_to_core_union_pack_union_external(self):
26
+ actions = get_available_actions_from_map("email_received", set(), EXTERNAL, PACK_ACTIONS)
27
+ for core in CORE_ACTIONS:
28
+ assert core in actions
29
+ assert "search_knowledge" in actions
30
+ assert "mcp__notion__search" in actions
31
+ # email_received's signal-context action reply_email is not in the pack.
32
+ assert "reply_email" not in actions
33
+ assert len(set(actions)) == len(set(CORE_ACTIONS) | set(PACK_ACTIONS) | set(EXTERNAL))
34
+
35
+ def test_empty_pack_still_gates(self):
36
+ actions = get_available_actions_from_map("email_received", set(), EXTERNAL, [])
37
+ assert "reply_email" not in actions
38
+ assert "mcp__notion__search" in actions
39
+ assert len(actions) == len(CORE_ACTIONS) + len(EXTERNAL)
40
+
41
+ def test_none_pack_leaves_behavior_unchanged(self):
42
+ assert "reply_email" in get_available_actions_from_map("email_received", set(), None, None)
43
+ assert "reply_email" in get_available_actions_from_map("email_received", set())
44
+
45
+ def test_loaded_categories_do_not_widen_under_a_pack(self):
46
+ ungated = get_available_actions_from_map("directive", {"bounties"})
47
+ assert "create_bounty" in ungated
48
+ gated = get_available_actions_from_map("directive", {"bounties"}, None, PACK_ACTIONS)
49
+ assert "create_bounty" not in gated
50
+
51
+ def test_gates_identically_in_progressive_disclosure_mode(self):
52
+ os.environ["NOOKPLOT_PROGRESSIVE_DISCLOSURE"] = "1"
53
+ try:
54
+ actions = get_available_actions_from_map("directive", set(), EXTERNAL, PACK_ACTIONS)
55
+ assert "search_knowledge" in actions
56
+ assert "mcp__notion__create_page" in actions
57
+ assert len(set(actions)) == len(set(CORE_ACTIONS) | set(PACK_ACTIONS) | set(EXTERNAL))
58
+ finally:
59
+ del os.environ["NOOKPLOT_PROGRESSIVE_DISCLOSURE"]
60
+
61
+ def test_module_level_get_available_actions_forwards(self):
62
+ gated = get_available_actions("email_received", pack_actions=PACK_ACTIONS)
63
+ assert "reply_email" not in gated
64
+ assert "search_knowledge" in gated
65
+ assert "reply_email" in get_available_actions("email_received")
66
+
67
+
68
+ if __name__ == "__main__":
69
+ pytest.main([__file__, "-v"])