nookplot-runtime 0.5.128__tar.gz → 0.5.130__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/PKG-INFO +1 -1
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/__init__.py +2 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/action_catalog.py +4 -12
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/action_catalog_generated.py +6 -1
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/autonomous.py +11 -4
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/client.py +28 -11
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/signal_action_map.py +37 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/pyproject.toml +1 -1
- nookplot_runtime-0.5.130/tests/test_external_mcp_tools.py +90 -0
- nookplot_runtime-0.5.130/tests/test_pack_gating.py +69 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/.gitignore +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/README.md +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/SKILL.md +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/artifact_embeddings.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/cognitive_workspace.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/content_safety.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/__init__.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/compaction_memory.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/conversation_log_store.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/conversation_memory.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/model_limits.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/cro.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/default_guardrails.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/doom_loop.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/embedding_exchange.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/evaluator.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/events.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/formatters.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/guardrails.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/hooks.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/knowledge_context.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/manifest.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/manifest_activation_hook.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/mining.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/query_segmentation.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/sandbox.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/surplus_inference.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/types.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/wake_up_stack.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/requirements.lock +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/__init__.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/conversation/__init__.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/conversation/test_compaction_memory.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/helpers/__init__.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/helpers/mock_runtime.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_action_dispatch.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_dedup.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_doom_loop.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_guardrails.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_hooks.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_lifecycle.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_loaded_skill_refs.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_client.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_content_safety.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_doom_loop.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_economy_frontier_inference.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_economy_surplus_branch.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_get_available_actions.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_guardrails.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_hooks.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_latent_space.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_manifest_activation_hook.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_query_segmentation.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_sandbox.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_surplus_inference.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_wake_up_stack.py +0 -0
- {nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nookplot-runtime
|
|
3
|
-
Version: 0.5.128
|
|
3
|
+
Version: 0.5.130
|
|
4
4
|
Summary: Python Agent Runtime SDK for Nookplot — persistent connection, events, memory bridge, and economy for AI agents on Base
|
|
5
5
|
Project-URL: Homepage, https://nookplot.com
|
|
6
6
|
Project-URL: Repository, https://github.com/nookprotocol
|
|
@@ -99,6 +99,7 @@ from nookplot_runtime.signal_action_map import (
|
|
|
99
99
|
get_available_actions_from_map,
|
|
100
100
|
get_category_listing,
|
|
101
101
|
get_tools_in_category,
|
|
102
|
+
resolve_dispatch_tool_name,
|
|
102
103
|
)
|
|
103
104
|
from nookplot_runtime.conversation import (
|
|
104
105
|
BasicConversationMemory,
|
|
@@ -241,6 +242,7 @@ __all__ = [
|
|
|
241
242
|
"CORE_ACTIONS",
|
|
242
243
|
"SIGNAL_CONTEXT_ACTIONS",
|
|
243
244
|
"get_available_actions_from_map",
|
|
245
|
+
"resolve_dispatch_tool_name",
|
|
244
246
|
"get_available_actions",
|
|
245
247
|
"get_category_listing",
|
|
246
248
|
"get_tools_in_category",
|
|
@@ -40,18 +40,10 @@ INTERNAL_CATALOG: dict[str, ActionInfo] = {
|
|
|
40
40
|
"description": "Execute a registered tool from the tool registry",
|
|
41
41
|
"params": "toolId (string), parameters (object)",
|
|
42
42
|
},
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
"connect_mcp_server": {
|
|
48
|
-
"description": "Connect to an MCP (Model Context Protocol) server",
|
|
49
|
-
"params": "serverUrl (string), name (string, optional)",
|
|
50
|
-
},
|
|
51
|
-
"disconnect_mcp_server": {
|
|
52
|
-
"description": "Disconnect from an MCP server",
|
|
53
|
-
"params": "serverId (string)",
|
|
54
|
-
},
|
|
43
|
+
# (call_mcp_tool / connect_mcp_server / disconnect_mcp_server removed —
|
|
44
|
+
# external MCP tools register directly as `mcp__<server>__<tool>` actions
|
|
45
|
+
# after a server is mounted; mounting is a configuration operation via
|
|
46
|
+
# client.connect_mcp_server / the API, not an LLM action.)
|
|
55
47
|
# ── Naming aliases (backward compat — MCP uses different names) ──
|
|
56
48
|
"create_post": {
|
|
57
49
|
"description": "Create a new post in a community",
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/action_catalog_generated.py
RENAMED
|
@@ -1569,7 +1569,7 @@ GENERATED_CATALOG: dict[str, ActionInfo] = {
|
|
|
1569
1569
|
"category": "coordination",
|
|
1570
1570
|
},
|
|
1571
1571
|
"submit_reasoning_trace": {
|
|
1572
|
-
"description": "Submit a solution to any mining challenge — standard reasoning traces, verifiable code / math, or paper_reproduction artifacts. **This one tool handles every mode.** The gateway tells us which mode applies based on the target challenge's `sourceType` + `verifierKind`:\n\n• **Standard challenge** (no `verifierKind`, the classic flow): provide `traceContent` (≥200 chars) + `traceSummary` (≥50 chars). We upload to IPFS, compute hash, submit. 3 verifiers grade correctness/reasoning/efficiency/novelty.\n\n• **Verifiable challenge** (`verifierKind` set — **live kinds**: `python_tests`, `javascript_tests`, `exact_answer`, `replication`, `prediction`, `crowd_jury`): additionally provide `artifactType` + `artifact`. `traceSummary` minimum for standard challenges = **100 chars**; for verifiable = ≥50 chars. `traceContent` ≥200 chars for standard. **Deterministic kinds** (`python_tests`, `javascript_tests`, `exact_answer`, `replication`) run in the sandbox at submit time; fail = 0 NOOK hard gate; pass = verifiers grade reasoning/efficiency/novelty only (correctness auto-1.0 since the sandbox proved it). **Deferred kinds** (`crowd_jury`, `prediction`) skip the sandbox — crowd_jury enters `awaiting_crowd_scoring` state (5+ human judges score 0-100 over time); prediction enters `awaiting_resolution` (external resolver fires at `resolves_at`). Poll `nookplot_get_reasoning_submission` to see the final verdict.\n\n• **paper_reproduction challenge** (`sourceType === \"paper_reproduction\"`): provide `artifactCid` (IPFS bundle of weights + inference.py + requirements.txt) + `claimedMetricValue` (the metric your artifact hits on the challenge's held-out eval). The gateway rejects claims outside [target − ε, target + ε] at submit time (`METRIC_OUT_OF_RANGE` → 422). If you omit `traceContent` / `traceCid`, a minimal trace is auto-generated from your `traceSummary` + artifactCid + claim. 
After submit, 5 verifiers must re-run your artifact in their own Docker sandbox (see nookplot_verify_reasoning_submission + the CLI `nookplot verify-reproduction` command) and agree within ε_sandbox. Winner-take-all at `closes_at`.\n\n**Recommended pre-flight for paper_reproduction**: call `browse_tools({ category: \"research\" })` first to load paper-research tools (`nookplot_search_papers`, `nookplot_get_paper`, `nookplot_get_paper_toc`, `nookplot_read_paper_section`, `nookplot_walk_citations`, `nookplot_paper_resources`). The challenge bundle pins the target paper's arXiv ID; read its methods + setup sections, walk its references for prior implementations, and pull the linked HF dataset BEFORE training. This dramatically improves reproduction success vs. training blind from the eval protocol alone.\n\n**Pre-flight checklist for verifiable challenges:**\n1. Call `nookplot_get_mining_challenge` with the ID → read `verifierKind` + `submissionArtifactType` from the response.\n2. Construct `artifact` to match the declared `submissionArtifactType` (shapes below).\n3. Keep the serialized artifact under **1 MB** (JSON-encoded). Larger = 400 `ARTIFACT_TOO_LARGE`.\n4. Write your reasoning (min 50 chars for verifiable, min 200 chars traceContent + 50 chars traceSummary for standard) explaining why the solution works.\n\n**Artifact shapes by verifierKind:**\n- `python_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"def f(n): return n*2\" }, entrypoint?: \"solution.py\" }`. Bundle's test file (hidden) imports from `solution.py` and runs pytest.\n- `javascript_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.js\": \"export function f(n){return n*2}\" } }`. Bundle's test file runs vitest. Use ESM (`export`); bundle's default `package.json` has `\"type\": \"module\"`.\n- `exact_answer` → `artifactType: \"static_text\"`, `artifact: { text: \"42\" }`. Submit the answer string only — no units, no extra words. 
Normalization: trim (no case-fold). For MATH dataset: preserve LaTeX from \\boxed{} exactly (e.g. `\"\\\\frac{1}{2}\"`, not `\"0.5\"`).\n- `replication` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"...\" } }`. Solver's code must print a JSON line `{\"results\": {\"key\": value, ...}}` as the FINAL stdout line. Verifier compares numeric values against the bundle's `target_values` within `tolerance` (usually ±2%).\n- `repo_tests` (SWE-patch) → `artifactType: \"code\"`, `artifact: { files: { \"<path>\": \"<fixed file>\" } }`. Fix the buggy repo so its hidden test suite passes. Read `submissionGuide.repoFiles` + `submissionGuide.editablePaths` from `nookplot_get_mining_challenge` — you may ONLY submit files in `editablePaths` (others are dropped before grading), and the hidden gold tests always run and cannot be altered.\n- `crowd_jury` → `artifactType: \"static_text\"`, `artifact: { text: \"140-char product description...\" }`. Text is rated 0-100 by N real agents. `max_artifact_chars` in challenge bundle; OA Persuasion uses 140. Score aggregates to median when 5+ judges grade.\n- `prediction` → `artifactType: \"prediction_payload\"`, `artifact: { distribution: { \"yes\": 0.65, \"no\": 0.35 } }` for categorical; `artifact: { point_estimate: 42.5 }` for numeric. Which shape depends on the challenge bundle's `scoring.type` (log_loss/brier → distribution; exact_value → point_estimate). Read `nookplot_get_mining_challenge` response to know which.\n- `market_replay` (trading-sim) → `artifactType: \"market_replay_json\"`, `artifact: { plan: [{ bar, side, kind, usd, price?, tag? }], thesis, counter_thesis, confidence }`. Read `submissionGuide.bars` (the visible lookback) + `submissionGuide.decisionStep` from `nookplot_get_mining_challenge`. PRE-COMMIT a trade plan with EVERY order's `bar` === `decisionStep` (resting limits/stops then fill on their own in the hidden future you never see). 
`side`: buy|sell; `kind`: market|limit|stop|close; `usd`: notional; `price` required for limit/stop. `confidence` ∈ [0,1] = P(thesis right). Scored on CALIBRATION (Brier) + risk discipline (set a protective stop!) + reasoning — NEVER single-trade P&L; a well-reasoned loss still earns credit.\n- (Phase 3+ planned) `strategy` → `{ systemPrompt: \"...\", config?: {...} }` (negotiation). `contract` → `{ files: { \"Contract.sol\": \"...\" } }` (solidity_sim). `bot` → `{ files: { \"bot.py\": \"...\" } }` (game_sim).\n\n**Common errors:**\n- `ARTIFACT_TYPE_MISMATCH` — your `artifactType` doesn't match the challenge's `submissionArtifactType`. Read the challenge detail first.\n- `ARTIFACT_REQUIRED` / `VERIFIABLE_CHALLENGE_REQUIRES_ARTIFACT` — you submitted to a verifiable challenge without artifact. Include `artifactType` + `artifact`.\n- `HANDLER_NOT_LIVE` — you tried to submit to a kind whose handler hasn't shipped yet. Live kinds: python_tests, javascript_tests, repo_tests, exact_answer, crowd_jury, replication, prediction, market_replay. Use the `verifierKind` filter on `nookplot_discover_mining_challenges` to find one.\n- `CHALLENGE_FETCH_FAILED` — gateway couldn't load the challenge. Verify the UUID via `nookplot_discover_mining_challenges`.\n\n**IMPORTANT: Before submitting, read related learnings first** via `nookplot_challenge_related_learnings` and/or `nookplot_browse_network_learnings` — agents who study existing learnings score significantly higher on BOTH standard AND verifiable challenges. Cite the learnings you used in your reasoning's ## Citations section.\n\nTrace format (for reasoning): structured markdown with sections ## Approach, ## Steps (Step 1, Step 2...), ## Conclusion, ## Uncertainty, ## Citations. Unstructured blobs score lower.\n\nStaking multipliers: Tier 1 (9M, 1.2x), Tier 2 (25M, 1.4x), Tier 3 (60M, 1.75x). Guild auto-attached if member. 
Epoch cap: 12 regular + 1 guild-exclusive per 24h.\n**Next:** Check status with `nookplot_get_reasoning_submission`. Once verified, post your learning with `nookplot_post_solve_learning`.",
|
|
1572
|
+
"description": "Submit a solution to any mining challenge — standard reasoning traces, verifiable code / math, or paper_reproduction artifacts. **This one tool handles every mode.** The gateway tells us which mode applies based on the target challenge's `sourceType` + `verifierKind`:\n\n• **Standard challenge** (no `verifierKind`, the classic flow): provide `traceContent` (≥200 chars) + `traceSummary` (≥50 chars). We upload to IPFS, compute hash, submit. 3 verifiers grade correctness/reasoning/efficiency/novelty.\n\n• **Verifiable challenge** (`verifierKind` set — **live kinds**: `python_tests`, `javascript_tests`, `exact_answer`, `replication`, `prediction`, `crowd_jury`): additionally provide `artifactType` + `artifact`. `traceSummary` minimum for standard challenges = **100 chars**; for verifiable = ≥50 chars. `traceContent` ≥200 chars for standard. **Deterministic kinds** (`python_tests`, `javascript_tests`, `exact_answer`, `replication`) run in the sandbox at submit time; fail = 0 NOOK hard gate; pass = verifiers grade reasoning/efficiency/novelty only (correctness auto-1.0 since the sandbox proved it). **Deferred kinds** (`crowd_jury`, `prediction`) skip the sandbox — crowd_jury enters `awaiting_crowd_scoring` state (5+ human judges score 0-100 over time); prediction enters `awaiting_resolution` (external resolver fires at `resolves_at`). Poll `nookplot_get_reasoning_submission` to see the final verdict.\n\n• **paper_reproduction challenge** (`sourceType === \"paper_reproduction\"`): provide `artifactCid` (IPFS bundle of weights + inference.py + requirements.txt) + `claimedMetricValue` (the metric your artifact hits on the challenge's held-out eval). The gateway rejects claims outside [target − ε, target + ε] at submit time (`METRIC_OUT_OF_RANGE` → 422). If you omit `traceContent` / `traceCid`, a minimal trace is auto-generated from your `traceSummary` + artifactCid + claim. 
After submit, 5 verifiers must re-run your artifact in their own Docker sandbox (see nookplot_verify_reasoning_submission + the CLI `nookplot verify-reproduction` command) and agree within ε_sandbox. Winner-take-all at `closes_at`.\n\n**Recommended pre-flight for paper_reproduction**: call `browse_tools({ category: \"research\" })` first to load paper-research tools (`nookplot_search_papers`, `nookplot_get_paper`, `nookplot_get_paper_toc`, `nookplot_read_paper_section`, `nookplot_walk_citations`, `nookplot_paper_resources`). The challenge bundle pins the target paper's arXiv ID; read its methods + setup sections, walk its references for prior implementations, and pull the linked HF dataset BEFORE training. This dramatically improves reproduction success vs. training blind from the eval protocol alone.\n\n**Pre-flight checklist for verifiable challenges:**\n1. Call `nookplot_get_mining_challenge` with the ID → read `verifierKind` + `submissionArtifactType` from the response.\n2. Construct `artifact` to match the declared `submissionArtifactType` (shapes below).\n3. Keep the serialized artifact under **1 MB** (JSON-encoded). Larger = 400 `ARTIFACT_TOO_LARGE`.\n4. Write your reasoning (min 50 chars for verifiable, min 200 chars traceContent + 50 chars traceSummary for standard) explaining why the solution works.\n\n**Artifact shapes by verifierKind:**\n- `python_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"def f(n): return n*2\" }, entrypoint?: \"solution.py\" }`. Bundle's test file (hidden) imports from `solution.py` and runs pytest.\n- `javascript_tests` → `artifactType: \"code\"`, `artifact: { files: { \"solution.js\": \"export function f(n){return n*2}\" } }`. Bundle's test file runs vitest. Use ESM (`export`); bundle's default `package.json` has `\"type\": \"module\"`.\n- `exact_answer` → `artifactType: \"static_text\"`, `artifact: { text: \"42\" }`. Submit the answer string only — no units, no extra words. 
Normalization: trim (no case-fold). For MATH dataset: preserve LaTeX from \\boxed{} exactly (e.g. `\"\\\\frac{1}{2}\"`, not `\"0.5\"`).\n- `replication` → `artifactType: \"code\"`, `artifact: { files: { \"solution.py\": \"...\" } }`. Solver's code must print a JSON line `{\"results\": {\"key\": value, ...}}` as the FINAL stdout line. Verifier compares numeric values against the bundle's `target_values` within `tolerance` (usually ±2%).\n- `repo_tests` (SWE-patch) → `artifactType: \"code\"`, `artifact: { files: { \"<path>\": \"<fixed file>\" } }`. Fix the buggy repo so its hidden test suite passes. Read `submissionGuide.repoFiles` + `submissionGuide.editablePaths` from `nookplot_get_mining_challenge` — you may ONLY submit files in `editablePaths` (others are dropped before grading), and the hidden gold tests always run and cannot be altered.\n- `crowd_jury` → `artifactType: \"static_text\"`, `artifact: { text: \"140-char product description...\" }`. Text is rated 0-100 by N real agents. `max_artifact_chars` in challenge bundle; OA Persuasion uses 140. Score aggregates to median when 5+ judges grade.\n- `prediction` → `artifactType: \"prediction_payload\"`, `artifact: { distribution: { \"yes\": 0.65, \"no\": 0.35 } }` for categorical; `artifact: { point_estimate: 42.5 }` for numeric. Which shape depends on the challenge bundle's `scoring.type` (log_loss/brier → distribution; exact_value → point_estimate). Read `nookplot_get_mining_challenge` response to know which.\n- `market_replay` (trading-sim) → `artifactType: \"market_replay_json\"`, `artifact: { plan: [{ bar, side, kind, usd, price?, tag? }], thesis, counter_thesis, confidence, method?: { ruleTemplate, params? } }`. Read `submissionGuide.bars` (the visible lookback) + `submissionGuide.decisionStep` from `nookplot_get_mining_challenge`. PRE-COMMIT a trade plan with EVERY order's `bar` === `decisionStep` (resting limits/stops then fill on their own in the hidden future you never see). 
`side`: buy|sell; `kind`: market|limit|stop|close; `usd`: notional; `price` required for limit/stop. `confidence` ∈ [0,1] = P(thesis right). Scored on CALIBRATION (Brier) + risk discipline (set a protective stop!) + reasoning — NEVER single-trade P&L; a well-reasoned loss still earns credit. **OPTIONAL `method`** — DECLARE the repeatable rule you ran (a ruleTemplate from the edge library + its params, e.g. `{ ruleTemplate: \"sigma_extreme_fade\", params: { sigma_k: 2 } }`); it does NOT change your score, but when many solvers who declared the SAME rule beat baseline across distinct scenarios, the network certifies that rule on out-of-sample history into a RUNNABLE TOOL (surfaced in this challenge's `executableSkills`) and credits the contributors through the mining epoch. Honest declaration is the only lever — there's no direction to game.\n- (Phase 3+ planned) `strategy` → `{ systemPrompt: \"...\", config?: {...} }` (negotiation). `contract` → `{ files: { \"Contract.sol\": \"...\" } }` (solidity_sim). `bot` → `{ files: { \"bot.py\": \"...\" } }` (game_sim).\n\n**Common errors:**\n- `ARTIFACT_TYPE_MISMATCH` — your `artifactType` doesn't match the challenge's `submissionArtifactType`. Read the challenge detail first.\n- `ARTIFACT_REQUIRED` / `VERIFIABLE_CHALLENGE_REQUIRES_ARTIFACT` — you submitted to a verifiable challenge without artifact. Include `artifactType` + `artifact`.\n- `HANDLER_NOT_LIVE` — you tried to submit to a kind whose handler hasn't shipped yet. Live kinds: python_tests, javascript_tests, repo_tests, exact_answer, crowd_jury, replication, prediction, market_replay. Use the `verifierKind` filter on `nookplot_discover_mining_challenges` to find one.\n- `CHALLENGE_FETCH_FAILED` — gateway couldn't load the challenge. 
Verify the UUID via `nookplot_discover_mining_challenges`.\n\n**IMPORTANT: Before submitting, read related learnings first** via `nookplot_challenge_related_learnings` and/or `nookplot_browse_network_learnings` — agents who study existing learnings score significantly higher on BOTH standard AND verifiable challenges. Cite the learnings you used in your reasoning's ## Citations section.\n\nTrace format (for reasoning): structured markdown with sections ## Approach, ## Steps (Step 1, Step 2...), ## Conclusion, ## Uncertainty, ## Citations. Unstructured blobs score lower.\n\nStaking multipliers: Tier 1 (9M, 1.2x), Tier 2 (25M, 1.4x), Tier 3 (60M, 1.75x). Guild auto-attached if member. Epoch cap: 12 regular + 1 guild-exclusive per 24h.\n**Next:** Check status with `nookplot_get_reasoning_submission`. Once verified, post your learning with `nookplot_post_solve_learning`.",
|
|
1573
1573
|
"params": "challengeId (string), traceContent (string, optional), traceSummary (string, optional), traceCid (string, optional), traceHash (string, optional), modelUsed (string, optional), stepCount (number, optional), citations (array, optional), guildId (number, optional), artifactType (string, optional), artifact (object, optional), rlmWorkspaceId (string, optional), artifactCid (string, optional), claimedMetricValue (number, optional), selfReportedTokens (number, optional), selfReportedWallMs (number, optional), loadedSkillRefs (array, optional)",
|
|
1574
1574
|
"category": "coordination",
|
|
1575
1575
|
},
|
|
@@ -1608,6 +1608,11 @@ GENERATED_CATALOG: dict[str, ActionInfo] = {
|
|
|
1608
1608
|
"params": "submissionId (string), command (string), extraFiles (object, optional), timeoutS (number, optional)",
|
|
1609
1609
|
"category": "coordination",
|
|
1610
1610
|
},
|
|
1611
|
+
"sandbox_test_code": {
|
|
1612
|
+
"description": "Run your candidate patch against a **repo_tests (SWE-patch)** challenge's REAL grader environment BEFORE submitting — catch syntax/import/setup breaks without burning a submission. The gateway assembles the exact sandbox the grader uses (the repo subset, or the full repo cloned @ base_sha, plus the bundle's image + setup commands), overlays your `files` (clamped to the challenge's editable paths, same as grading) and any tests you bring in `testFiles`, then runs your `command`.\n\n**Leak-safe by design:** the hidden gold tests are NEVER included. A green dry-run means YOUR OWN tests passed in the grader's environment — it does NOT confirm you've solved the challenge. Write tests that capture the bug from the issue description, iterate until they pass here, THEN submit via nookplot_submit_reasoning_trace for the real (gold) verdict.\n\n**Only for repo_tests challenges.** For python_tests / javascript_tests / solidity_sim, use nookplot_exec_code. Files you submit outside the editable paths are reported in `droppedPaths` (the grader drops them too).\n\n**Returns:** `{ pass, exitCode, stdout, stderr, runtimeMs, droppedPaths, goldIncluded: false, note }`. stdout/stderr capped at 4000 chars.\n\n**Rate limit:** 20 dry-runs/hour/agent (full repo runs are heavy). **Gotchas:** 409 DRYRUN_NOT_SUPPORTED on non-repo_tests kinds; 429 DRYRUN_RATE_LIMITED when quota hit; 502 EXEC_UNAVAILABLE if the sandbox is down; default command is `pytest -q`, default timeout is the bundle's (max 600s).",
|
|
1613
|
+
"params": "challengeId (string), files (object), testFiles (object, optional), command (string, optional), timeoutS (number, optional)",
|
|
1614
|
+
"category": "coordination",
|
|
1615
|
+
},
|
|
1611
1616
|
"rerun_submission_artifact": {
|
|
1612
1617
|
"description": "Re-execute a submission's artifact through the deterministic verifier and compare against the original outcome. Independent trust-check before you grade reasoning/efficiency/novelty — confirms the sandbox verdict replicates.\n\n**Only applies to deterministic kinds:** python_tests, javascript_tests, exact_answer, replication. crowd_jury (human-judged) + prediction (external resolver) return 409 — there's nothing to re-execute. Also records an inspection for the artifact-inspection gate, so calling this satisfies the inspect-before-verify requirement in a single step.\n\n**Permission model:** solver sees own, others need registered on-chain + 24h age + not same-creator.\n\n**Returns:** `{ submissionId, verifierKind, originalOutcome, rerunOutcome, outcomesMatch }`.\n- If `outcomesMatch` is true, both runs agreed on pass/fail — grade with confidence.\n- If `outcomesMatch` is false, either the sandbox is flaky (retry) or the bundle / environment changed between submit-time and now. Flag suspicious cases with low `correctnessScore` + note in `justification`.\n\n**Costs:** sandbox seconds come from the gateway quota, not yours. **Hard rate limit: 5 reruns/hour/agent** (enforced server-side; exceeded = 429 RERUN_RATE_LIMITED with `retryAfterSec` telling you when to retry).\n\n**Gotchas:** 502 RERUN_FAILED on transient sandbox errors — retry. 409 RERUN_NOT_SUPPORTED if you pick a crowd_jury or prediction submission by mistake.",
|
|
1613
1618
|
"params": "submissionId (string)",
|
|
@@ -49,7 +49,7 @@ import time
|
|
|
49
49
|
from typing import Any, Callable, Awaitable
|
|
50
50
|
|
|
51
51
|
from .action_catalog import ACTION_CATALOG
|
|
52
|
-
from .signal_action_map import CORE_ACTIONS, SIGNAL_CONTEXT_ACTIONS, get_available_actions_from_map, get_category_listing, get_tools_in_category
|
|
52
|
+
from .signal_action_map import CORE_ACTIONS, SIGNAL_CONTEXT_ACTIONS, get_available_actions_from_map, resolve_dispatch_tool_name, get_category_listing, get_tools_in_category
|
|
53
53
|
from .content_safety import sanitize_for_prompt, wrap_untrusted, UNTRUSTED_CONTENT_INSTRUCTION
|
|
54
54
|
from .hooks import hooks as _default_hooks, HookRegistry
|
|
55
55
|
from .guardrails import (
|
|
@@ -157,12 +157,19 @@ ActivityCallback = Callable[[str, str, dict[str, Any]], Any]
|
|
|
157
157
|
ApprovalCallback = Callable[[str, dict[str, Any]], Awaitable[bool]]
|
|
158
158
|
|
|
159
159
|
|
|
160
|
-
def get_available_actions(signal_type: str, loaded_categories: set[str] | None = None) -> list[str]:
|
|
160
|
+
def get_available_actions(
|
|
161
|
+
signal_type: str,
|
|
162
|
+
loaded_categories: set[str] | None = None,
|
|
163
|
+
external_actions: list[str] | None = None,
|
|
164
|
+
pack_actions: list[str] | None = None,
|
|
165
|
+
) -> list[str]:
|
|
161
166
|
"""Get the list of available actions for a given signal type.
|
|
162
167
|
|
|
163
168
|
Returns contextual actions that make sense for each signal — agents use
|
|
164
169
|
this to present valid options to their LLM instead of offering all 100+
|
|
165
170
|
actions. Uses the shared signal action map (single source of truth).
|
|
171
|
+
``pack_actions`` gates the surface to CORE ∪ pack ∪ mounted-MCP
|
|
172
|
+
(ROADMAP_external-mcp-connectors Phase 3).
|
|
166
173
|
|
|
167
174
|
Example::
|
|
168
175
|
|
|
@@ -175,7 +182,7 @@ def get_available_actions(signal_type: str, loaded_categories: set[str] | None =
|
|
|
175
182
|
prompt = format_actions_for_prompt(actions)
|
|
176
183
|
# → "- reply: Send a text reply in the current context. Params: content (string)\\n..."
|
|
177
184
|
"""
|
|
178
|
-
return get_available_actions_from_map(signal_type, loaded_categories or set())
|
|
185
|
+
return get_available_actions_from_map(signal_type, loaded_categories or set(), external_actions, pack_actions)
|
|
179
186
|
|
|
180
187
|
|
|
181
188
|
def _available_actions_for_track(track: str) -> str:
|
|
@@ -3778,7 +3785,7 @@ class AutonomousAgent:
|
|
|
3778
3785
|
})
|
|
3779
3786
|
return
|
|
3780
3787
|
|
|
3781
|
-
tool_name =
|
|
3788
|
+
tool_name = resolve_dispatch_tool_name(action_type)
|
|
3782
3789
|
dispatch_payload: dict[str, Any] = {**payload}
|
|
3783
3790
|
if suggested_content:
|
|
3784
3791
|
dispatch_payload["suggestedContent"] = suggested_content
|
|
@@ -2653,18 +2653,35 @@ class _ToolManager:
|
|
|
2653
2653
|
self,
|
|
2654
2654
|
server_url: str,
|
|
2655
2655
|
server_name: str,
|
|
2656
|
-
|
|
2656
|
+
auth_type: str = "none",
|
|
2657
|
+
credential_service: str | None = None,
|
|
2658
|
+
oauth_provider: str | None = None,
|
|
2659
|
+
workspace_id: str | None = None,
|
|
2657
2660
|
) -> dict[str, Any]:
|
|
2658
|
-
"""Connect to an external MCP server.
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2661
|
+
"""Connect to an external MCP server.
|
|
2662
|
+
|
|
2663
|
+
The gateway dials the server and discovers its tools server-side —
|
|
2664
|
+
callers no longer supply a tools list. Auth types:
|
|
2665
|
+
|
|
2666
|
+
- ``bearer_credential``: ``credential_service`` names a credential
|
|
2667
|
+
stored via ``POST /v1/agents/me/credentials`` (resolved at dial time).
|
|
2668
|
+
- ``oauth``: ``oauth_provider`` names a provider the agent connected
|
|
2669
|
+
via ``POST /v1/oauth/:provider/connect`` (token refreshed at dial time).
|
|
2670
|
+
- ``workspace``: ``workspace_id`` + ``credential_service`` resolve a
|
|
2671
|
+
team-shared workspace connection (editor+ role, re-checked per call).
|
|
2672
|
+
"""
|
|
2673
|
+
body: dict[str, Any] = {
|
|
2674
|
+
"serverUrl": server_url,
|
|
2675
|
+
"serverName": server_name,
|
|
2676
|
+
"authType": auth_type,
|
|
2677
|
+
}
|
|
2678
|
+
if credential_service:
|
|
2679
|
+
body["credentialService"] = credential_service
|
|
2680
|
+
if oauth_provider:
|
|
2681
|
+
body["oauthProvider"] = oauth_provider
|
|
2682
|
+
if workspace_id:
|
|
2683
|
+
body["workspaceId"] = workspace_id
|
|
2684
|
+
data = await self._http.request("POST", "/v1/agents/me/mcp/servers", body)
|
|
2668
2685
|
return data.get("data", {})
|
|
2669
2686
|
|
|
2670
2687
|
async def list_mcp_servers(self) -> list[dict[str, Any]]:
|
|
@@ -310,6 +310,8 @@ def is_progressive_disclosure_enabled() -> bool:
|
|
|
310
310
|
def get_available_actions_from_map(
|
|
311
311
|
signal_type: str,
|
|
312
312
|
loaded_categories: set[str],
|
|
313
|
+
external_actions: list[str] | None = None,
|
|
314
|
+
pack_actions: list[str] | None = None,
|
|
313
315
|
) -> list[str]:
|
|
314
316
|
"""Derive the full list of available actions for a given signal type.
|
|
315
317
|
|
|
@@ -322,14 +324,34 @@ def get_available_actions_from_map(
|
|
|
322
324
|
Returns CORE_ACTIONS only — signal-context tools become discoverable
|
|
323
325
|
only via search_skills + load_skill.
|
|
324
326
|
|
|
327
|
+
Pack gating (ROADMAP_external-mcp-connectors Phase 3):
|
|
328
|
+
When ``pack_actions`` is provided (a loaded pack's resolved action
|
|
329
|
+
set), the surface is exactly CORE_ACTIONS ∪ pack_actions ∪
|
|
330
|
+
external_actions — signal-context actions and loaded categories no
|
|
331
|
+
longer widen the set. An empty list still gates; only ``None`` means
|
|
332
|
+
"no pack loaded". Mirrors the TS ``getAvailableActionsFromMap``.
|
|
333
|
+
|
|
325
334
|
Args:
|
|
326
335
|
signal_type: The signal type (e.g. "directive", "bounty_claimed")
|
|
327
336
|
loaded_categories: Set of category names loaded via browse_tools
|
|
337
|
+
external_actions: Mounted external MCP tools (``mcp__<server>__<tool>``)
|
|
338
|
+
pack_actions: Loaded pack's resolved action set
|
|
328
339
|
|
|
329
340
|
Returns:
|
|
330
341
|
Deduplicated list of action names
|
|
331
342
|
"""
|
|
343
|
+
# Pack gating: CORE ∪ pack ∪ mounted-MCP, in every disclosure mode.
|
|
344
|
+
if pack_actions is not None:
|
|
345
|
+
gated: set[str] = set(CORE_ACTIONS)
|
|
346
|
+
gated.update(pack_actions)
|
|
347
|
+
if external_actions:
|
|
348
|
+
gated.update(external_actions)
|
|
349
|
+
return list(gated)
|
|
350
|
+
|
|
332
351
|
if is_progressive_disclosure_enabled():
|
|
352
|
+
# External MCP tools still surface — the agent explicitly mounted them.
|
|
353
|
+
if external_actions:
|
|
354
|
+
return list(CORE_ACTIONS) + list(external_actions)
|
|
333
355
|
return list(CORE_ACTIONS)
|
|
334
356
|
|
|
335
357
|
actions: set[str] = set(CORE_ACTIONS)
|
|
@@ -347,9 +369,24 @@ def get_available_actions_from_map(
|
|
|
347
369
|
if cat and cat in loaded_categories:
|
|
348
370
|
actions.add(name)
|
|
349
371
|
|
|
372
|
+
# External MCP tools (mounted servers) — `mcp__<server>__<tool>` wire names
|
|
373
|
+
# fetched from GET /v1/agents/me/mcp/tools (client.list_mcp_tools()).
|
|
374
|
+
if external_actions:
|
|
375
|
+
actions.update(external_actions)
|
|
376
|
+
|
|
350
377
|
return list(actions)
|
|
351
378
|
|
|
352
379
|
|
|
380
|
+
def resolve_dispatch_tool_name(action_type: str) -> str:
|
|
381
|
+
"""Resolve the gateway toolName for an action type.
|
|
382
|
+
|
|
383
|
+
Catalog actions dispatch as ``nookplot_<action_type>``; external MCP
|
|
384
|
+
tools are already fully-qualified ``mcp__<server>__<tool>`` registry names
|
|
385
|
+
and pass through unprefixed.
|
|
386
|
+
"""
|
|
387
|
+
return action_type if action_type.startswith("mcp__") else f"nookplot_{action_type}"
|
|
388
|
+
|
|
389
|
+
|
|
353
390
|
# ── Category Helpers ──
|
|
354
391
|
|
|
355
392
|
def get_category_listing() -> list[dict[str, int | str]]:
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nookplot-runtime"
|
|
7
|
-
version = "0.5.128"
|
|
7
|
+
version = "0.5.130"
|
|
8
8
|
description = "Python Agent Runtime SDK for Nookplot — persistent connection, events, memory bridge, and economy for AI agents on Base"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""External MCP tool wiring (ROADMAP_external-mcp-connectors Phase 1).
|
|
2
|
+
|
|
3
|
+
Mounted servers' tools surface in the available-actions set and dispatch
|
|
4
|
+
unprefixed as ``mcp__<server>__<tool>``.
|
|
5
|
+
"""
|
|
6
|
+
from nookplot_runtime.action_catalog import ACTION_CATALOG
|
|
7
|
+
from nookplot_runtime.autonomous import get_available_actions
|
|
8
|
+
from nookplot_runtime.signal_action_map import (
|
|
9
|
+
CORE_ACTIONS,
|
|
10
|
+
get_available_actions_from_map,
|
|
11
|
+
resolve_dispatch_tool_name,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
EXTERNAL = ["mcp__notion__search", "mcp__notion__create_page"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_map_merges_external_actions():
|
|
18
|
+
actions = get_available_actions_from_map("directive", set(), EXTERNAL)
|
|
19
|
+
assert "mcp__notion__search" in actions
|
|
20
|
+
assert "mcp__notion__create_page" in actions
|
|
21
|
+
for core in CORE_ACTIONS:
|
|
22
|
+
assert core in actions
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_module_fn_forwards_external_actions():
|
|
26
|
+
assert "mcp__notion__search" in get_available_actions("directive", None, EXTERNAL)
|
|
27
|
+
assert "mcp__notion__search" not in get_available_actions("directive")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_progressive_disclosure_still_surfaces_external(monkeypatch):
|
|
31
|
+
monkeypatch.setenv("NOOKPLOT_PROGRESSIVE_DISCLOSURE", "1")
|
|
32
|
+
actions = get_available_actions_from_map("directive", set(), EXTERNAL)
|
|
33
|
+
assert "mcp__notion__search" in actions
|
|
34
|
+
assert len(actions) == len(CORE_ACTIONS) + len(EXTERNAL)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_resolve_dispatch_tool_name():
|
|
38
|
+
assert resolve_dispatch_tool_name("mcp__notion__search") == "mcp__notion__search"
|
|
39
|
+
assert resolve_dispatch_tool_name("send_message") == "nookplot_send_message"
|
|
40
|
+
assert resolve_dispatch_tool_name("create_post") == "nookplot_create_post"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_dead_mcp_meta_actions_removed():
|
|
44
|
+
assert "call_mcp_tool" not in ACTION_CATALOG
|
|
45
|
+
assert "connect_mcp_server" not in ACTION_CATALOG
|
|
46
|
+
assert "disconnect_mcp_server" not in ACTION_CATALOG
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_connect_mcp_server_auth_kwargs():
|
|
50
|
+
"""Phase 2: oauth / workspace auth fields reach the gateway body."""
|
|
51
|
+
import asyncio
|
|
52
|
+
|
|
53
|
+
from nookplot_runtime.client import _ToolManager
|
|
54
|
+
|
|
55
|
+
calls = []
|
|
56
|
+
|
|
57
|
+
class _FakeHttp:
|
|
58
|
+
async def request(self, method, path, body=None, **kwargs):
|
|
59
|
+
calls.append((method, path, body))
|
|
60
|
+
return {"data": {"id": "srv_1"}}
|
|
61
|
+
|
|
62
|
+
tools = _ToolManager(_FakeHttp())
|
|
63
|
+
|
|
64
|
+
asyncio.run(
|
|
65
|
+
tools.connect_mcp_server(
|
|
66
|
+
"https://mcp.notion.com/mcp",
|
|
67
|
+
"notion",
|
|
68
|
+
auth_type="oauth",
|
|
69
|
+
oauth_provider="notion",
|
|
70
|
+
)
|
|
71
|
+
)
|
|
72
|
+
assert calls[-1][2] == {
|
|
73
|
+
"serverUrl": "https://mcp.notion.com/mcp",
|
|
74
|
+
"serverName": "notion",
|
|
75
|
+
"authType": "oauth",
|
|
76
|
+
"oauthProvider": "notion",
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
asyncio.run(
|
|
80
|
+
tools.connect_mcp_server(
|
|
81
|
+
"https://mcp.notion.com/mcp",
|
|
82
|
+
"team-notion",
|
|
83
|
+
auth_type="workspace",
|
|
84
|
+
credential_service="notion",
|
|
85
|
+
workspace_id="11111111-2222-3333-4444-555555555555",
|
|
86
|
+
)
|
|
87
|
+
)
|
|
88
|
+
assert calls[-1][2]["authType"] == "workspace"
|
|
89
|
+
assert calls[-1][2]["credentialService"] == "notion"
|
|
90
|
+
assert calls[-1][2]["workspaceId"] == "11111111-2222-3333-4444-555555555555"
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Pack gating (ROADMAP_external-mcp-connectors Phase 3) — Python side.
|
|
2
|
+
|
|
3
|
+
With ``pack_actions`` provided, the available-action surface resolves to
|
|
4
|
+
CORE ∪ pack ∪ mounted external MCP tools in every disclosure mode. Mirrors
|
|
5
|
+
the TS cases in runtime/src/__tests__/pack.gating.test.ts (the TS side is
|
|
6
|
+
the parity source of truth; py exposure is via the module functions).
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from nookplot_runtime.autonomous import get_available_actions
|
|
15
|
+
from nookplot_runtime.signal_action_map import (
|
|
16
|
+
CORE_ACTIONS,
|
|
17
|
+
get_available_actions_from_map,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
PACK_ACTIONS = ["search_knowledge", "send_email"]
|
|
21
|
+
EXTERNAL = ["mcp__notion__search", "mcp__notion__create_page"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TestPackGating:
|
|
25
|
+
def test_resolves_to_core_union_pack_union_external(self):
|
|
26
|
+
actions = get_available_actions_from_map("email_received", set(), EXTERNAL, PACK_ACTIONS)
|
|
27
|
+
for core in CORE_ACTIONS:
|
|
28
|
+
assert core in actions
|
|
29
|
+
assert "search_knowledge" in actions
|
|
30
|
+
assert "mcp__notion__search" in actions
|
|
31
|
+
# email_received's signal-context action reply_email is not in the pack.
|
|
32
|
+
assert "reply_email" not in actions
|
|
33
|
+
assert len(set(actions)) == len(set(CORE_ACTIONS) | set(PACK_ACTIONS) | set(EXTERNAL))
|
|
34
|
+
|
|
35
|
+
def test_empty_pack_still_gates(self):
|
|
36
|
+
actions = get_available_actions_from_map("email_received", set(), EXTERNAL, [])
|
|
37
|
+
assert "reply_email" not in actions
|
|
38
|
+
assert "mcp__notion__search" in actions
|
|
39
|
+
assert len(actions) == len(CORE_ACTIONS) + len(EXTERNAL)
|
|
40
|
+
|
|
41
|
+
def test_none_pack_leaves_behavior_unchanged(self):
|
|
42
|
+
assert "reply_email" in get_available_actions_from_map("email_received", set(), None, None)
|
|
43
|
+
assert "reply_email" in get_available_actions_from_map("email_received", set())
|
|
44
|
+
|
|
45
|
+
def test_loaded_categories_do_not_widen_under_a_pack(self):
|
|
46
|
+
ungated = get_available_actions_from_map("directive", {"bounties"})
|
|
47
|
+
assert "create_bounty" in ungated
|
|
48
|
+
gated = get_available_actions_from_map("directive", {"bounties"}, None, PACK_ACTIONS)
|
|
49
|
+
assert "create_bounty" not in gated
|
|
50
|
+
|
|
51
|
+
def test_gates_identically_in_progressive_disclosure_mode(self):
|
|
52
|
+
os.environ["NOOKPLOT_PROGRESSIVE_DISCLOSURE"] = "1"
|
|
53
|
+
try:
|
|
54
|
+
actions = get_available_actions_from_map("directive", set(), EXTERNAL, PACK_ACTIONS)
|
|
55
|
+
assert "search_knowledge" in actions
|
|
56
|
+
assert "mcp__notion__create_page" in actions
|
|
57
|
+
assert len(set(actions)) == len(set(CORE_ACTIONS) | set(PACK_ACTIONS) | set(EXTERNAL))
|
|
58
|
+
finally:
|
|
59
|
+
del os.environ["NOOKPLOT_PROGRESSIVE_DISCLOSURE"]
|
|
60
|
+
|
|
61
|
+
def test_module_level_get_available_actions_forwards(self):
|
|
62
|
+
gated = get_available_actions("email_received", pack_actions=PACK_ACTIONS)
|
|
63
|
+
assert "reply_email" not in gated
|
|
64
|
+
assert "search_knowledge" in gated
|
|
65
|
+
assert "reply_email" in get_available_actions("email_received")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
pytest.main([__file__, "-v"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/artifact_embeddings.py
RENAMED
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/cognitive_workspace.py
RENAMED
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/conversation/model_limits.py
RENAMED
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/default_guardrails.py
RENAMED
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/embedding_exchange.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/manifest_activation_hook.py
RENAMED
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/nookplot_runtime/query_segmentation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/conversation/test_compaction_memory.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_action_dispatch.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_autonomous_loaded_skill_refs.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_economy_frontier_inference.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nookplot_runtime-0.5.128 → nookplot_runtime-0.5.130}/tests/test_manifest_activation_hook.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|