ctxprotocol 0.8.4__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/PKG-INFO +27 -7
  2. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/README.md +26 -6
  3. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/__init__.py +9 -1
  4. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/__init__.py +8 -0
  5. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/client.py +39 -10
  6. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/resources/discovery.py +5 -0
  7. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/resources/query.py +51 -62
  8. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/types.py +269 -6
  9. ctxprotocol-0.9.0/examples/client/test_get_tool.py +56 -0
  10. ctxprotocol-0.9.0/polymarket-query-trace-results-py.json +578 -0
  11. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/pyproject.toml +1 -1
  12. ctxprotocol-0.9.0/tests/test_client.py +127 -0
  13. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/tests/test_discovery.py +19 -0
  14. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/tests/test_query.py +276 -45
  15. ctxprotocol-0.8.4/polymarket-query-trace-results-py.json +0 -417
  16. ctxprotocol-0.8.4/tests/test_client.py +0 -83
  17. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/.codexignore +0 -0
  18. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/.gitignore +0 -0
  19. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/auth/__init__.py +0 -0
  20. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/resources/__init__.py +0 -0
  21. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/client/resources/tools.py +0 -0
  22. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/context/__init__.py +0 -0
  23. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/context/hyperliquid.py +0 -0
  24. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/context/polymarket.py +0 -0
  25. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/context/wallet.py +0 -0
  26. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/handshake/__init__.py +0 -0
  27. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/ctxprotocol/py.typed +0 -0
  28. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/client/execute_client.py +0 -0
  29. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/client/polymarket_query_trace_validation.py +0 -0
  30. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/client/query_client.py +0 -0
  31. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/client/two_surfaces_client.py +0 -0
  32. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/server/hummingbot-contributor/README.md +0 -0
  33. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/server/hummingbot-contributor/env.example +0 -0
  34. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/server/hummingbot-contributor/requirements.txt +0 -0
  35. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/examples/server/hummingbot-contributor/server.py +0 -0
  36. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/tests/__init__.py +0 -0
  37. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/tests/test_tools.py +0 -0
  38. {ctxprotocol-0.8.4 → ctxprotocol-0.9.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ctxprotocol
3
- Version: 0.8.4
3
+ Version: 0.9.0
4
4
  Summary: Official Python SDK for the Context Protocol - Discover and execute AI tools programmatically
5
5
  Project-URL: Homepage, https://ctxprotocol.com
6
6
  Project-URL: Documentation, https://docs.ctxprotocol.com
@@ -125,7 +125,7 @@ result = await client.tools.execute(
125
125
  print(result.session) # method_price, spent, remaining, max_spend, ...
126
126
  ```
127
127
 
128
- **Query mode** gives you curated answers — the server handles answer-safe tool discovery, multi-tool orchestration (up to 100 MCP calls per response turn), self-healing retries, completeness checks, model-aware context budgeting, and AI synthesis for one flat fee:
128
+ **Query mode** gives you curated answers — the server runs a discovery-first planner contract (`discover/probe -> plan-from-evidence -> execute -> bounded fallback`) with model-aware context budgeting and AI synthesis for one flat fee:
129
129
  ```python
130
130
  answer = await client.query.run(
131
131
  query="What are the top whale movements on Base?",
@@ -139,6 +139,12 @@ print(answer.tools_used) # Which tools were used
139
139
  print(answer.cost) # Cost breakdown
140
140
  print(answer.data_url) # Optional blob URL with full data
141
141
  print(answer.developer_trace.summary if answer.developer_trace else None)
142
+ print(
143
+ answer.developer_trace.diagnostics.selection
144
+ if answer.developer_trace and answer.developer_trace.diagnostics
145
+ else None
146
+ )
147
+ print(answer.orchestration_metrics) # Optional first-pass / rediscovery metrics
142
148
  ```
143
149
 
144
150
  > Mixed listings are first-class: one listing can expose methods to both surfaces. Methods without `_meta.pricing.executeUsd` remain query-only until priced.
@@ -187,8 +193,8 @@ See a full dual-surface client script in [`examples/two-surfaces-client.py`](./e
187
193
  |--------|------|----------|---------|-------------|
188
194
  | `api_key` | `str` | Yes | — | Your Context Protocol API key |
189
195
  | `base_url` | `str` | No | `https://www.ctxprotocol.com` | API base URL (for development) |
190
- | `request_timeout_seconds` | `float` | No | `300.0` | Timeout for non-streaming API calls |
191
- | `stream_timeout_seconds` | `float` | No | `600.0` | Timeout for establishing streaming API calls |
196
+ | `request_timeout_seconds` | `float` | No | `300.0` | Timeout for non-streaming JSON API calls |
197
+ | `stream_timeout_seconds` | `float` | No | `600.0` | Timeout for streaming API calls; also used by `client.query.run()` |
192
198
 
193
199
  ```python
194
200
  # Production
@@ -277,15 +283,20 @@ closed = await client.tools.close_session("sess_123")
277
283
 
278
284
  ### Query (Pay-Per-Response)
279
285
 
280
- #### `client.query.run(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, idempotency_key?)`
286
+ #### `client.query.run(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, debug_scout_deep_mode?, idempotency_key?)`
287
+
288
+ Run an agentic query. The server applies discovery-first orchestration (`discover/probe -> plan-from-evidence -> execute -> bounded fallback`) with up to 100 MCP calls per response turn, then returns an AI-synthesized answer.
281
289
 
282
- Run an agentic query. The server discovers answer-safe tools, executes the full pipeline (up to 100 MCP calls per response turn), applies model-aware mediator/data budgeting, and returns an AI-synthesized answer.
290
+ `client.query.run()` buffers the same SSE transport used by `client.query.stream()` and returns the final `done` result. This keeps Python aligned with the TypeScript SDK and the live query runtime.
283
291
 
284
292
  `query_depth` controls orchestration depth:
285
293
  - `fast`: lower-latency path for simple lookups.
286
294
  - `auto`: server routes to either `fast` or `deep` from query intent + selected tool complexity.
287
295
  - `deep`: completeness-oriented path (default when omitted).
288
296
 
297
+ `include_developer_trace` and `orchestration_metrics` are optional diagnostics.
298
+ `debug_scout_deep_mode` remains test-only and should not be used in production flows.
299
+
289
300
  ```python
290
301
  # Simple string
291
302
  answer = await client.query.run("What are the top whale movements on Base?")
@@ -308,11 +319,17 @@ print(answer.duration_ms) # Total time
308
319
  print(answer.data) # Optional execution data (when include_data=True)
309
320
  print(answer.data_url) # Optional blob URL (when include_data_url=True)
310
321
  print(answer.developer_trace.summary if answer.developer_trace else None)
322
+ print(
323
+ answer.developer_trace.diagnostics.selection
324
+ if answer.developer_trace and answer.developer_trace.diagnostics
325
+ else None
326
+ )
327
+ print(answer.orchestration_metrics) # Optional first-pass / rediscovery metrics
311
328
  ```
312
329
 
313
330
  When retrieval-first synthesis rollout is enabled server-side, full-data or truncation-sensitive query requests can switch to retrieval-first context assembly using private stage artifacts and canonical execution data slices. `include_data` and `include_data_url` continue to reference the same canonical dataset used for synthesis.
314
331
 
315
- #### `client.query.stream(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, idempotency_key?)`
332
+ #### `client.query.stream(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, debug_scout_deep_mode?, idempotency_key?)`
316
333
 
317
334
  Same as `run()` but streams events in real-time via SSE.
318
335
 
@@ -320,6 +337,7 @@ Event types:
320
337
  - `tool-status`
321
338
  - `text-delta`
322
339
  - `developer-trace` (when `include_developer_trace=True`)
340
+ - `error`
323
341
  - `done`
324
342
 
325
343
  ```python
@@ -331,6 +349,8 @@ async for event in client.query.stream(
331
349
  print(f"Tool {event.tool.name}: {event.status}")
332
350
  elif event.type == "text-delta":
333
351
  print(event.delta, end="")
352
+ elif event.type == "error":
353
+ print(f"\nStream error: {event.error}")
334
354
  elif event.type == "done":
335
355
  print(f"\nTotal cost: {event.result.cost.total_cost_usd}")
336
356
  ```
@@ -87,7 +87,7 @@ result = await client.tools.execute(
87
87
  print(result.session) # method_price, spent, remaining, max_spend, ...
88
88
  ```
89
89
 
90
- **Query mode** gives you curated answers — the server handles answer-safe tool discovery, multi-tool orchestration (up to 100 MCP calls per response turn), self-healing retries, completeness checks, model-aware context budgeting, and AI synthesis for one flat fee:
90
+ **Query mode** gives you curated answers — the server runs a discovery-first planner contract (`discover/probe -> plan-from-evidence -> execute -> bounded fallback`) with model-aware context budgeting and AI synthesis for one flat fee:
91
91
  ```python
92
92
  answer = await client.query.run(
93
93
  query="What are the top whale movements on Base?",
@@ -101,6 +101,12 @@ print(answer.tools_used) # Which tools were used
101
101
  print(answer.cost) # Cost breakdown
102
102
  print(answer.data_url) # Optional blob URL with full data
103
103
  print(answer.developer_trace.summary if answer.developer_trace else None)
104
+ print(
105
+ answer.developer_trace.diagnostics.selection
106
+ if answer.developer_trace and answer.developer_trace.diagnostics
107
+ else None
108
+ )
109
+ print(answer.orchestration_metrics) # Optional first-pass / rediscovery metrics
104
110
  ```
105
111
 
106
112
  > Mixed listings are first-class: one listing can expose methods to both surfaces. Methods without `_meta.pricing.executeUsd` remain query-only until priced.
@@ -149,8 +155,8 @@ See a full dual-surface client script in [`examples/two-surfaces-client.py`](./e
149
155
  |--------|------|----------|---------|-------------|
150
156
  | `api_key` | `str` | Yes | — | Your Context Protocol API key |
151
157
  | `base_url` | `str` | No | `https://www.ctxprotocol.com` | API base URL (for development) |
152
- | `request_timeout_seconds` | `float` | No | `300.0` | Timeout for non-streaming API calls |
153
- | `stream_timeout_seconds` | `float` | No | `600.0` | Timeout for establishing streaming API calls |
158
+ | `request_timeout_seconds` | `float` | No | `300.0` | Timeout for non-streaming JSON API calls |
159
+ | `stream_timeout_seconds` | `float` | No | `600.0` | Timeout for streaming API calls; also used by `client.query.run()` |
154
160
 
155
161
  ```python
156
162
  # Production
@@ -239,15 +245,20 @@ closed = await client.tools.close_session("sess_123")
239
245
 
240
246
  ### Query (Pay-Per-Response)
241
247
 
242
- #### `client.query.run(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, idempotency_key?)`
248
+ #### `client.query.run(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, debug_scout_deep_mode?, idempotency_key?)`
249
+
250
+ Run an agentic query. The server applies discovery-first orchestration (`discover/probe -> plan-from-evidence -> execute -> bounded fallback`) with up to 100 MCP calls per response turn, then returns an AI-synthesized answer.
243
251
 
244
- Run an agentic query. The server discovers answer-safe tools, executes the full pipeline (up to 100 MCP calls per response turn), applies model-aware mediator/data budgeting, and returns an AI-synthesized answer.
252
+ `client.query.run()` buffers the same SSE transport used by `client.query.stream()` and returns the final `done` result. This keeps Python aligned with the TypeScript SDK and the live query runtime.
245
253
 
246
254
  `query_depth` controls orchestration depth:
247
255
  - `fast`: lower-latency path for simple lookups.
248
256
  - `auto`: server routes to either `fast` or `deep` from query intent + selected tool complexity.
249
257
  - `deep`: completeness-oriented path (default when omitted).
250
258
 
259
+ `include_developer_trace` and `orchestration_metrics` are optional diagnostics.
260
+ `debug_scout_deep_mode` remains test-only and should not be used in production flows.
261
+
251
262
  ```python
252
263
  # Simple string
253
264
  answer = await client.query.run("What are the top whale movements on Base?")
@@ -270,11 +281,17 @@ print(answer.duration_ms) # Total time
270
281
  print(answer.data) # Optional execution data (when include_data=True)
271
282
  print(answer.data_url) # Optional blob URL (when include_data_url=True)
272
283
  print(answer.developer_trace.summary if answer.developer_trace else None)
284
+ print(
285
+ answer.developer_trace.diagnostics.selection
286
+ if answer.developer_trace and answer.developer_trace.diagnostics
287
+ else None
288
+ )
289
+ print(answer.orchestration_metrics) # Optional first-pass / rediscovery metrics
273
290
  ```
274
291
 
275
292
  When retrieval-first synthesis rollout is enabled server-side, full-data or truncation-sensitive query requests can switch to retrieval-first context assembly using private stage artifacts and canonical execution data slices. `include_data` and `include_data_url` continue to reference the same canonical dataset used for synthesis.
276
293
 
277
- #### `client.query.stream(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, idempotency_key?)`
294
+ #### `client.query.stream(query, tools?, model_id?, include_data?, include_data_url?, include_developer_trace?, query_depth?, debug_scout_deep_mode?, idempotency_key?)`
278
295
 
279
296
  Same as `run()` but streams events in real-time via SSE.
280
297
 
@@ -282,6 +299,7 @@ Event types:
282
299
  - `tool-status`
283
300
  - `text-delta`
284
301
  - `developer-trace` (when `include_developer_trace=True`)
302
+ - `error`
285
303
  - `done`
286
304
 
287
305
  ```python
@@ -293,6 +311,8 @@ async for event in client.query.stream(
293
311
  print(f"Tool {event.tool.name}: {event.status}")
294
312
  elif event.type == "text-delta":
295
313
  print(event.delta, end="")
314
+ elif event.type == "error":
315
+ print(f"\nStream error: {event.error}")
296
316
  elif event.type == "done":
297
317
  print(f"\nTotal cost: {event.result.cost.total_cost_usd}")
298
318
  ```
@@ -31,7 +31,7 @@ Example:
31
31
  For more information, visit: https://ctxprotocol.com
32
32
  """
33
33
 
34
- __version__ = "0.8.4"
34
+ __version__ = "0.8.5"
35
35
 
36
36
  # Re-export everything from client module
37
37
  from ctxprotocol.client import (
@@ -61,13 +61,17 @@ from ctxprotocol.client.types import (
61
61
  # Query types (pay-per-response)
62
62
  QueryApiSuccessResponse,
63
63
  QueryCost,
64
+ QueryDeepMode,
64
65
  QueryDeveloperTrace,
66
+ QueryDeveloperTraceDiagnostics,
65
67
  QueryDeveloperTraceSummary,
66
68
  QueryDeveloperTraceStep,
67
69
  QueryDeveloperTraceToolRef,
70
+ QueryOrchestrationMetrics,
68
71
  QueryDeveloperTraceLoopInfo,
69
72
  QueryStreamDeveloperTraceEvent,
70
73
  QueryStreamEvent,
74
+ QueryStreamErrorEvent,
71
75
  QueryOptions,
72
76
  QueryResult,
73
77
  QueryStreamDoneEvent,
@@ -177,16 +181,20 @@ __all__ = [
177
181
  "QueryResult",
178
182
  "QueryToolUsage",
179
183
  "QueryCost",
184
+ "QueryDeepMode",
180
185
  "QueryDeveloperTrace",
186
+ "QueryDeveloperTraceDiagnostics",
181
187
  "QueryDeveloperTraceSummary",
182
188
  "QueryDeveloperTraceStep",
183
189
  "QueryDeveloperTraceToolRef",
190
+ "QueryOrchestrationMetrics",
184
191
  "QueryDeveloperTraceLoopInfo",
185
192
  "QueryApiSuccessResponse",
186
193
  "QueryStreamToolStatusEvent",
187
194
  "QueryStreamTextDeltaEvent",
188
195
  "QueryStreamDeveloperTraceEvent",
189
196
  "QueryStreamDoneEvent",
197
+ "QueryStreamErrorEvent",
190
198
  "QueryStreamEvent",
191
199
  "ContextErrorCode",
192
200
  # Errors
@@ -29,13 +29,17 @@ from ctxprotocol.client.types import (
29
29
  # Query types (pay-per-response)
30
30
  QueryApiSuccessResponse,
31
31
  QueryCost,
32
+ QueryDeepMode,
32
33
  QueryDeveloperTrace,
34
+ QueryDeveloperTraceDiagnostics,
33
35
  QueryDeveloperTraceSummary,
34
36
  QueryDeveloperTraceStep,
35
37
  QueryDeveloperTraceToolRef,
38
+ QueryOrchestrationMetrics,
36
39
  QueryDeveloperTraceLoopInfo,
37
40
  QueryStreamDeveloperTraceEvent,
38
41
  QueryStreamEvent,
42
+ QueryStreamErrorEvent,
39
43
  QueryOptions,
40
44
  QueryResult,
41
45
  QueryStreamDoneEvent,
@@ -80,16 +84,20 @@ __all__ = [
80
84
  "QueryResult",
81
85
  "QueryToolUsage",
82
86
  "QueryCost",
87
+ "QueryDeepMode",
83
88
  "QueryDeveloperTrace",
89
+ "QueryDeveloperTraceDiagnostics",
84
90
  "QueryDeveloperTraceSummary",
85
91
  "QueryDeveloperTraceStep",
86
92
  "QueryDeveloperTraceToolRef",
93
+ "QueryOrchestrationMetrics",
87
94
  "QueryDeveloperTraceLoopInfo",
88
95
  "QueryApiSuccessResponse",
89
96
  "QueryStreamToolStatusEvent",
90
97
  "QueryStreamTextDeltaEvent",
91
98
  "QueryStreamDeveloperTraceEvent",
92
99
  "QueryStreamDoneEvent",
100
+ "QueryStreamErrorEvent",
93
101
  "QueryStreamEvent",
94
102
  "ContextErrorCode",
95
103
  # Errors
@@ -53,8 +53,9 @@ class ContextClient:
53
53
  Args:
54
54
  api_key: Your Context Protocol API key (format: sk_live_...)
55
55
  base_url: Optional base URL override (defaults to https://www.ctxprotocol.com)
56
- request_timeout_seconds: Timeout for non-streaming requests (default 300.0s)
57
- stream_timeout_seconds: Timeout for establishing streaming requests (default 600.0s)
56
+ request_timeout_seconds: Timeout for non-streaming JSON requests (default 300.0s)
57
+ stream_timeout_seconds: Timeout for streaming requests (default 600.0s);
58
+ also used by query.run(), which follows the SSE done path for parity with the TypeScript SDK
58
59
 
59
60
  Raises:
60
61
  ContextError: If API key is not provided or timeout values are invalid
@@ -160,13 +161,20 @@ class ContextClient:
160
161
  """
161
162
  max_retries = 3
162
163
  timeout_seconds = self._request_timeout_seconds
164
+ method_upper = method.upper()
165
+ headers = extra_headers or {}
166
+ can_retry_request = method_upper in {
167
+ "GET",
168
+ "HEAD",
169
+ "OPTIONS",
170
+ } or "Idempotency-Key" in headers
163
171
  last_error: Exception | None = None
164
172
 
165
173
  for attempt in range(max_retries + 1):
166
174
  try:
167
- if method == "GET":
175
+ if method_upper == "GET":
168
176
  response = await self._client.get(endpoint, headers=extra_headers)
169
- elif method == "POST":
177
+ elif method_upper == "POST":
170
178
  response = await self._client.post(
171
179
  endpoint,
172
180
  json=json_body,
@@ -177,7 +185,11 @@ class ContextClient:
177
185
 
178
186
  if not response.is_success:
179
187
  # Retry transient 5xx errors
180
- if response.status_code >= 500 and attempt < max_retries:
188
+ if (
189
+ response.status_code >= 500
190
+ and can_retry_request
191
+ and attempt < max_retries
192
+ ):
181
193
  delay = min(2**attempt, 10)
182
194
  await asyncio.sleep(delay)
183
195
  continue
@@ -203,12 +215,18 @@ class ContextClient:
203
215
  help_url=help_url,
204
216
  )
205
217
 
206
- return response.json()
218
+ try:
219
+ return response.json()
220
+ except Exception as exc:
221
+ raise ContextError(
222
+ message=f"Failed to parse JSON response: {exc}",
223
+ status_code=response.status_code,
224
+ ) from exc
207
225
  except ContextError:
208
226
  raise
209
227
  except (httpx.TimeoutException, httpx.TransportError) as exc:
210
228
  last_error = exc
211
- if attempt < max_retries:
229
+ if can_retry_request and attempt < max_retries:
212
230
  delay = min(2**attempt, 10)
213
231
  await asyncio.sleep(delay)
214
232
  continue
@@ -251,13 +269,20 @@ class ContextClient:
251
269
  """
252
270
  max_retries = 3
253
271
  timeout_seconds = self._stream_timeout_seconds
272
+ method_upper = method.upper()
273
+ headers = extra_headers or {}
274
+ can_retry_request = method_upper in {
275
+ "GET",
276
+ "HEAD",
277
+ "OPTIONS",
278
+ } or "Idempotency-Key" in headers
254
279
  last_error: Exception | None = None
255
280
 
256
281
  for attempt in range(max_retries + 1):
257
282
  try:
258
283
  response = await self._stream_client.send(
259
284
  self._stream_client.build_request(
260
- method,
285
+ method_upper,
261
286
  endpoint,
262
287
  json=json_body,
263
288
  headers=extra_headers,
@@ -269,7 +294,11 @@ class ContextClient:
269
294
  # Read body before retrying/raising
270
295
  await response.aread()
271
296
 
272
- if response.status_code >= 500 and attempt < max_retries:
297
+ if (
298
+ response.status_code >= 500
299
+ and can_retry_request
300
+ and attempt < max_retries
301
+ ):
273
302
  delay = min(2**attempt, 10)
274
303
  await asyncio.sleep(delay)
275
304
  continue
@@ -299,7 +328,7 @@ class ContextClient:
299
328
  raise
300
329
  except (httpx.TimeoutException, httpx.TransportError) as exc:
301
330
  last_error = exc
302
- if attempt < max_retries:
331
+ if can_retry_request and attempt < max_retries:
303
332
  delay = min(2**attempt, 10)
304
333
  await asyncio.sleep(delay)
305
334
  continue
@@ -23,6 +23,11 @@ class Discovery:
23
23
  """
24
24
  self._client = client
25
25
 
26
+ async def get(self, tool_id: str) -> Tool:
27
+ """Fetch a single marketplace tool by its unique ID."""
28
+ response = await self._client.fetch(f"/api/v1/tools/{tool_id}")
29
+ return Tool.model_validate(response)
30
+
26
31
  async def search(
27
32
  self,
28
33
  query: str,
@@ -3,8 +3,8 @@ Query resource for pay-per-response agentic queries.
3
3
 
4
4
  Unlike ``tools.execute()`` which calls a single tool once (pay-per-request),
5
5
  the Query resource sends a natural-language question and lets the server
6
- handle tool discovery, multi-tool orchestration, self-healing retries,
7
- completeness checks, and AI synthesis — all for one flat fee.
6
+ handle discovery-first orchestration (discover/probe -> plan-from-evidence ->
7
+ execute -> bounded fallback) and AI synthesis — all for one flat fee.
8
8
  """
9
9
 
10
10
  from __future__ import annotations
@@ -14,13 +14,13 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator
14
14
 
15
15
  from ctxprotocol.client.types import (
16
16
  ContextError,
17
- ExecuteApiErrorResponse,
18
- QueryApiSuccessResponse,
19
17
  QueryDeveloperTrace,
18
+ QueryDeepMode,
20
19
  QueryDepth,
21
20
  QueryResult,
22
21
  QueryStreamDeveloperTraceEvent,
23
22
  QueryStreamDoneEvent,
23
+ QueryStreamErrorEvent,
24
24
  QueryStreamEvent,
25
25
  QueryStreamTextDeltaEvent,
26
26
  QueryStreamToolStatusEvent,
@@ -196,14 +196,17 @@ class Query:
196
196
  include_data_url: bool | None = None,
197
197
  include_developer_trace: bool | None = None,
198
198
  query_depth: QueryDepth | None = None,
199
+ debug_scout_deep_mode: QueryDeepMode | None = None,
199
200
  idempotency_key: str | None = None,
200
201
  ) -> QueryResult:
201
202
  """Run an agentic query and wait for the full response.
202
203
 
203
204
  The server discovers relevant tools (or uses the ones you specify),
204
- executes the full agentic pipeline (up to 100 MCP calls per tool),
205
+ executes the discovery-first pipeline (up to 100 MCP calls per response turn),
205
206
  and returns an AI-synthesized answer. Payment is settled after
206
207
  successful execution via deferred settlement.
208
+ Internally this follows the same SSE `done` path as `query.stream()`
209
+ so Python and TypeScript observe the same query runtime behavior.
207
210
 
208
211
  Args:
209
212
  query: The natural-language question to answer
@@ -213,6 +216,7 @@ class Query:
213
216
  include_data_url: Persist execution data to blob and return URL
214
217
  include_developer_trace: Include machine-readable Developer Mode traces
215
218
  query_depth: Query orchestration depth mode (fast, auto, or deep)
219
+ debug_scout_deep_mode: Test-only internal deep lane override
216
220
  idempotency_key: Optional idempotency key (UUID recommended) for safe retries
217
221
 
218
222
  Returns:
@@ -237,63 +241,33 @@ class Query:
237
241
  ... tools=["tool-uuid-1", "tool-uuid-2"],
238
242
  ... )
239
243
  """
240
- request_body: dict[str, Any] = {
241
- "query": query,
242
- "tools": tools,
243
- "stream": False,
244
- }
245
- if model_id is not None:
246
- request_body["modelId"] = model_id
247
- if include_data is not None:
248
- request_body["includeData"] = include_data
249
- if include_data_url is not None:
250
- request_body["includeDataUrl"] = include_data_url
251
- if include_developer_trace is not None:
252
- request_body["includeDeveloperTrace"] = include_developer_trace
253
- if query_depth is not None:
254
- request_body["queryDepth"] = query_depth
244
+ terminal_error: QueryStreamErrorEvent | None = None
245
+
246
+ async for event in self.stream(
247
+ query=query,
248
+ tools=tools,
249
+ model_id=model_id,
250
+ include_data=include_data,
251
+ include_data_url=include_data_url,
252
+ include_developer_trace=include_developer_trace,
253
+ query_depth=query_depth,
254
+ debug_scout_deep_mode=debug_scout_deep_mode,
255
+ idempotency_key=idempotency_key,
256
+ ):
257
+ if event.type == "error":
258
+ terminal_error = event
259
+ continue
255
260
 
256
- response = await self._client.fetch(
257
- "/api/v1/query",
258
- method="POST",
259
- json_body=request_body,
260
- extra_headers=(
261
- {"Idempotency-Key": idempotency_key}
262
- if idempotency_key
263
- else None
264
- ),
265
- )
261
+ if event.type == "done":
262
+ return event.result
266
263
 
267
- # Handle error response
268
- if "error" in response:
269
- error_response = ExecuteApiErrorResponse.model_validate(response)
264
+ if terminal_error is not None:
270
265
  raise ContextError(
271
- message=error_response.error,
272
- code=error_response.code,
273
- status_code=None,
274
- help_url=error_response.help_url,
275
- )
276
-
277
- # Handle success response
278
- if response.get("success"):
279
- success_response = QueryApiSuccessResponse.model_validate(response)
280
- developer_trace = success_response.developer_trace
281
- if include_developer_trace and developer_trace is None:
282
- developer_trace = self._build_synthetic_trace_from_run_result(
283
- success_response.tools_used,
284
- success_response.duration_ms,
285
- )
286
- return QueryResult(
287
- response=success_response.response,
288
- tools_used=success_response.tools_used,
289
- cost=success_response.cost,
290
- duration_ms=success_response.duration_ms,
291
- data=success_response.data,
292
- data_url=success_response.data_url,
293
- developer_trace=developer_trace,
266
+ message=terminal_error.error,
267
+ code=terminal_error.code,
294
268
  )
295
269
 
296
- raise ContextError("Unexpected response format from query API")
270
+ raise ContextError("Streaming query ended before done event")
297
271
 
298
272
  async def stream(
299
273
  self,
@@ -304,6 +278,7 @@ class Query:
304
278
  include_data_url: bool | None = None,
305
279
  include_developer_trace: bool | None = None,
306
280
  query_depth: QueryDepth | None = None,
281
+ debug_scout_deep_mode: QueryDeepMode | None = None,
307
282
  idempotency_key: str | None = None,
308
283
  ) -> AsyncGenerator[QueryStreamEvent, None]:
309
284
  """Run an agentic query with streaming via SSE.
@@ -312,6 +287,7 @@ class Query:
312
287
  - ``tool-status`` — A tool started executing or changed status
313
288
  - ``text-delta`` — A chunk of the AI response text
314
289
  - ``developer-trace`` — Runtime trace metadata (when enabled)
290
+ - ``error`` — A structured query/runtime error emitted before completion
315
291
  - ``done`` — The full response is complete (includes final QueryResult)
316
292
 
317
293
  Args:
@@ -322,6 +298,7 @@ class Query:
322
298
  include_data_url: Persist execution data to blob and return URL
323
299
  include_developer_trace: Include machine-readable Developer Mode traces
324
300
  query_depth: Query orchestration depth mode (fast, auto, or deep)
301
+ debug_scout_deep_mode: Test-only internal deep lane override
325
302
  idempotency_key: Optional idempotency key (UUID recommended) for safe retries
326
303
 
327
304
  Yields:
@@ -331,6 +308,8 @@ class Query:
331
308
  >>> async for event in client.query.stream("What are the top whale movements?"):
332
309
  ... if event.type == "text-delta":
333
310
  ... print(event.delta, end="")
311
+ ... elif event.type == "error":
312
+ ... print(f"\\nStream error: {event.error}")
334
313
  ... elif event.type == "done":
335
314
  ... print(f"\\nCost: {event.result.cost.total_cost_usd}")
336
315
  """
@@ -349,6 +328,8 @@ class Query:
349
328
  request_body["includeDeveloperTrace"] = include_developer_trace
350
329
  if query_depth is not None:
351
330
  request_body["queryDepth"] = query_depth
331
+ if debug_scout_deep_mode is not None:
332
+ request_body["debugScoutDeepMode"] = debug_scout_deep_mode
352
333
 
353
334
  response = await self._client.fetch_stream(
354
335
  "/api/v1/query",
@@ -400,6 +381,8 @@ class Query:
400
381
  trace_event.trace,
401
382
  )
402
383
  yield trace_event
384
+ elif event_type == "error":
385
+ yield QueryStreamErrorEvent.model_validate(parsed)
403
386
  elif event_type == "done":
404
387
  done_event = QueryStreamDoneEvent.model_validate(parsed)
405
388
  done_trace = self._merge_developer_trace(
@@ -407,10 +390,16 @@ class Query:
407
390
  done_event.result.developer_trace,
408
391
  )
409
392
  if done_trace is None and include_developer_trace:
410
- done_trace = self._build_synthetic_trace_from_stream_status(
411
- status_timeline=status_timeline,
412
- tools_used=done_event.result.tools_used,
413
- duration_ms=done_event.result.duration_ms,
414
- )
393
+ if status_timeline:
394
+ done_trace = self._build_synthetic_trace_from_stream_status(
395
+ status_timeline=status_timeline,
396
+ tools_used=done_event.result.tools_used,
397
+ duration_ms=done_event.result.duration_ms,
398
+ )
399
+ else:
400
+ done_trace = self._build_synthetic_trace_from_run_result(
401
+ done_event.result.tools_used,
402
+ done_event.result.duration_ms,
403
+ )
415
404
  done_event.result.developer_trace = done_trace
416
405
  yield done_event