data-harness 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,48 @@
1
+ from data_harness.agent import Agent, AgentSession, AsyncAgent, AsyncAgentSession
2
+ from data_harness.exceptions import (
3
+ MaxTurnsExceeded,
4
+ SubagentRecursionError,
5
+ ToolNotFoundError,
6
+ )
7
+ from data_harness.loop import AsyncHarness
8
+ from data_harness.providers.base import (
9
+ AsyncProviderAdapter,
10
+ NormalizedResponse,
11
+ ProviderAdapter,
12
+ StopReason,
13
+ )
14
+ from data_harness.result import CacheStorageInfo, RunResult, Usage
15
+ from data_harness.types import (
16
+ ContentBlock,
17
+ Message,
18
+ TextBlock,
19
+ ToolAnnotations,
20
+ ToolResultBlock,
21
+ ToolSpec,
22
+ ToolUseBlock,
23
+ )
24
+
25
+ __all__ = [
26
+ "Agent",
27
+ "AgentSession",
28
+ "AsyncAgent",
29
+ "AsyncAgentSession",
30
+ "AsyncHarness",
31
+ "AsyncProviderAdapter",
32
+ "CacheStorageInfo",
33
+ "ContentBlock",
34
+ "MaxTurnsExceeded",
35
+ "Message",
36
+ "NormalizedResponse",
37
+ "ProviderAdapter",
38
+ "RunResult",
39
+ "StopReason",
40
+ "SubagentRecursionError",
41
+ "TextBlock",
42
+ "ToolAnnotations",
43
+ "ToolNotFoundError",
44
+ "ToolResultBlock",
45
+ "ToolSpec",
46
+ "ToolUseBlock",
47
+ "Usage",
48
+ ]
data_harness/agent.py ADDED
@@ -0,0 +1,550 @@
1
+ """High-level `Agent` and `AsyncAgent` convenience layers.
2
+
3
+ `Agent` wraps `Harness` for sync workflows.
4
+ `AsyncAgent` wraps `AsyncHarness` for async and streaming workflows.
5
+
6
+ Both are one-shot per `run()` call. Use `session()` / `async_session()` for
7
+ multi-turn conversations over a shared message history and cache.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import uuid
13
+ from collections.abc import AsyncGenerator, Callable
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from data_harness.cache import SessionCache
19
+ from data_harness.loop import AsyncHarness, Harness
20
+ from data_harness.providers.base import AsyncProviderAdapter, ProviderAdapter
21
+ from data_harness.result import RunResult
22
+ from data_harness.schema import infer_input_schema
23
+ from data_harness.tools.connectors import ConnectorRegistry
24
+ from data_harness.tools.interpreter import PythonInterpreter
25
+ from data_harness.tools.planner import Planner
26
+ from data_harness.tools.subagent import _copy_cache_value, make_subagent_spec
27
+ from data_harness.tools.variables import make_list_variables_spec
28
+ from data_harness.types import ToolAnnotations, ToolSpec
29
+
30
+
31
+ @dataclass(frozen=True)
32
+ class _ConnectorToolDefinition:
33
+ connector_name: str
34
+ fn: Callable[..., Any]
35
+ description: str
36
+ input_schema: dict
37
+ annotations: ToolAnnotations | None = None
38
+
39
+
40
+ @dataclass(frozen=True)
41
+ class _ConnectorDefinition:
42
+ name: str
43
+ description: str
44
+
45
+
46
class ConnectorBuilder:
    """Handle for attaching tools to one named connector of an agent.

    Returned by ``Agent.connector`` / ``AsyncAgent.connector``.
    """

    def __init__(self, agent: Agent | AsyncAgent, name: str) -> None:
        """Remember the owning agent and the connector name."""
        self._name = name
        self._agent = agent

    def tool(
        self,
        fn: Callable[..., Any],
        *,
        description: str,
        input_schema: dict | None = None,
        annotations: ToolAnnotations | None = None,
    ) -> Callable[..., Any]:
        """Register ``fn`` as a tool of this connector.

        Args:
            fn: Callable to register.
            description: Description stored with the tool.
            input_schema: Explicit schema; when ``None`` the schema is
                obtained from ``infer_input_schema(fn)``.
            annotations: Optional annotations stored with the tool.

        Returns:
            ``fn`` itself, unchanged.
        """
        if input_schema is None:
            resolved_schema = infer_input_schema(fn)
        else:
            resolved_schema = input_schema

        definition = _ConnectorToolDefinition(
            connector_name=self._name,
            fn=fn,
            description=description,
            input_schema=resolved_schema,
            annotations=annotations,
        )
        self._agent._connector_tools.append(definition)
        return fn
70
+
71
+
72
def _build_tools_for(
    agent: Agent | AsyncAgent,
    *,
    planner: Planner | None,
    cache: SessionCache,
) -> list[ToolSpec]:
    """Assemble the tool specs shared by ``Agent`` and ``AsyncAgent``.

    The list always starts with the Python interpreter spec and the
    list-variables spec, both built for ``cache``. Planner specs follow when
    ``planner`` is given. When the agent has connectors, each connector is
    registered with its tools, and the registry's load-connectors spec plus
    its wrapped specs are appended.

    Args:
        agent: Source of the ``_connectors`` and ``_connector_tools`` records.
        planner: Planner whose tool specs are included, or ``None``.
        cache: Cache handed to the interpreter, list-variables and wrapped specs.

    Returns:
        The assembled list of tool specs.
    """
    specs = [
        PythonInterpreter.make_tool_spec(cache),
        make_list_variables_spec(cache),
    ]
    if planner is not None:
        specs.extend(planner.make_tool_specs())

    if not agent._connectors:
        return specs

    registry = ConnectorRegistry()
    for connector_name, connector in agent._connectors.items():
        connector_specs = []
        for definition in agent._connector_tools:
            if definition.connector_name != connector_name:
                continue
            connector_specs.append(
                ToolSpec(
                    # Tool names are "<connector>__<function name>".
                    name=f"{connector_name}__{definition.fn.__name__}",
                    description=definition.description,
                    input_schema=definition.input_schema,
                    handler=definition.fn,
                    visible=False,
                    annotations=definition.annotations,
                )
            )
        registry.register(
            name=connector_name,
            description=connector.description,
            tools=connector_specs,
        )

    specs.append(registry.get_load_connectors_spec())
    specs.extend(registry.make_wrapped_specs(cache))
    return specs
107
+
108
+
109
class Agent:
    """Synchronous agent definition that builds a fresh `Harness` per run.

    `run()` and `run_result()` are one-shot: each call creates a new harness
    through `_make_harness()`. Use `session()` for follow-up questions over a
    shared message history.
    """

    def __init__(
        self,
        adapter: ProviderAdapter,
        system: str,
        *,
        max_turns: int = 25,
        cache: SessionCache | None = None,
        run_dir: str | Path | None = None,
    ) -> None:
        """Store the agent configuration.

        Args:
            adapter: Provider adapter passed to every harness built here.
            system: System prompt passed to every harness built here.
            max_turns: Passed to the harness as `max_turns`.
            cache: Cache to use; a new `SessionCache()` is created when omitted.
            run_dir: Passed to the harness (stringified) when given; when
                `None` the harness is built without a `run_dir` argument.
        """
        self._adapter = adapter
        self._system = system
        self._max_turns = max_turns
        self._cache = cache if cache is not None else SessionCache()
        self._run_dir = run_dir
        self._last_harness: Harness | None = None
        self._last_run_file: str | None = None
        self._connectors: dict[str, _ConnectorDefinition] = {}
        self._connector_tools: list[_ConnectorToolDefinition] = []
        self._planner_enabled = False
        self._subagent_factory: Callable[[], ProviderAdapter] | None = None

    @property
    def cache(self) -> SessionCache:
        """The cache this agent was configured with."""
        return self._cache

    @property
    def last_harness(self) -> Harness | None:
        """Harness used by the most recent run, or `None` before any run."""
        return self._last_harness

    @property
    def last_run_file(self) -> str | None:
        """`run_file` of the most recent harness, or `None` before any run."""
        return self._last_run_file

    def connector(self, name: str, *, description: str) -> ConnectorBuilder:
        """Register (or replace) a connector and return a builder for its tools."""
        self._connectors[name] = _ConnectorDefinition(
            name=name, description=description
        )
        return ConnectorBuilder(self, name)

    def enable_planner(self) -> Agent:
        """Turn on the planner for harnesses built later; returns `self`."""
        self._planner_enabled = True
        return self

    def enable_subagents(
        self, *, adapter_factory: Callable[[], ProviderAdapter]
    ) -> Agent:
        """Enable the subagent tool using `adapter_factory`; returns `self`."""
        self._subagent_factory = adapter_factory
        return self

    def session(self) -> AgentSession:
        """Create a new `AgentSession` bound to this agent."""
        return AgentSession(self)

    def run_result(self, user_message: str) -> RunResult:
        """Run one message on a fresh harness and return the `RunResult`.

        A new run id is generated for the call and no session id is passed.
        """
        harness = self._make_harness()
        self._last_harness = harness
        try:
            return harness.run_result(
                user_message, run_id=str(uuid.uuid4()), session_id=None
            )
        finally:
            # Record the run file even when the harness raises, so that
            # last_harness and last_run_file always describe the same run.
            self._last_run_file = harness.run_file

    def run(self, user_message: str) -> str:
        """Run one message on a fresh harness and return `harness.run(...)`."""
        harness = self._make_harness()
        self._last_harness = harness
        try:
            return harness.run(user_message)
        finally:
            # Same bookkeeping guarantee as run_result(): never leave a stale
            # run file behind when the run fails.
            self._last_run_file = harness.run_file

    def explain(self) -> str:
        """Return `_EXPLAIN_TEMPLATE` filled in with this agent's settings.

        The system prompt is shortened with `_truncate`; `run_dir` falls back
        to `"./runs"` when none was configured.
        """
        # Stringify so a Path renders as a plain quoted path rather than
        # PosixPath(...); this matches what _make_harness passes on.
        run_dir = str(self._run_dir) if self._run_dir is not None else "./runs"
        return _EXPLAIN_TEMPLATE.format(
            system=_truncate(self._system),
            max_turns=self._max_turns,
            run_dir=run_dir,
        )

    def _build_tools(
        self,
        *,
        planner: Planner | None = None,
        cache: SessionCache | None = None,
    ) -> list[ToolSpec]:
        """Build tool specs for `cache` (defaults to the agent's own cache)."""
        target_cache = cache if cache is not None else self._cache
        return _build_tools_for(self, planner=planner, cache=target_cache)

    def _make_harness(
        self,
        *,
        cache: SessionCache | None = None,
        planner: Planner | None = None,
    ) -> Harness:
        """Create a `Harness` wired with this agent's tools and settings.

        Args:
            cache: Cache for the harness and its tools; defaults to the
                agent's own cache.
            planner: Planner to use; when omitted a new `Planner()` is created
                if the planner was enabled, otherwise none is used.
        """
        effective_cache = cache if cache is not None else self._cache

        if planner is not None:
            effective_planner = planner
        elif self._planner_enabled:
            effective_planner = Planner()
        else:
            effective_planner = None

        tools = self._build_tools(planner=effective_planner, cache=effective_cache)

        if self._subagent_factory is not None:
            # The subagent tool gets its own tool list built without a planner.
            subagent_parent_tools = self._build_tools(
                planner=None, cache=effective_cache
            )
            effective_run_dir = (
                str(self._run_dir) if self._run_dir is not None else "./runs"
            )
            tools.append(
                make_subagent_spec(
                    adapter_factory=self._subagent_factory,
                    parent_tools=subagent_parent_tools,
                    parent_cache=effective_cache,
                    run_dir=effective_run_dir,
                    make_sub_tools=lambda sub_cache: self._build_tools(
                        planner=None, cache=sub_cache
                    ),
                )
            )

        harness_kwargs: dict = {
            "adapter": self._adapter,
            "system": self._system,
            "tools": tools,
            "max_turns": self._max_turns,
            "cache": effective_cache,
        }
        # Only pass run_dir when configured so the harness default applies.
        if self._run_dir is not None:
            harness_kwargs["run_dir"] = str(self._run_dir)

        harness = Harness(**harness_kwargs)
        if effective_planner is not None:
            harness.register_reminder(effective_planner.reminder_hook)
        return harness
241
+
242
+
243
class AgentSession:
    """Multi-turn chat session created from an `Agent` definition.

    The session keeps one harness and one `SessionCache` of its own, and every
    `ask()` goes to that same harness. `Agent.run()` stays the one-shot entry
    point; use `Agent.session()` for follow-up questions.
    """

    def __init__(self, agent: Agent) -> None:
        """Copy the agent's cache entries into a session cache and build the harness."""
        source_cache = agent.cache
        session_cache = SessionCache(
            sample_size=source_cache.sample_size,
            storage_dir=None,
            hot_limit=source_cache.hot_limit,
        )
        # Each value goes through _copy_cache_value before being stored.
        for handle, stored in source_cache.items():
            session_cache.put(handle, _copy_cache_value(stored))

        self._agent = agent
        self._cache = session_cache
        self._harness = agent._make_harness(cache=session_cache)
        self._id: str = str(uuid.uuid4())
        self._last_result: RunResult | None = None
        self._turns: int = 0

    @property
    def id(self) -> str:
        """Identifier generated for this session."""
        return self._id

    @property
    def last_result(self) -> RunResult | None:
        """Result of the latest `ask_result()` call, or `None` before any."""
        return self._last_result

    @property
    def turns(self) -> int:
        """Sum of `result.turns` over all `ask_result()` calls so far."""
        return self._turns

    @property
    def cache(self) -> SessionCache:
        """The session's own cache."""
        return self._cache

    @property
    def harness(self) -> Harness:
        """The harness used for every turn of this session."""
        return self._harness

    @property
    def run_file(self) -> str | None:
        """The harness's current `run_file`."""
        return self._harness.run_file

    def put(self, name: str, value: Any, *, overwrite: bool = False) -> str:
        """Store `value` in the session cache; returns what `cache.put` returns."""
        return self._cache.put(name, value, overwrite=overwrite)

    def list_handles(self) -> dict[str, str]:
        """Return the session cache's `list_handles()` mapping."""
        return self._cache.list_handles()

    def ask_result(self, user_message: str) -> RunResult:
        """Send a follow-up message and return the harness's `RunResult`.

        Also updates `last_result`, `turns`, and the owning agent's
        `last_harness` / `last_run_file`.
        """
        outcome = self._harness.ask_result(
            user_message,
            run_id=str(uuid.uuid4()),
            session_id=self._id,
        )
        self._last_result = outcome
        self._turns += outcome.turns

        owner = self._agent
        owner._last_harness = self._harness
        owner._last_run_file = self._harness.run_file
        return outcome

    def ask(self, user_message: str) -> str:
        """Send a follow-up message and return the reply text.

        Raises:
            MaxTurnsExceeded: when the result status is "max_turns_exceeded".
            RuntimeError: when the result status is "error".
        """
        outcome = self.ask_result(user_message)
        status = outcome.status
        if status == "max_turns_exceeded":
            from data_harness.exceptions import MaxTurnsExceeded

            raise MaxTurnsExceeded(outcome.turns)
        if status == "error":
            raise RuntimeError(outcome.error or "unknown error")
        return outcome.text
314
+
315
+
316
class AsyncAgent:
    """Async agent for use with `AsyncProviderAdapter`.

    `run()` and `run_result()` are coroutines. `run_stream()` is an async
    generator that re-yields the chunks produced by the harness stream.
    Use `async_session()` for multi-turn conversations.
    """

    def __init__(
        self,
        adapter: AsyncProviderAdapter,
        system: str,
        *,
        max_turns: int = 25,
        cache: SessionCache | None = None,
        run_dir: str | Path | None = None,
    ) -> None:
        """Store the agent configuration.

        Args:
            adapter: Async provider adapter passed to every harness built here.
            system: System prompt passed to every harness built here.
            max_turns: Passed to the harness as `max_turns`.
            cache: Cache to use; a new `SessionCache()` is created when omitted.
            run_dir: Passed to the harness (stringified) when given; when
                `None` the harness is built without a `run_dir` argument.
        """
        self._adapter = adapter
        self._system = system
        self._max_turns = max_turns
        self._cache = cache if cache is not None else SessionCache()
        self._run_dir = run_dir
        self._last_harness: AsyncHarness | None = None
        self._last_run_file: str | None = None
        self._connectors: dict[str, _ConnectorDefinition] = {}
        self._connector_tools: list[_ConnectorToolDefinition] = []
        self._planner_enabled = False

    @property
    def cache(self) -> SessionCache:
        """The cache this agent was configured with."""
        return self._cache

    @property
    def last_harness(self) -> AsyncHarness | None:
        """Harness used by the most recent run, or `None` before any run."""
        return self._last_harness

    @property
    def last_run_file(self) -> str | None:
        """`run_file` of the most recent harness, or `None` before any run."""
        return self._last_run_file

    def connector(self, name: str, *, description: str) -> ConnectorBuilder:
        """Register (or replace) a connector and return a builder for its tools."""
        self._connectors[name] = _ConnectorDefinition(
            name=name, description=description
        )
        return ConnectorBuilder(self, name)

    def enable_planner(self) -> AsyncAgent:
        """Turn on the planner for harnesses built later; returns `self`."""
        self._planner_enabled = True
        return self

    def async_session(self) -> AsyncAgentSession:
        """Create a new `AsyncAgentSession` bound to this agent."""
        return AsyncAgentSession(self)

    async def run_result(self, user_message: str) -> RunResult:
        """Run one message on a fresh harness and return the `RunResult`.

        A new run id is generated for the call and no session id is passed.
        """
        harness = self._make_harness()
        self._last_harness = harness
        try:
            return await harness.run_result(
                user_message, run_id=str(uuid.uuid4()), session_id=None
            )
        finally:
            # Keep last_harness and last_run_file in sync even when the
            # harness raises or the task is cancelled.
            self._last_run_file = harness.run_file

    async def run(self, user_message: str) -> str:
        """Run one message and return the reply text.

        Raises:
            MaxTurnsExceeded: when the result status is "max_turns_exceeded".
            RuntimeError: when the result status is "error".
        """
        result = await self.run_result(user_message)
        if result.status == "max_turns_exceeded":
            # Imported where it is raised, as the session ask() methods do.
            from data_harness.exceptions import MaxTurnsExceeded

            raise MaxTurnsExceeded(result.turns)
        if result.status == "error":
            raise RuntimeError(result.error or "unknown error")
        return result.text

    async def run_stream(self, user_message: str) -> AsyncGenerator[str, None]:
        """Stream assistant chunks for a one-shot run.

        Usage::

            async for chunk in agent.run_stream("hello"):
                print(chunk, end="", flush=True)
        """
        harness = self._make_harness()
        self._last_harness = harness
        try:
            async for chunk in harness.run_stream(user_message):
                yield chunk
        finally:
            # A consumer may stop iterating early (break / aclose) or the
            # stream may raise; record the run file in every case.
            self._last_run_file = harness.run_file

    def _build_tools(
        self,
        *,
        planner: Planner | None = None,
        cache: SessionCache | None = None,
    ) -> list[ToolSpec]:
        """Build tool specs for `cache` (defaults to the agent's own cache)."""
        target_cache = cache if cache is not None else self._cache
        return _build_tools_for(self, planner=planner, cache=target_cache)

    def _make_harness(
        self,
        *,
        cache: SessionCache | None = None,
        planner: Planner | None = None,
    ) -> AsyncHarness:
        """Create an `AsyncHarness` wired with this agent's tools and settings.

        Args:
            cache: Cache for the harness and its tools; defaults to the
                agent's own cache.
            planner: Planner to use; when omitted a new `Planner()` is created
                if the planner was enabled, otherwise none is used.
        """
        effective_cache = cache if cache is not None else self._cache

        if planner is not None:
            effective_planner = planner
        elif self._planner_enabled:
            effective_planner = Planner()
        else:
            effective_planner = None

        tools = self._build_tools(planner=effective_planner, cache=effective_cache)

        harness_kwargs: dict = {
            "adapter": self._adapter,
            "system": self._system,
            "tools": tools,
            "max_turns": self._max_turns,
            "cache": effective_cache,
        }
        # Only pass run_dir when configured so the harness default applies.
        if self._run_dir is not None:
            harness_kwargs["run_dir"] = str(self._run_dir)

        harness = AsyncHarness(**harness_kwargs)
        if effective_planner is not None:
            harness.register_reminder(effective_planner.reminder_hook)
        return harness
441
+
442
+
443
class AsyncAgentSession:
    """Stateful async chat session built from an `AsyncAgent` definition.

    The session keeps one `AsyncHarness` and one `SessionCache` of its own,
    and every turn goes to that same harness.
    """

    def __init__(self, agent: AsyncAgent) -> None:
        """Copy the agent's cache entries into a session cache and build the harness."""
        self._agent = agent
        self._cache = SessionCache(
            sample_size=agent.cache.sample_size,
            storage_dir=None,
            hot_limit=agent.cache.hot_limit,
        )
        # Each value goes through _copy_cache_value before being stored.
        for name, value in agent.cache.items():
            self._cache.put(name, _copy_cache_value(value))
        self._harness = agent._make_harness(cache=self._cache)
        self._id: str = str(uuid.uuid4())
        self._last_result: RunResult | None = None
        self._turns: int = 0

    @property
    def id(self) -> str:
        """Identifier generated for this session."""
        return self._id

    @property
    def last_result(self) -> RunResult | None:
        """Result of the latest `ask_result()` call, or `None` before any."""
        return self._last_result

    @property
    def turns(self) -> int:
        """Sum of `result.turns` over all `ask_result()` calls so far."""
        return self._turns

    @property
    def cache(self) -> SessionCache:
        """The session's own cache."""
        return self._cache

    @property
    def harness(self) -> AsyncHarness:
        """The harness used for every turn of this session."""
        return self._harness

    @property
    def run_file(self) -> str | None:
        """The harness's current `run_file`."""
        return self._harness.run_file

    def put(self, name: str, value: Any, *, overwrite: bool = False) -> str:
        """Store `value` in the session cache; returns what `cache.put` returns."""
        return self._cache.put(name, value, overwrite=overwrite)

    def list_handles(self) -> dict[str, str]:
        """Return the session cache's `list_handles()` mapping."""
        return self._cache.list_handles()

    async def ask_result(self, user_message: str) -> RunResult:
        """Send a follow-up message and return the harness's `RunResult`.

        Also updates `last_result`, `turns`, and the owning agent's
        `last_harness` / `last_run_file`.
        """
        result = await self._harness.ask_result(
            user_message, run_id=str(uuid.uuid4()), session_id=self._id
        )
        self._last_result = result
        self._turns += result.turns
        self._agent._last_harness = self._harness
        self._agent._last_run_file = self._harness.run_file
        return result

    async def ask(self, user_message: str) -> str:
        """Send a follow-up message and return the reply text.

        Raises:
            MaxTurnsExceeded: when the result status is "max_turns_exceeded".
            RuntimeError: when the result status is "error".
        """
        result = await self.ask_result(user_message)
        if result.status == "max_turns_exceeded":
            from data_harness.exceptions import MaxTurnsExceeded

            raise MaxTurnsExceeded(result.turns)
        if result.status == "error":
            raise RuntimeError(result.error or "unknown error")
        return result.text

    async def ask_stream(self, user_message: str) -> AsyncGenerator[str, None]:
        """Stream assistant chunks for a follow-up turn.

        Unlike `ask_result()`, this does not touch `last_result` or `turns`;
        it only updates the owning agent's `last_harness` / `last_run_file`.
        """
        try:
            async for chunk in self._harness.ask_stream(user_message):
                yield chunk
        finally:
            # Run the bookkeeping even if the consumer stops early
            # (break / aclose) or the stream raises.
            self._agent._last_harness = self._harness
            self._agent._last_run_file = self._harness.run_file
516
+
517
+
518
+ _EXPLAIN_TEMPLATE = """\
519
+ Agent is a thin composition layer. The equivalent explicit wiring is:
520
+
521
+ from data_harness.cache import SessionCache
522
+ from data_harness.loop import Harness
523
+ from data_harness.tools.interpreter import PythonInterpreter
524
+ from data_harness.tools.variables import make_list_variables_spec
525
+
526
+ cache = SessionCache()
527
+ tools = [
528
+ PythonInterpreter.make_tool_spec(cache),
529
+ make_list_variables_spec(cache),
530
+ ]
531
+ harness = Harness(
532
+ adapter=adapter,
533
+ system={system!r},
534
+ tools=tools,
535
+ max_turns={max_turns},
536
+ run_dir={run_dir!r},
537
+ cache=cache,
538
+ )
539
+ harness.run(user_message)
540
+
541
+ Each call to Agent.run() builds a fresh Harness with fresh tool specs.
542
+ Model-visible tools include python_interpreter and list_variables.
543
+ The message history resets per run; use agent.session().ask(...) for follow-up.
544
+ """
545
+
546
+
547
+ def _truncate(text: str, limit: int = 80) -> str:
548
+ if len(text) <= limit:
549
+ return text
550
+ return text[: limit - 1] + "…"