opentraces-schema 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .pytest_cache/
8
+ *.egg-link
9
+ web/site/node_modules/
10
+ web/site/.next/
11
+ web/viewer/node_modules/
12
+ web/coming-soon/.vercel/
13
+ .gstack/
14
+ .opentraces/
15
+
16
+ .desloppify/
17
+
18
+ # Private research (not shipped in OSS release)
19
+ kb/
20
+ .opentraces/staging/
21
+ .opentraces/config.json
22
+ .agents/skills/opentraces
23
+ .claude
24
+ .opentraces
@@ -0,0 +1,34 @@
1
+ # Changelog
2
+
3
+ All notable changes to the opentraces-schema package will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
6
+ This project uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html) with
7
+ schema-specific semantics described in VERSION-POLICY.md.
8
+
9
+ ## [Unreleased]
10
+
11
+ ## [0.1.0] - 2026-03-27
12
+
13
+ ### Added
14
+ - Initial schema release with 17 Pydantic v2 models
15
+ - `TraceRecord` top-level model: one JSONL line per complete agent session
16
+ - `Step` model oriented around TAO (Thought-Action-Observation) loops, not conversational turns
17
+ - `Outcome` model with RL-ready signals: `success`, `signal_source`, `signal_confidence` (derived/inferred/annotated)
18
+ - `Attribution` block (experimental) bridging trajectory data and code attribution per Agent Trace spec
19
+ - Sub-agent hierarchy via `Step.parent_step`, `Step.agent_role`, `Step.subagent_trajectory_ref`
20
+ - `Step.call_type` (main/subagent/warmup) for filtering cache-priming calls
21
+ - System prompt deduplication via hash-keyed `system_prompts` dict on `TraceRecord`
22
+ - `SecurityMetadata` with 3-tier classification (1=open, 2=guarded, 3=strict)
23
+ - Content hashing (SHA-256) on `TraceRecord` for cross-upload deduplication
24
+ - `AttributionRange.content_hash` using murmur3 for cross-refactor tracking
25
+ - `Observation.output_summary` for lightweight filtering without loading full tool results
26
+ - `TokenUsage` with `prefix_reuse_tokens`, `cache_read_tokens`, `cache_write_tokens`
27
+ - `Metrics` model with session-level aggregates and `estimated_cost_usd`
28
+ - `Environment` and `VCS` models for runtime context and reproducibility
29
+ - `Task` model with `source`, `repository`, `base_commit`
30
+ - `Agent` model using `provider/model-name` convention from models.dev
31
+ - `Snippet` model for extracted code blocks linked to source steps
32
+
33
+ ### Design References
34
+ - See [RATIONALE-0.1.0.md](RATIONALE-0.1.0.md) for the design basis of each decision in this version
@@ -0,0 +1,159 @@
1
+ # Field Mappings: opentraces -> Downstream Formats
2
+
3
+ Reference tables for converting opentraces TraceRecord to downstream schemas.
4
+ These mappings are implemented in `src/opentraces/exporters/` but documented here
5
+ for ML researchers who want to write their own converters.
6
+
7
+ ## opentraces -> ATIF v1.6
8
+
9
+ ATIF (Agent Trajectory Interchange Format) is designed for SFT and RL training pipelines.
10
+ The export is lossy: opentraces fields with no ATIF equivalent are dropped.
11
+
12
+ ### Root Level
13
+
14
+ | opentraces | ATIF v1.6 | Notes |
15
+ |-----------|-----------|-------|
16
+ | `schema_version` | `schema_version: "ATIF-v1.6"` | Hardcoded |
17
+ | `session_id` | `session_id` | Direct |
18
+ | `agent.name` | `agent.name` | Direct |
19
+ | `agent.version` | `agent.version` | Direct |
20
+ | `agent.model` | `agent.model_name` | Rename |
21
+ | `tool_definitions` | `agent.tool_definitions` | Direct |
22
+ | `trace_id` | - | Dropped (ATIF uses session_id only) |
23
+ | `content_hash` | - | Dropped |
24
+ | `timestamp_start` | - | Dropped (per-step timestamps preserved) |
25
+ | `timestamp_end` | - | Dropped |
26
+ | `environment` | - | Dropped |
27
+ | `outcome` | - | Dropped |
28
+ | `dependencies` | - | Dropped |
29
+ | `metrics` | - | Dropped (per-step metrics preserved) |
30
+ | `security` | - | Dropped |
31
+ | `attribution` | - | Dropped |
32
+ | `system_prompts` | - | Dropped (ATIF stores inline per step) |
33
+ | `metadata` | - | Dropped |
34
+
35
+ ### Step Level
36
+
37
+ | opentraces Step | ATIF Step | Notes |
38
+ |----------------|-----------|-------|
39
+ | `step_index` | `step_id` | Renumbered sequentially from 1 at export time |
40
+ | `role` | `source` | Direct (system/user/agent are the same) |
41
+ | `content` | `message` | Direct, omitted if None |
42
+ | `reasoning_content` | `reasoning_content` | Direct |
43
+ | `model` | `model_name` | Rename |
44
+ | `timestamp` | `timestamp` | Direct |
45
+ | `system_prompt_hash` | - | Dropped |
46
+ | `agent_role` | - | Dropped |
47
+ | `parent_step` | - | Dropped |
48
+ | `call_type` | - | Dropped |
49
+ | `subagent_trajectory_ref` | - | Dropped (ATIF supports this but we don't populate it) |
50
+ | `tools_available` | - | Dropped |
51
+ | `snippets` | - | Dropped |
52
+
53
+ ### Tool Calls
54
+
55
+ | opentraces ToolCall | ATIF ToolCallSchema | Notes |
56
+ |--------------------|---------------------|-------|
57
+ | `tool_call_id` | `tool_call_id` | Direct |
58
+ | `tool_name` | `function_name` | Rename |
59
+ | `input` (dict) | `arguments` (dict) | Direct (ATIF accepts dict) |
60
+ | `duration_ms` | - | Dropped |
61
+
62
+ ### Observations
63
+
64
+ opentraces stores observations as a flat list on each Step. ATIF wraps them
65
+ in a singular `observation` object with a `results` array.
66
+
67
+ | opentraces Observation | ATIF ObservationResult | Notes |
68
+ |-----------------------|-----------------------|-------|
69
+ | `source_call_id` | `source_call_id` | Direct |
70
+ | `content` | `content` | Direct |
71
+ | `output_summary` | - | Dropped |
72
+ | `error` | `content` | Mapped as `[error: {value}]` string |
73
+
74
+ **Structure transformation:**
75
+ ```
76
+ opentraces: step.observations = [Obs1, Obs2]
77
+ ATIF: step.observation = {"results": [Result1, Result2]}
78
+ ```
79
+
80
+ When a step has zero observations, the `observation` key is omitted entirely.
81
+
82
+ ### Token Usage
83
+
84
+ | opentraces TokenUsage | ATIF MetricsSchema | Notes |
85
+ |-----------------------|-------------------|-------|
86
+ | `input_tokens` | `prompt_tokens` | Rename |
87
+ | `output_tokens` | `completion_tokens` | Rename |
88
+ | `cache_read_tokens` | `cached_tokens` | Rename |
89
+ | `cache_write_tokens` | - | Dropped (no ATIF equivalent) |
90
+ | `prefix_reuse_tokens` | - | Dropped (opentraces-only metric) |
91
+
92
+ ATIF also supports `cost_usd`, `prompt_token_ids`, `completion_token_ids`, and
93
+ `logprobs`, but these are not available from CLI-level agent traces and are
94
+ omitted from the export.
95
+
96
+ ---
97
+
98
+ ## opentraces -> ADP (Agent Data Protocol)
99
+
100
+ ADP is designed as a training interlingua for SFT across multiple agent harnesses.
101
+ The export flattens opentraces' hierarchical steps into ADP's alternating
102
+ action/observation list.
103
+
104
+ *Exporter implementation planned. Mapping table below is a reference for
105
+ researchers writing their own converters.*
106
+
107
+ ### Core Mapping
108
+
109
+ | opentraces | ADP | Notes |
110
+ |-----------|-----|-------|
111
+ | `session_id` | `Trajectory.id` | Direct |
112
+ | Step(role=agent, tool_calls=[tc]) | `APIAction(function=tc.tool_name, kwargs=tc.input)` | Each tool call becomes a separate APIAction |
113
+ | Step(role=agent, content=code) | `CodeAction(language=..., content=...)` | Only if step contains executable code |
114
+ | Step(role=agent, content=text) | `MessageAction(content=text)` | Agent messages without tool calls |
115
+ | Observation(content=text) | `TextObservation(source="environment", content=text)` | Tool results |
116
+ | Step(role=user) | `TextObservation(source="user", content=text)` | User messages |
117
+ | `reasoning_content` | `APIAction.description` or `CodeAction.description` | Reasoning attached to the action |
118
+ | `metadata` | `Trajectory.details` | Flexible dict |
119
+
120
+ ### Fields Dropped by ADP Export
121
+
122
+ All of: `attribution`, `security`, `environment`, `outcome`, `dependencies`,
123
+ `metrics`, `system_prompts`, `tool_definitions`, `content_hash`, `token_usage`,
124
+ `snippets`, and the step-hierarchy fields (`parent_step`, `agent_role`, `call_type`).
125
+
126
+ ADP's key strength is simplicity: 3 action types + 2 observation types cover
127
+ coding, browsing, tool use, and SWE. The trade-off is losing all the metadata
128
+ that makes opentraces traces useful for analytics, RL reward modeling,
129
+ attribution, and security auditing.
130
+
131
+ ---
132
+
133
+ ## opentraces -> OTel GenAI (future)
134
+
135
+ OTel GenAI Semantic Conventions represent traces as span trees, which is a
136
+ fundamentally different structure from our step arrays. Each opentraces Step
137
+ would become a span, with tool calls as child spans.
138
+
139
+ *Exporter implementation planned for v0.2.*
140
+
141
+ ---
142
+
143
+ ## Notes for Converter Authors
144
+
145
+ 1. **step_id renumbering**: ATIF uses 1-indexed step_id. opentraces step_index
146
+ may be 0 or 1-indexed depending on the parser. Always renumber at export time.
147
+
148
+ 2. **observation wrapping**: ATIF uses singular `observation` with `results[]`.
149
+ opentraces uses plural `observations[]`. Don't just rename the field.
150
+
151
+ 3. **token_usage partial mapping**: opentraces tracks 5 token sub-fields, ATIF
152
+ tracks 3. The two cache fields unique to opentraces (cache_write, prefix_reuse)
153
+ are our key differentiator for cost analysis.
154
+
155
+ 4. **content=None steps**: Steps that are pure tool calls (no text content) should
156
+ omit the `message`/`content` field, not set it to empty string.
157
+
158
+ 5. **dangling tool calls**: Observations with `error="no_result"` indicate tool
159
+ calls that never received a response. Map these to a descriptive error string.
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: opentraces-schema
3
+ Version: 0.1.0
4
+ Summary: Schema models for the opentraces.ai agent trace JSONL format
5
+ Project-URL: Homepage, https://opentraces.ai
6
+ Project-URL: Repository, https://github.com/JayFarei/opentraces
7
+ License-Expression: MIT
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: pydantic>=2.0
10
+ Description-Content-Type: text/markdown
11
+
12
+ # opentraces-schema
13
+
14
+ Pydantic v2 models for the opentraces.ai JSONL trace format.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install -e packages/opentraces-schema
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```python
25
+ from opentraces_schema import TraceRecord, SCHEMA_VERSION
26
+
27
+ record = TraceRecord(
28
+ trace_id="abc-123",
29
+ session_id="sess-456",
30
+ agent={"name": "claude-code", "version": "1.0.32"},
31
+ )
32
+ line = record.to_jsonl_line()
33
+ ```
34
+
35
+ ## Version
36
+
37
+ The schema version (`0.1.0`) lives in `src/opentraces_schema/version.py` as the
38
+ single source of truth. See [VERSION-POLICY.md](VERSION-POLICY.md) for semver
39
+ semantics and the bump checklist.
40
+
41
+ ## Schema Rationale
42
+
43
+ Every version of the schema ships with a rationale document explaining why each
44
+ model and field exists, grounded in public standards (ATIF, Agent Trace, ADP, OTel)
45
+ and empirical observations from real agent traces.
46
+
47
+ The current rationale is [RATIONALE-0.1.0.md](RATIONALE-0.1.0.md). Each new version
48
+ will have its own rationale file linked from the [CHANGELOG](CHANGELOG.md).
49
+
50
+ ## Contributing
51
+
52
+ Schema feedback, questions, and proposals are welcome via
53
+ [GitHub Issues](https://github.com/JayFarei/opentraces/issues). When suggesting
54
+ a schema change, please include:
55
+
56
+ - **What** field or model you would add, change, or remove
57
+ - **Why** it matters for your use case (training, analytics, attribution, etc.)
58
+ - **How** it relates to existing standards (ATIF, Agent Trace, ADP, OTel) if applicable
59
+
60
+ Breaking changes (field renames, removals, type changes) require a major version bump once the schema reaches 1.0; during 0.x they may ship in a minor bump.
61
+ New optional fields and models are minor bumps. See [VERSION-POLICY.md](VERSION-POLICY.md)
62
+ for details.
63
+
64
+ ## Documentation
65
+
66
+ - [CHANGELOG.md](CHANGELOG.md) - What changed in each version
67
+ - [VERSION-POLICY.md](VERSION-POLICY.md) - What version numbers mean for a schema package
68
+ - [RATIONALE-0.1.0.md](RATIONALE-0.1.0.md) - Design rationale for v0.1.0
@@ -0,0 +1,264 @@
1
+ # Schema Rationale: opentraces-schema v0.1.0
2
+
3
+ Why this schema is what it is. Each section connects a design decision to the
4
+ standards, empirical observations, and constraints that motivated it.
5
+
6
+ Standards referenced:
7
+ - [Agent Trace spec](https://github.com/nichochar/agent-trace) (Cursor RFC, CC BY 4.0)
8
+ - [ATIF v1.6](https://github.com/harbor-ai/agent-trajectory-interchange-format) (Agent Trajectory Interchange Format)
9
+ - [ADP](https://arxiv.org/abs/2410.10762) (Agent Data Protocol)
10
+ - [OTel GenAI Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/)
11
+
12
+
13
+ ## TraceRecord (Top-Level)
14
+
15
+ ### Why an independent schema, not ATIF-native
16
+
17
+ Three existing standards each serve a different purpose:
18
+ - **ATIF** optimizes for training pipelines (token IDs, logprobs)
19
+ - **Agent Trace** captures attribution only (who wrote which lines)
20
+ - **OTel** captures observability only (latency, error rates)
21
+
22
+ No single standard covers trajectory + attribution + security + environment.
23
+ This schema bridges all three as a superset, with `opentraces export --format atif`
24
+ planned for standards-compatible output.
25
+
26
+ ### Why content_hash (SHA-256)
27
+
28
+ With sharded JSONL upload (one file per push, never append to existing shards),
29
+ dedup must happen at record level. Existing community trace datasets on HuggingFace
30
+ use content hashing for deduplication at upload time.
31
+
32
+ SHA-256 chosen over cheaper hashes because traces are large enough that hash
33
+ computation time is negligible vs I/O. The `compute_content_hash()` method excludes
34
+ `content_hash` and `trace_id` so re-parsing identical source data produces the same
35
+ hash regardless of the random UUID assigned.
36
+
37
+ ### Why session_id + trace_id (two IDs)
38
+
39
+ - `session_id`: the agent's native session identifier, stable across re-parsing
40
+ - `trace_id`: random UUID for database-level uniqueness
41
+
42
+ Agent sessions group many API calls under a single session ID, but a given session
43
+ may be re-ingested multiple times (e.g. re-exported after a schema upgrade). Separate
44
+ IDs allow re-ingestion without collision while preserving join keys to the original
45
+ agent session.
46
+
47
+
48
+ ## Steps: TAO-Loop Oriented
49
+
50
+ ### Why `steps` not `turns`
51
+
52
+ Each Step represents one LLM API call (request + response), not a conversational turn.
53
+ Multi-agent coding sessions routinely involve 50-100+ API calls spanning multiple
54
+ parallel sub-agents within a single user-visible "conversation." Conversational turns
55
+ would collapse this hierarchy into a flat sequence, losing the architectural signal
56
+ needed for caching analysis and training data segmentation.
57
+
58
+ Both ATIF and ADP use step-based models. The TAO (Thought-Action-Observation) loop
59
+ has converged as the canonical trajectory primitive across agent frameworks
60
+ (OpenHands, SWE-Agent, AgentLab) and community datasets.
61
+
62
+ ### Why `role: "agent"` not `"assistant"`
63
+
64
+ ATIF and the broader agent community convention use `system | user | agent`. "Agent"
65
+ is semantically accurate for autonomous coding agents that reason, act, and observe
66
+ in loops, as opposed to chat assistants that respond to single prompts.
67
+
68
+ ### Why parent_step + agent_role + subagent_trajectory_ref
69
+
70
+ Multi-agent coding systems exhibit a hierarchical phase structure:
71
+
72
+ 1. **Warm-up**: cache priming calls with no reasoning
73
+ 2. **Main agent**: full system prompt, full tool set
74
+ 3. **Explore**: parallel sub-agents with fresh context, reduced tool sets, role-specific prompts
75
+ 4. **Plan**: receives only summarized explore findings, not raw context
76
+ 5. **Execute**: main agent follows plan as checklist
77
+
78
+ Sub-agents receive fresh context (not the parent's) and a subset of tools. This
79
+ hierarchical context isolation creates stable prefixes within each sub-agent loop,
80
+ which is why multi-agent architectures achieve high prefix cache reuse rates.
81
+
82
+ Three fields capture this hierarchy:
83
+ - `parent_step`: tree edge (which main-agent step spawned this sub-agent)
84
+ - `agent_role`: phase label (main, explore, plan) for filtering without reading system prompts
85
+ - `subagent_trajectory_ref`: links to a separate TraceRecord when sub-agent trajectories are stored independently
86
+
87
+ ATIF provides the `subagent_trajectory_ref` pattern for multi-agent delegation.
88
+
89
+ ### Why call_type: main | subagent | warmup
90
+
91
+ Multi-agent systems include warm-up calls that exist purely to seed the KV cache.
92
+ They contain no reasoning and produce empty or minimal output, but are architecturally
93
+ significant: they explain why later calls achieve high prefix reuse.
94
+
95
+ For training data, warm-up calls should be filterable (they add noise to SFT datasets).
96
+ For caching analysis, they are essential. `call_type` enables both use cases.
97
+
98
+ ### Why system_prompt_hash + top-level system_prompts dict
99
+
100
+ In multi-agent sessions, system prompts can be 20K+ tokens and repeat identically
101
+ across every call within a sub-agent phase. Storing inline would multiply storage
102
+ dramatically for long sessions.
103
+
104
+ Hash-keyed deduplication: store each unique system prompt once in a top-level dict,
105
+ reference by hash in each step. This separates the queryable step metadata from
106
+ the bulk content.
107
+
108
+ ### Why reasoning_content as explicit field
109
+
110
+ Extended thinking and chain-of-thought content is returned in a separate field by
111
+ LLM APIs that support it. Benchmark evaluations suggest that including hidden
112
+ reasoning in training data improves downstream task performance.
113
+
114
+ A dedicated `reasoning_content` field preserves the API-level separation for training
115
+ pipelines that may want to include or exclude chain-of-thought independently of the
116
+ main content.
117
+
118
+ ### Why output_summary on Observation
119
+
120
+ In multi-agent architectures, downstream sub-agents (e.g. plan phase) receive only
121
+ summarized findings from upstream sub-agents (e.g. explore phase), not raw results.
122
+ This is an information bottleneck pattern.
123
+
124
+ `output_summary` serves the same purpose for trace consumers: scan summaries to assess
125
+ relevance without loading full tool outputs (which can be megabytes for file reads or
126
+ grep results over large codebases).
127
+
128
+
129
+ ## ToolCall and Observation: Separated
130
+
131
+ ### Why tool calls and observations are separate lists
132
+
133
+ Training pipelines (SFT, RLHF) depend on clean tool_call / tool_result separation
134
+ for learning tool selection and result interpretation as distinct capabilities.
135
+ ATIF and ADP both maintain this separation. Observations link back via `source_call_id`
136
+ for 1:1 matching. Dangling tool calls (agent requested a tool but no result was
137
+ recorded) are marked with `error: "no_result"` rather than dropped.
138
+
139
+
140
+ ## TokenUsage: Cache-Aware
141
+
142
+ ### Why prefix_reuse_tokens, cache_read_tokens, cache_write_tokens
143
+
144
+ Prefix reuse is the dominant cost driver for multi-agent architectures. Empirical
145
+ measurements show that well-structured hierarchical agents achieve 90%+ prefix reuse
146
+ (representing 80%+ cost savings), while agents with dynamic mid-prompt mutations
147
+ achieve under 50%, and template-based agents under 5%.
148
+
149
+ | Architecture pattern | Typical prefix reuse | Implication |
150
+ |---------------------|---------------------|-------------|
151
+ | Hierarchical multi-agent (stable prefixes) | 90%+ | Well-structured, cache-friendly |
152
+ | Dynamic memory mutation | ~40-50% | Memory updates break prefix alignment |
153
+ | Template-based prompting | <5% | Variable insertion near prompt start destroys caching |
154
+
155
+ Per-step cache breakdown enables phase-level cost analysis and cross-architecture
156
+ comparisons.
157
+
158
+
159
+ ## Outcome: RL-Ready Signals
160
+
161
+ ### Why success + signal_source + signal_confidence
162
+
163
+ The training community needs trajectory-level reward signals for RLHF, DPO, and RLVR.
164
+ Most existing community trace datasets lack outcome fields entirely, making them
165
+ unsuitable for reward modeling without manual annotation.
166
+
167
+ Three confidence tiers communicate trustworthiness:
168
+ - **derived**: deterministic extraction (e.g. `committed` from git state)
169
+ - **inferred**: heuristic-based (e.g. success from test output patterns)
170
+ - **annotated**: human or CI label
171
+
172
+ This lets training pipelines filter by confidence: use only `derived` signals for
173
+ high-confidence reward, include `inferred` for larger but noisier datasets.
174
+
175
+ ### Why committed + commit_sha
176
+
177
+ The cheapest deterministic quality signal available: did the agent's changes get
178
+ committed to git? Derivable from git state with zero annotation cost, zero LLM
179
+ enrichment. Aligns with the project principle: zero required annotation, all
180
+ enrichment in v0.1.0 is deterministic.
181
+
182
+
183
+ ## Attribution: Embedded Agent Trace Block
184
+
185
+ ### Why embed attribution rather than link externally
186
+
187
+ The Agent Trace spec stores attribution in a separate file from trajectories.
188
+ This schema bridges both: trajectory (process) AND attribution (output) in a single
189
+ record. Embedding keeps traces self-contained for dataset consumption, where each
190
+ JSONL line should be independently useful without external file lookups.
191
+
192
+ Marked `experimental: true` because attribution confidence varies by session
193
+ complexity: single-file edits produce high-confidence attribution, multi-file
194
+ refactors with interleaved tool calls produce lower confidence.
195
+
196
+ ### Why murmur3 for AttributionRange.content_hash
197
+
198
+ The Agent Trace spec uses `algorithm:value` format (e.g. `murmur3:9f2e8a1b`) for
199
+ position-independent content tracking. murmur3 is fast and non-cryptographic,
200
+ sufficient for detecting code movement across refactors and file renames.
201
+
202
+ SHA-256 is used at the trace level (collision resistance needed for dedup integrity),
203
+ murmur3 at the line-range level (speed needed, no security requirement).
204
+
205
+
206
+ ## SecurityMetadata: 3-Tier
207
+
208
+ ### Why a 3-tier security model
209
+
210
+ - **Tier 1** (open): full content, suitable for public repos and open-source projects
211
+ - **Tier 2** (guarded): redacted secrets, anonymized paths/usernames
212
+ - **Tier 3** (strict): structural metadata only, no content
213
+
214
+ Existing trace-sharing tools typically offer a single redaction mode: everything is
215
+ processed the same way. A tier system enables per-project configuration with
216
+ per-session override, so a user working on both open-source and proprietary code
217
+ can publish traces from both with appropriate protection levels.
218
+
219
+
220
+ ## Environment and VCS
221
+
222
+ ### Why capture OS, shell, language_ecosystem
223
+
224
+ Reproducibility: the same task on macOS vs Linux may produce different agent behavior
225
+ (different shell commands, different tool availability, different file paths).
226
+ Filtering: researchers can select traces by ecosystem (Python, TypeScript, Rust)
227
+ to build domain-specific training datasets without parsing file extensions from
228
+ tool call arguments.
229
+
230
+
231
+ ## Deliberate Exclusions
232
+
233
+ ### No token IDs or logprobs
234
+
235
+ ATIF v1.6 includes `prompt_token_ids`, `completion_token_ids`, and `logprobs` per
236
+ step, enabling RL without retokenization drift. These fields are not available from
237
+ agent CLI tools (Claude Code, Cursor, Codex CLI, Gemini CLI) because they intercept
238
+ at the application layer, not the inference layer.
239
+
240
+ Planned for inclusion when agent APIs expose them. Until then, ATIF export
241
+ (`opentraces export --format atif`) will bridge this gap for training pipelines
242
+ that need token-level data from other sources.
243
+
244
+ ### No OTel span IDs
245
+
246
+ OTel provides distributed tracing metadata (trace_id, span_id, parent_span_id)
247
+ designed for production observability across microservices. Our `trace_id` is
248
+ session-scoped, not request-scoped. OTel interop is planned via export, not
249
+ native embedding, because the primary consumers of this schema are training
250
+ pipelines and researchers, not production monitoring dashboards.
251
+
252
+ ### No AGENTS.md content
253
+
254
+ AGENTS.md is a project-level instruction file for agents, not trace data. While
255
+ it provides context for understanding agent behavior, embedding it in every trace
256
+ record would be wasteful. It may be referenced via the `metadata` dict in future
257
+ versions if there is demand.
258
+
259
+ ### No LLM enrichment fields
260
+
261
+ Fields like `task_description`, `domain_tags`, and `task_type` are not in v0.1.0
262
+ because they require LLM inference to generate. The project principle is zero
263
+ required annotation: all enrichment in v0.1.0 is deterministic. LLM-enriched
264
+ metadata may be added as optional fields in a future minor version.
@@ -0,0 +1,57 @@
1
+ # opentraces-schema
2
+
3
+ Pydantic v2 models for the opentraces.ai JSONL trace format.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install -e packages/opentraces-schema
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from opentraces_schema import TraceRecord, SCHEMA_VERSION
15
+
16
+ record = TraceRecord(
17
+ trace_id="abc-123",
18
+ session_id="sess-456",
19
+ agent={"name": "claude-code", "version": "1.0.32"},
20
+ )
21
+ line = record.to_jsonl_line()
22
+ ```
23
+
24
+ ## Version
25
+
26
+ The schema version (`0.1.0`) lives in `src/opentraces_schema/version.py` as the
27
+ single source of truth. See [VERSION-POLICY.md](VERSION-POLICY.md) for semver
28
+ semantics and the bump checklist.
29
+
30
+ ## Schema Rationale
31
+
32
+ Every version of the schema ships with a rationale document explaining why each
33
+ model and field exists, grounded in public standards (ATIF, Agent Trace, ADP, OTel)
34
+ and empirical observations from real agent traces.
35
+
36
+ The current rationale is [RATIONALE-0.1.0.md](RATIONALE-0.1.0.md). Each new version
37
+ will have its own rationale file linked from the [CHANGELOG](CHANGELOG.md).
38
+
39
+ ## Contributing
40
+
41
+ Schema feedback, questions, and proposals are welcome via
42
+ [GitHub Issues](https://github.com/JayFarei/opentraces/issues). When suggesting
43
+ a schema change, please include:
44
+
45
+ - **What** field or model you would add, change, or remove
46
+ - **Why** it matters for your use case (training, analytics, attribution, etc.)
47
+ - **How** it relates to existing standards (ATIF, Agent Trace, ADP, OTel) if applicable
48
+
49
+ Breaking changes (field renames, removals, type changes) require a major version bump once the schema reaches 1.0; during 0.x they may ship in a minor bump.
50
+ New optional fields and models are minor bumps. See [VERSION-POLICY.md](VERSION-POLICY.md)
51
+ for details.
52
+
53
+ ## Documentation
54
+
55
+ - [CHANGELOG.md](CHANGELOG.md) - What changed in each version
56
+ - [VERSION-POLICY.md](VERSION-POLICY.md) - What version numbers mean for a schema package
57
+ - [RATIONALE-0.1.0.md](RATIONALE-0.1.0.md) - Design rationale for v0.1.0
@@ -0,0 +1,39 @@
1
+ # Version Policy
2
+
3
+ opentraces-schema follows Semantic Versioning (semver) with schema-specific semantics.
4
+
5
+ ## What the version numbers mean
6
+
7
+ - **MAJOR** (X.0.0): Breaking changes to existing fields. Renaming, removing, or
8
+ changing the type of an existing field. Consumers must update parsers.
9
+ - **MINOR** (0.X.0): New optional fields, new models, new enum values added to
10
+ existing Literal types. Existing parsers continue to work without changes.
11
+ - **PATCH** (0.0.X): Docstring fixes, validation constraint adjustments that do
12
+ not change the serialized format, bug fixes in computed fields.
13
+
14
+ ## Pre-1.0 stability
15
+
16
+ During 0.x development, MINOR bumps may include breaking changes. The schema is
17
+ not yet stable. Pin to exact versions (`opentraces-schema==0.1.0`) rather than
18
+ ranges until 1.0.
19
+
20
+ ## Where the version lives
21
+
22
+ The single source of truth is `src/opentraces_schema/version.py`. The `SCHEMA_VERSION`
23
+ constant is used by:
24
+
25
+ - `pyproject.toml` via hatch dynamic versioning
26
+ - `TraceRecord.schema_version` default
27
+ - `Attribution.version` default
28
+
29
+ Note: `TraceRecord.schema_version` and `Attribution.version` are currently coupled
30
+ to the same `SCHEMA_VERSION`. If the attribution spec needs to version independently
31
+ in the future, this coupling can be broken by introducing a separate constant.
32
+
33
+ ## Bump checklist
34
+
35
+ 1. Update `SCHEMA_VERSION` in `src/opentraces_schema/version.py`
36
+ 2. Add entry to `CHANGELOG.md` under `[Unreleased]`, then move to new version header
37
+ 3. Create `RATIONALE-{VERSION}.md` documenting design decisions for the new version
38
+ 4. Link the rationale file from the CHANGELOG entry
39
+ 5. Tag the commit: `git tag schema-v{VERSION}`
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "opentraces-schema"
7
+ dynamic = ["version"]
8
+ description = "Schema models for the opentraces.ai agent trace JSONL format"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ dependencies = [
13
+ "pydantic>=2.0",
14
+ ]
15
+
16
+ [project.urls]
17
+ Homepage = "https://opentraces.ai"
18
+ Repository = "https://github.com/JayFarei/opentraces"
19
+
20
+ [tool.hatch.version]
21
+ path = "src/opentraces_schema/version.py"
22
+ pattern = "SCHEMA_VERSION = \"(?P<version>[^\"]+)\""
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ packages = ["src/opentraces_schema"]
@@ -0,0 +1,43 @@
1
+ """opentraces-schema: Pydantic models for the opentraces.ai JSONL trace format."""
2
+
3
+ from .models import (
4
+ Agent,
5
+ Attribution,
6
+ AttributionConversation,
7
+ AttributionFile,
8
+ AttributionRange,
9
+ Environment,
10
+ Metrics,
11
+ Observation,
12
+ Outcome,
13
+ SecurityMetadata,
14
+ Snippet,
15
+ Step,
16
+ Task,
17
+ TokenUsage,
18
+ ToolCall,
19
+ TraceRecord,
20
+ VCS,
21
+ )
22
+ from .version import SCHEMA_VERSION
23
+
24
+ __all__ = [
25
+ "Agent",
26
+ "Attribution",
27
+ "AttributionConversation",
28
+ "AttributionFile",
29
+ "AttributionRange",
30
+ "Environment",
31
+ "Metrics",
32
+ "Observation",
33
+ "Outcome",
34
+ "SCHEMA_VERSION",
35
+ "SecurityMetadata",
36
+ "Snippet",
37
+ "Step",
38
+ "Task",
39
+ "TokenUsage",
40
+ "ToolCall",
41
+ "TraceRecord",
42
+ "VCS",
43
+ ]
@@ -0,0 +1,242 @@
1
+ """Pydantic v2 models for the opentraces.ai JSONL trace schema.
2
+
3
+ This module defines the complete schema for enriched agent session traces.
4
+ Each TraceRecord represents one complete agent session or task unit.
5
+
6
+ The schema is informed by ATIF v1.6, ADP, Agent Trace spec, and field patterns
7
+ found in existing HF datasets (nlile, Nebius, CoderForge).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ from typing import Any, Literal
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from .version import SCHEMA_VERSION
19
+
20
+
21
class Task(BaseModel):
    """Task metadata used to filter and group traces."""

    description: str | None = None
    source: str | None = Field(
        default=None,
        description="user_prompt, cli_arg, skill, etc.",
    )
    repository: str | None = Field(
        default=None,
        description="owner/repo format",
    )
    base_commit: str | None = None
28
+
29
+
30
class Agent(BaseModel):
    """Identity of the agent; ``model`` uses the provider/model convention."""

    # Required field: no default is supplied for the agent identifier.
    name: str = Field(description="Agent identifier: claude-code, cursor, codex, etc.")
    version: str | None = None
    model: str | None = Field(
        default=None,
        description="provider/model-name, e.g. anthropic/claude-sonnet-4-20250514",
    )
36
+
37
+
38
class VCS(BaseModel):
    """Version-control metadata; ``type`` is 'none' outside a git repo."""

    # Only "git" and "none" are accepted; "none" is the default.
    type: Literal["git", "none"] = "none"
    base_commit: str | None = None
    branch: str | None = None
    diff: str | None = Field(
        default=None,
        description="Unified diff string or null",
    )
45
+
46
+
47
class Environment(BaseModel):
    """Runtime environment details for filtering and reproducibility."""

    os: str | None = None
    shell: str | None = None
    # Each instance gets its own default VCS() (type="none") and empty list.
    vcs: VCS = Field(default_factory=VCS)
    language_ecosystem: list[str] = Field(default_factory=list)
54
+
55
+
56
class ToolCall(BaseModel):
    """One tool invocation issued within a step."""

    # Both identifiers are required; input defaults to a fresh empty dict.
    tool_call_id: str
    tool_name: str
    input: dict[str, Any] = Field(default_factory=dict)
    duration_ms: int | None = None
63
+
64
+
65
class Observation(BaseModel):
    """Tool-call result, tied to its originating call by ``source_call_id``."""

    source_call_id: str
    content: str | None = None
    output_summary: str | None = Field(
        default=None,
        description="Lightweight preview of tool result",
    )
    error: str | None = Field(
        default=None,
        description="Error info, e.g. 'no_result' for dangling tool calls",
    )
72
+
73
+
74
class Snippet(BaseModel):
    """A code block pulled out of tool results or agent responses."""

    # Only the file path is required; every other field defaults to None.
    file_path: str
    start_line: int | None = None
    end_line: int | None = None
    language: str | None = None
    text: str | None = None
    source_step: int | None = Field(
        default=None,
        description="Step index that produced this snippet",
    )
83
+
84
+
85
class TokenUsage(BaseModel):
    """Token counts for a single step, for cost and efficiency analysis."""

    # Every counter starts at zero when not provided.
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    prefix_reuse_tokens: int = 0
93
+
94
+
95
class Step(BaseModel):
    """One LLM API call (request plus response) inside the TAO loop.

    A step is a single thought-action-observation cycle rather than a
    conversational turn, in line with ATIF's step-based model.
    """

    # Required identity of the step.
    step_index: int
    role: Literal["system", "user", "agent"]

    content: str | None = None
    reasoning_content: str | None = Field(
        default=None,
        description="Chain-of-thought / extended thinking",
    )
    model: str | None = None
    system_prompt_hash: str | None = Field(
        default=None,
        description="Key into top-level system_prompts map",
    )
    agent_role: str | None = Field(
        default=None,
        description="main, explore, plan, etc.",
    )
    parent_step: int | None = Field(
        default=None,
        description="Step index of parent for sub-agent hierarchy",
    )
    call_type: Literal["main", "subagent", "warmup"] | None = None
    subagent_trajectory_ref: str | None = Field(
        default=None,
        description="Session ID of sub-agent trajectory",
    )

    # Collection fields use factories so each instance gets its own container.
    tools_available: list[str] = Field(default_factory=list)
    tool_calls: list[ToolCall] = Field(default_factory=list)
    observations: list[Observation] = Field(default_factory=list)
    snippets: list[Snippet] = Field(default_factory=list)
    token_usage: TokenUsage = Field(default_factory=TokenUsage)
    timestamp: str | None = None
118
+
119
+
120
class Outcome(BaseModel):
    """Outcome signals of a session, intended for RL / reward modeling.

    ``signal_confidence`` states how far the signal can be trusted:
    - derived: deterministic extraction (e.g. committed from git)
    - inferred: heuristic-based (e.g. success from test output patterns)
    - annotated: human or CI annotation
    """

    success: bool | None = None
    signal_source: str = "deterministic"
    signal_confidence: Literal["derived", "inferred", "annotated"] = "derived"
    description: str | None = None
    patch: str | None = Field(
        default=None,
        description="Unified diff produced by the session",
    )
    committed: bool = False
    commit_sha: str | None = None
136
+
137
+
138
class AttributionRange(BaseModel):
    """Line range attributed to an agent conversation."""

    # Both line bounds are required.
    start_line: int
    end_line: int
    content_hash: str | None = Field(
        default=None,
        description="murmur3 hash for cross-refactor tracking",
    )
    confidence: Literal["high", "medium", "low"] | None = None
145
+
146
+
147
class AttributionConversation(BaseModel):
    """Ties attributed code ranges to the conversation that produced them."""

    contributor: dict[str, str] = Field(
        description="e.g. {type: 'ai', model_id: 'anthropic/claude-sonnet-4-20250514'}",
        default_factory=dict,
    )
    url: str | None = Field(
        default=None,
        description="opentraces://trace_id/step_N",
    )
    ranges: list[AttributionRange] = Field(default_factory=list)
156
+
157
+
158
class AttributionFile(BaseModel):
    """Attribution data scoped to one file."""

    # The path is required; conversations defaults to a fresh empty list.
    path: str
    conversations: list[AttributionConversation] = Field(default_factory=list)
163
+
164
+
165
class Attribution(BaseModel):
    """Embedded attribution block compatible with the Agent Trace spec.

    Connects the trajectory (process) with attribution (output) by recording
    which files and line ranges the agent session produced.

    Marked experimental in v0.1 - confidence varies by session complexity.
    """

    # The block's version defaults to the package-wide SCHEMA_VERSION.
    version: str = SCHEMA_VERSION
    experimental: bool = True
    files: list[AttributionFile] = Field(default_factory=list)
177
+
178
+
179
class Metrics(BaseModel):
    """Session-level aggregate metrics for analytics and cost modeling."""

    total_steps: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_duration_s: float | None = None
    # When provided, the rate is validated to lie within [0.0, 1.0].
    cache_hit_rate: float | None = Field(default=None, ge=0.0, le=1.0)
    estimated_cost_usd: float | None = None
188
+
189
+
190
class SecurityMetadata(BaseModel):
    """Summary of the security processing applied and what it flagged/redacted."""

    # Defaults describe a record that has not been scanned.
    scanned: bool = False
    flags_reviewed: int = 0
    redactions_applied: int = 0
    classifier_version: str | None = None
197
+
198
+
199
class TraceRecord(BaseModel):
    """Top-level record describing one complete agent session trace.

    A JSONL file holds one TraceRecord per line. The schema joins trajectory
    data (ATIF/ADP) with code attribution (Agent Trace spec), giving a full
    record of process + output.
    """

    schema_version: str = SCHEMA_VERSION
    # trace_id, session_id and agent are the only required fields.
    trace_id: str
    session_id: str
    content_hash: str | None = None
    timestamp_start: str | None = None
    timestamp_end: str | None = None
    task: Task = Field(default_factory=Task)
    agent: Agent
    environment: Environment = Field(default_factory=Environment)
    system_prompts: dict[str, str] = Field(
        description="Deduplicated system prompts keyed by hash",
        default_factory=dict,
    )
    tool_definitions: list[dict[str, Any]] = Field(default_factory=list)
    steps: list[Step] = Field(default_factory=list)
    outcome: Outcome = Field(default_factory=Outcome)
    dependencies: list[str] = Field(default_factory=list)
    metrics: Metrics = Field(default_factory=Metrics)
    security: SecurityMetadata = Field(default_factory=SecurityMetadata)
    attribution: Attribution | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    def compute_content_hash(self) -> str:
        """Return the SHA-256 hex digest of this trace's content.

        ``content_hash`` and ``trace_id`` are left out of the hashed payload,
        so re-parsing identical content gives the same digest regardless of
        the random UUID assigned.
        """
        payload = self.model_dump(exclude={"content_hash", "trace_id"})
        # Sorted keys keep the serialization stable; default=str stringifies
        # any value the json encoder cannot handle natively.
        canonical = json.dumps(payload, sort_keys=True, default=str)
        digest = hashlib.sha256(canonical.encode())
        return digest.hexdigest()

    def to_jsonl_line(self) -> str:
        """Return this record serialized as a single JSONL line.

        Side effect: ``content_hash`` is recomputed and stored on the
        instance before the record is serialized.
        """
        self.content_hash = self.compute_content_hash()
        return self.model_dump_json()
@@ -0,0 +1,3 @@
1
+ """Schema version for opentraces JSONL format."""
2
+
3
+ SCHEMA_VERSION = "0.1.0"