open-agent-traces 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. open_agent_traces-0.1.0/.github/workflows/ci.yml +35 -0
  2. open_agent_traces-0.1.0/.github/workflows/publish.yml +73 -0
  3. open_agent_traces-0.1.0/.gitignore +16 -0
  4. open_agent_traces-0.1.0/.python-version +1 -0
  5. open_agent_traces-0.1.0/LICENSE +21 -0
  6. open_agent_traces-0.1.0/PKG-INFO +153 -0
  7. open_agent_traces-0.1.0/README.md +113 -0
  8. open_agent_traces-0.1.0/docs/announcements/linkedin.md +18 -0
  9. open_agent_traces-0.1.0/docs/announcements/x.md +7 -0
  10. open_agent_traces-0.1.0/docs/parallel-workflow-example.png +0 -0
  11. open_agent_traces-0.1.0/docs/quickstart.md +117 -0
  12. open_agent_traces-0.1.0/docs/user-guide.md +328 -0
  13. open_agent_traces-0.1.0/examples/basic_generation.py +59 -0
  14. open_agent_traces-0.1.0/examples/conformance_demo.py +49 -0
  15. open_agent_traces-0.1.0/pyproject.toml +95 -0
  16. open_agent_traces-0.1.0/schemas/ocel20-schema-json.json +117 -0
  17. open_agent_traces-0.1.0/src/ocelgen/__init__.py +3 -0
  18. open_agent_traces-0.1.0/src/ocelgen/cli.py +421 -0
  19. open_agent_traces-0.1.0/src/ocelgen/deviations/__init__.py +1 -0
  20. open_agent_traces-0.1.0/src/ocelgen/deviations/injector.py +58 -0
  21. open_agent_traces-0.1.0/src/ocelgen/deviations/registry.py +39 -0
  22. open_agent_traces-0.1.0/src/ocelgen/deviations/strategies.py +737 -0
  23. open_agent_traces-0.1.0/src/ocelgen/deviations/types.py +55 -0
  24. open_agent_traces-0.1.0/src/ocelgen/enrichment/__init__.py +1 -0
  25. open_agent_traces-0.1.0/src/ocelgen/enrichment/client.py +129 -0
  26. open_agent_traces-0.1.0/src/ocelgen/enrichment/enricher.py +475 -0
  27. open_agent_traces-0.1.0/src/ocelgen/enrichment/prompts.py +71 -0
  28. open_agent_traces-0.1.0/src/ocelgen/export/__init__.py +1 -0
  29. open_agent_traces-0.1.0/src/ocelgen/export/manifest.py +71 -0
  30. open_agent_traces-0.1.0/src/ocelgen/export/normative.py +22 -0
  31. open_agent_traces-0.1.0/src/ocelgen/export/ocel_json.py +22 -0
  32. open_agent_traces-0.1.0/src/ocelgen/generation/__init__.py +1 -0
  33. open_agent_traces-0.1.0/src/ocelgen/generation/attributes.py +81 -0
  34. open_agent_traces-0.1.0/src/ocelgen/generation/engine.py +125 -0
  35. open_agent_traces-0.1.0/src/ocelgen/generation/run_simulator.py +579 -0
  36. open_agent_traces-0.1.0/src/ocelgen/generation/timestamp.py +44 -0
  37. open_agent_traces-0.1.0/src/ocelgen/models/__init__.py +1 -0
  38. open_agent_traces-0.1.0/src/ocelgen/models/langchain.py +42 -0
  39. open_agent_traces-0.1.0/src/ocelgen/models/ocel.py +130 -0
  40. open_agent_traces-0.1.0/src/ocelgen/models/workflow.py +121 -0
  41. open_agent_traces-0.1.0/src/ocelgen/patterns/__init__.py +1 -0
  42. open_agent_traces-0.1.0/src/ocelgen/patterns/base.py +25 -0
  43. open_agent_traces-0.1.0/src/ocelgen/patterns/parallel.py +89 -0
  44. open_agent_traces-0.1.0/src/ocelgen/patterns/sequential.py +65 -0
  45. open_agent_traces-0.1.0/src/ocelgen/patterns/supervisor.py +87 -0
  46. open_agent_traces-0.1.0/src/ocelgen/scenarios/__init__.py +13 -0
  47. open_agent_traces-0.1.0/src/ocelgen/scenarios/domain.py +22 -0
  48. open_agent_traces-0.1.0/src/ocelgen/scenarios/loader.py +115 -0
  49. open_agent_traces-0.1.0/src/ocelgen/scenarios/registry.py +355 -0
  50. open_agent_traces-0.1.0/src/ocelgen/upload/__init__.py +1 -0
  51. open_agent_traces-0.1.0/src/ocelgen/upload/flatten.py +113 -0
  52. open_agent_traces-0.1.0/src/ocelgen/upload/hf_upload.py +215 -0
  53. open_agent_traces-0.1.0/src/ocelgen/upload/readme.py +266 -0
  54. open_agent_traces-0.1.0/src/ocelgen/validation/__init__.py +1 -0
  55. open_agent_traces-0.1.0/src/ocelgen/validation/schema.py +42 -0
  56. open_agent_traces-0.1.0/tests/__init__.py +0 -0
  57. open_agent_traces-0.1.0/tests/test_cli_new.py +61 -0
  58. open_agent_traces-0.1.0/tests/test_deviations.py +265 -0
  59. open_agent_traces-0.1.0/tests/test_enrichment.py +390 -0
  60. open_agent_traces-0.1.0/tests/test_flatten.py +63 -0
  61. open_agent_traces-0.1.0/tests/test_integration.py +242 -0
  62. open_agent_traces-0.1.0/tests/test_ocel_models.py +199 -0
  63. open_agent_traces-0.1.0/tests/test_patterns.py +139 -0
  64. open_agent_traces-0.1.0/tests/test_scenarios.py +105 -0
  65. open_agent_traces-0.1.0/tests/test_sequential.py +148 -0
  66. open_agent_traces-0.1.0/tests/test_upload.py +122 -0
  67. open_agent_traces-0.1.0/tests/test_yaml_loader.py +276 -0
  68. open_agent_traces-0.1.0/uv.lock +2074 -0
@@ -0,0 +1,35 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ run: uv python install ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: uv sync --extra dev
27
+
28
+ - name: Lint
29
+ run: uv run ruff check src tests
30
+
31
+ - name: Type check
32
+ run: uv run mypy src
33
+
34
+ - name: Test
35
+ run: uv run pytest tests/ -v --tb=short
@@ -0,0 +1,73 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.11", "3.12"]
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ run: uv python install ${{ matrix.python-version }}
22
+
23
+ - name: Install dependencies
24
+ run: uv sync --extra dev
25
+
26
+ - name: Lint
27
+ run: uv run ruff check src tests
28
+
29
+ - name: Type check
30
+ run: uv run mypy src
31
+
32
+ - name: Test
33
+ run: uv run pytest tests/ -v --tb=short
34
+
35
+ publish-testpypi:
36
+ needs: test
37
+ if: contains(github.ref_name, 'rc') || contains(github.ref_name, 'dev')
38
+ runs-on: ubuntu-latest
39
+ environment: release
40
+ permissions:
41
+ id-token: write
42
+ steps:
43
+ - uses: actions/checkout@v4
44
+
45
+ - name: Install uv
46
+ uses: astral-sh/setup-uv@v4
47
+
48
+ - name: Build package
49
+ run: uv build
50
+
51
+ - name: Publish to TestPyPI
52
+ uses: pypa/gh-action-pypi-publish@release/v1
53
+ with:
54
+ repository-url: https://test.pypi.org/legacy/
55
+
56
+ publish-pypi:
57
+ needs: test
58
+ if: "!contains(github.ref_name, 'rc') && !contains(github.ref_name, 'dev')"
59
+ runs-on: ubuntu-latest
60
+ environment: release
61
+ permissions:
62
+ id-token: write
63
+ steps:
64
+ - uses: actions/checkout@v4
65
+
66
+ - name: Install uv
67
+ uses: astral-sh/setup-uv@v4
68
+
69
+ - name: Build package
70
+ run: uv build
71
+
72
+ - name: Publish to PyPI
73
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # macOS
13
+ .DS_Store
14
+
15
+ # Claude/superpowers
16
+ docs/superpowers/
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Julien Simon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: open-agent-traces
3
+ Version: 0.1.0
4
+ Summary: Mock OCEL 2.0 event log generator for LangChain multi-agent runs
5
+ Project-URL: Homepage, https://github.com/juliensimon/ocel-generator
6
+ Project-URL: Documentation, https://github.com/juliensimon/ocel-generator/tree/main/docs
7
+ Project-URL: Repository, https://github.com/juliensimon/ocel-generator
8
+ Project-URL: Issues, https://github.com/juliensimon/ocel-generator/issues
9
+ Project-URL: Dataset, https://huggingface.co/datasets/juliensimon/open-agent-traces
10
+ Author-email: Julien Simon <julien@arcee.ai>
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: agent-traces,ai-agents,anomaly-detection,langchain,llm,multi-agent,observability,ocel,process-mining,synthetic-data
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Testing :: Traffic Generation
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: faker>=24.0
24
+ Requires-Dist: huggingface-hub>=0.20
25
+ Requires-Dist: jsonschema>=4.21
26
+ Requires-Dist: openai>=1.0
27
+ Requires-Dist: pyarrow>=15.0
28
+ Requires-Dist: pydantic>=2.6
29
+ Requires-Dist: pyyaml>=6.0
30
+ Requires-Dist: rich>=13.0
31
+ Requires-Dist: typer>=0.12
32
+ Provides-Extra: conformance
33
+ Requires-Dist: pm4py>=2.7; extra == 'conformance'
34
+ Provides-Extra: dev
35
+ Requires-Dist: mypy>=1.8; extra == 'dev'
36
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
37
+ Requires-Dist: pytest>=8.0; extra == 'dev'
38
+ Requires-Dist: ruff>=0.3; extra == 'dev'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # ocelgen — Open Agent Traces Dataset Generator
42
+
43
+ Generate realistic multi-agent workflow trace datasets with LLM-enriched content. Built for the AI agent ecosystem.
44
+
45
+ [![Dataset on HF](https://img.shields.io/badge/%F0%9F%A4%97%20Dataset-open--agent--traces-yellow)](https://huggingface.co/datasets/juliensimon/open-agent-traces)
46
+ [![PyPI](https://img.shields.io/pypi/v/open-agent-traces)](https://pypi.org/project/open-agent-traces/)
47
+ [![CI](https://github.com/juliensimon/ocel-generator/actions/workflows/ci.yml/badge.svg)](https://github.com/juliensimon/ocel-generator/actions)
48
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
49
+ [![Python 3.11+](https://img.shields.io/badge/Python-3.11%2B-blue.svg)](https://python.org)
50
+ [![OCEL 2.0](https://img.shields.io/badge/OCEL-2.0-orange.svg)](https://www.ocel-standard.org/)
51
+ [![OpenAI Compatible](https://img.shields.io/badge/API-OpenAI%20Compatible-lightgrey.svg)](docs/user-guide.md#model-and-endpoint-configuration)
52
+
53
+ ![Parallel workflow trace — market research domain](docs/parallel-workflow-example.png)
54
+
55
+ ## The problem
56
+
57
+ Real agent traces are scarce. Production multi-agent systems generate rich execution data — LLM prompts, tool calls, agent reasoning, handoff messages — but these traces are proprietary and rarely shared. Teams building agent observability, evaluation, and debugging tools lack open datasets to develop against.
58
+
59
+ ## The solution
60
+
61
+ ocelgen generates **structurally valid, semantically rich** agent traces that look and feel like real multi-agent executions:
62
+
63
+ - **Full trace content** — LLM prompts and completions, tool call inputs/outputs, agent reasoning, inter-agent messages
64
+ - **10 enterprise domains** — customer support, code review, incident response, financial analysis, and 6 more (plus custom domains via YAML)
65
+ - **3 workflow patterns** — sequential, supervisor/worker, parallel fan-out/fan-in
66
+ - **Labeled deviations** — 10 types of anomalies (wrong tools, skipped steps, timeouts) with ground-truth annotations
67
+ - **OCEL 2.0 standard** — compatible with process mining tools (PM4Py, Celonis)
68
+ - **Any LLM backend** — OpenRouter, OpenAI, Anthropic, local models via OpenAI-compatible API
69
+
70
+ ## Quick start
71
+
72
+ ```bash
73
+ pip install open-agent-traces
74
+ ```
75
+
76
+ ### Development setup
77
+
78
+ ```bash
79
+ git clone https://github.com/juliensimon/ocel-generator.git && cd ocel-generator
80
+ uv sync
81
+ ```
82
+
83
+ ### LLM setup
84
+
85
+ Enrichment requires an OpenAI-compatible endpoint. Pick one:
86
+
87
+ **Cloud (OpenRouter, OpenAI, etc.)**
88
+ ```bash
89
+ export OPENAI_API_KEY="your-key"
90
+ # Default: OpenRouter with Gemini Flash. Override with --model:
91
+ ocelgen enrich output.jsonocel -d customer-support-triage --model anthropic/claude-sonnet-4
92
+ ```
93
+
94
+ **Local (llama.cpp, Ollama, vLLM, etc.)**
95
+ ```bash
96
+ # Example: start llama.cpp with auto-download from Hugging Face
97
+ llama-server -hfr unsloth/Qwen3-30B-A3B-GGUF:Q6_K -ngl 99 -c 4096
98
+
99
+ # Point ocelgen at the local endpoint (no API key needed)
100
+ ocelgen enrich output.jsonocel -d customer-support-triage \
101
+ --model unsloth/Qwen3-30B-A3B-GGUF:Q6_K \
102
+ --base-url http://localhost:8080/v1
103
+ ```
104
+
105
+ ### Generate and enrich
106
+
107
+ ```bash
108
+ # Generate traces
109
+ ocelgen generate --pattern sequential --runs 50 --noise 0.2
110
+
111
+ # Enrich with LLM-generated content
112
+ ocelgen enrich output.jsonocel --domain customer-support-triage
113
+
114
+ # Or run the full pipeline (generate + enrich + upload to HF)
115
+ ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
116
+
117
+ # Use custom domains defined in YAML
118
+ ocelgen pipeline --domain my-domain --config domains.yaml --namespace your-hf-username
119
+ ```
120
+
121
+ ## Use the pre-built dataset
122
+
123
+ Skip generation — load the dataset directly from Hugging Face:
124
+
125
+ ```python
126
+ from datasets import load_dataset
127
+
128
+ ds = load_dataset("juliensimon/open-agent-traces", "incident-response")
129
+
130
+ for event in ds["train"]:
131
+ if event["run_id"] == "run-0000":
132
+ print(f"{event['event_type']:25s} | {event['agent_role']:12s} | {event['reasoning'][:60] if event['reasoning'] else ''}")
133
+ ```
134
+
135
+ 10 domains available: `customer-support-triage` · `code-review-pipeline` · `market-research` · `legal-document-analysis` · `data-pipeline-debugging` · `content-generation` · `financial-analysis` · `incident-response` · `academic-paper-review` · `ecommerce-product-enrichment`
136
+
137
+ ## Who is this for?
138
+
139
+ - **Agent observability teams** — build dashboards with realistic trace data (timestamps, token counts, costs)
140
+ - **ML researchers** — train anomaly detectors on labeled conformant vs deviant traces
141
+ - **Process mining researchers** — apply OCEL 2.0 conformance checking to agent workflows
142
+ - **Agent framework developers** — test LangGraph, CrewAI, AutoGen, Smolagents against realistic traces
143
+ - **Evaluation teams** — benchmark agent reasoning quality across domains and architectures
144
+
145
+ ## Documentation
146
+
147
+ - **[Quick Start](docs/quickstart.md)** — first dataset in 5 minutes
148
+ - **[User Guide](docs/user-guide.md)** — CLI reference, patterns, domains, custom YAML config, model configuration
149
+ - **[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)** — 17,000+ events, ready to use
150
+
151
+ ## License
152
+
153
+ MIT
@@ -0,0 +1,113 @@
1
+ # ocelgen — Open Agent Traces Dataset Generator
2
+
3
+ Generate realistic multi-agent workflow trace datasets with LLM-enriched content. Built for the AI agent ecosystem.
4
+
5
+ [![Dataset on HF](https://img.shields.io/badge/%F0%9F%A4%97%20Dataset-open--agent--traces-yellow)](https://huggingface.co/datasets/juliensimon/open-agent-traces)
6
+ [![PyPI](https://img.shields.io/pypi/v/open-agent-traces)](https://pypi.org/project/open-agent-traces/)
7
+ [![CI](https://github.com/juliensimon/ocel-generator/actions/workflows/ci.yml/badge.svg)](https://github.com/juliensimon/ocel-generator/actions)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
9
+ [![Python 3.11+](https://img.shields.io/badge/Python-3.11%2B-blue.svg)](https://python.org)
10
+ [![OCEL 2.0](https://img.shields.io/badge/OCEL-2.0-orange.svg)](https://www.ocel-standard.org/)
11
+ [![OpenAI Compatible](https://img.shields.io/badge/API-OpenAI%20Compatible-lightgrey.svg)](docs/user-guide.md#model-and-endpoint-configuration)
12
+
13
+ ![Parallel workflow trace — market research domain](docs/parallel-workflow-example.png)
14
+
15
+ ## The problem
16
+
17
+ Real agent traces are scarce. Production multi-agent systems generate rich execution data — LLM prompts, tool calls, agent reasoning, handoff messages — but these traces are proprietary and rarely shared. Teams building agent observability, evaluation, and debugging tools lack open datasets to develop against.
18
+
19
+ ## The solution
20
+
21
+ ocelgen generates **structurally valid, semantically rich** agent traces that look and feel like real multi-agent executions:
22
+
23
+ - **Full trace content** — LLM prompts and completions, tool call inputs/outputs, agent reasoning, inter-agent messages
24
+ - **10 enterprise domains** — customer support, code review, incident response, financial analysis, and 6 more (plus custom domains via YAML)
25
+ - **3 workflow patterns** — sequential, supervisor/worker, parallel fan-out/fan-in
26
+ - **Labeled deviations** — 10 types of anomalies (wrong tools, skipped steps, timeouts) with ground-truth annotations
27
+ - **OCEL 2.0 standard** — compatible with process mining tools (PM4Py, Celonis)
28
+ - **Any LLM backend** — OpenRouter, OpenAI, Anthropic, local models via OpenAI-compatible API
29
+
30
+ ## Quick start
31
+
32
+ ```bash
33
+ pip install open-agent-traces
34
+ ```
35
+
36
+ ### Development setup
37
+
38
+ ```bash
39
+ git clone https://github.com/juliensimon/ocel-generator.git && cd ocel-generator
40
+ uv sync
41
+ ```
42
+
43
+ ### LLM setup
44
+
45
+ Enrichment requires an OpenAI-compatible endpoint. Pick one:
46
+
47
+ **Cloud (OpenRouter, OpenAI, etc.)**
48
+ ```bash
49
+ export OPENAI_API_KEY="your-key"
50
+ # Default: OpenRouter with Gemini Flash. Override with --model:
51
+ ocelgen enrich output.jsonocel -d customer-support-triage --model anthropic/claude-sonnet-4
52
+ ```
53
+
54
+ **Local (llama.cpp, Ollama, vLLM, etc.)**
55
+ ```bash
56
+ # Example: start llama.cpp with auto-download from Hugging Face
57
+ llama-server -hfr unsloth/Qwen3-30B-A3B-GGUF:Q6_K -ngl 99 -c 4096
58
+
59
+ # Point ocelgen at the local endpoint (no API key needed)
60
+ ocelgen enrich output.jsonocel -d customer-support-triage \
61
+ --model unsloth/Qwen3-30B-A3B-GGUF:Q6_K \
62
+ --base-url http://localhost:8080/v1
63
+ ```
64
+
65
+ ### Generate and enrich
66
+
67
+ ```bash
68
+ # Generate traces
69
+ ocelgen generate --pattern sequential --runs 50 --noise 0.2
70
+
71
+ # Enrich with LLM-generated content
72
+ ocelgen enrich output.jsonocel --domain customer-support-triage
73
+
74
+ # Or run the full pipeline (generate + enrich + upload to HF)
75
+ ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
76
+
77
+ # Use custom domains defined in YAML
78
+ ocelgen pipeline --domain my-domain --config domains.yaml --namespace your-hf-username
79
+ ```
80
+
81
+ ## Use the pre-built dataset
82
+
83
+ Skip generation — load the dataset directly from Hugging Face:
84
+
85
+ ```python
86
+ from datasets import load_dataset
87
+
88
+ ds = load_dataset("juliensimon/open-agent-traces", "incident-response")
89
+
90
+ for event in ds["train"]:
91
+ if event["run_id"] == "run-0000":
92
+ print(f"{event['event_type']:25s} | {event['agent_role']:12s} | {event['reasoning'][:60] if event['reasoning'] else ''}")
93
+ ```
94
+
95
+ 10 domains available: `customer-support-triage` · `code-review-pipeline` · `market-research` · `legal-document-analysis` · `data-pipeline-debugging` · `content-generation` · `financial-analysis` · `incident-response` · `academic-paper-review` · `ecommerce-product-enrichment`
96
+
97
+ ## Who is this for?
98
+
99
+ - **Agent observability teams** — build dashboards with realistic trace data (timestamps, token counts, costs)
100
+ - **ML researchers** — train anomaly detectors on labeled conformant vs deviant traces
101
+ - **Process mining researchers** — apply OCEL 2.0 conformance checking to agent workflows
102
+ - **Agent framework developers** — test LangGraph, CrewAI, AutoGen, Smolagents against realistic traces
103
+ - **Evaluation teams** — benchmark agent reasoning quality across domains and architectures
104
+
105
+ ## Documentation
106
+
107
+ - **[Quick Start](docs/quickstart.md)** — first dataset in 5 minutes
108
+ - **[User Guide](docs/user-guide.md)** — CLI reference, patterns, domains, custom YAML config, model configuration
109
+ - **[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)** — 17,000+ events, ready to use
110
+
111
+ ## License
112
+
113
+ MIT
@@ -0,0 +1,18 @@
1
+ Real agent traces are scarce. If you're building observability, evaluation, or debugging tools for multi-agent systems, you know the pain — production traces are proprietary, and toy examples don't cut it.
2
+
3
+ I built open-agent-traces to fix this. It generates structurally valid, semantically rich execution traces that look and feel like real multi-agent workflows:
4
+
5
+ - 10 enterprise domains (customer support, code review, incident response, financial analysis...)
6
+ - 3 workflow patterns (sequential, supervisor/worker, parallel fan-out)
7
+ - LLM-enriched content — real prompts, completions, tool calls, agent reasoning
8
+ - Labeled anomalies for training detectors (wrong tools, skipped steps, timeouts)
9
+ - OCEL 2.0 standard — works with PM4Py, Celonis, and other process mining tools
10
+
11
+ pip install open-agent-traces
12
+
13
+ Pre-built dataset on Hugging Face: https://huggingface.co/datasets/juliensimon/open-agent-traces
14
+ Code: https://github.com/juliensimon/ocel-generator
15
+
16
+ MIT licensed. Contributions welcome.
17
+
18
+ #AI #agents #opensource #processmining
@@ -0,0 +1,7 @@
1
+ No open trace data for multi-agent systems? I built a fix.
2
+
3
+ open-agent-traces generates realistic LLM-enriched execution traces — 10 domains, 3 workflow patterns, labeled anomalies, OCEL 2.0 compliant.
4
+
5
+ pip install open-agent-traces
6
+
7
+ https://github.com/juliensimon/ocel-generator
@@ -0,0 +1,117 @@
1
+ # Quick Start
2
+
3
+ Generate your first synthetic agent traces dataset in under 5 minutes.
4
+
5
+ ## Prerequisites
6
+
7
+ - Python 3.11+
8
+ - [uv](https://docs.astral.sh/uv/) (recommended) or pip
9
+ - An [OpenRouter](https://openrouter.ai) API key (for LLM enrichment)
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ git clone https://github.com/juliensimon/ocel-generator.git
15
+ cd ocel-generator
16
+ uv sync
17
+ ```
18
+
19
+ ## Step 1: Generate structural traces
20
+
21
+ Generate 20 sequential workflow runs with 20% noise (deviations):
22
+
23
+ ```bash
24
+ ocelgen generate --pattern sequential --runs 20 --noise 0.2 --seed 42
25
+ ```
26
+
27
+ This creates three files:
28
+ - `output.jsonocel` — the OCEL 2.0 event log
29
+ - `normative_model.json` — the expected workflow template
30
+ - `manifest.json` — generation metadata and injected deviations
31
+
32
+ ## Step 2: Enrich with LLM content
33
+
34
+ Set your OpenRouter API key:
35
+
36
+ ```bash
37
+ export OPENAI_API_KEY="sk-or-v1-your-key-here"
38
+ ```
39
+
40
+ Enrich the traces with realistic prompts, completions, and tool I/O:
41
+
42
+ ```bash
43
+ ocelgen enrich output.jsonocel --domain customer-support-triage
44
+ ```
45
+
46
+ This produces `enriched-output.jsonocel` with LLM-generated content for each agent step.
47
+
48
+ ## Step 3: Explore the data
49
+
50
+ ```python
51
+ import json
52
+
53
+ with open("enriched-output.jsonocel") as f:
54
+ log = json.load(f)
55
+
56
+ # See what's inside
57
+ print(f"Events: {len(log['events'])}")
58
+ print(f"Objects: {len(log['objects'])}")
59
+
60
+ # Look at an enriched LLM call
61
+ for obj in log["objects"]:
62
+ if obj["type"] == "llm_call":
63
+ attrs = {a["name"]: a["value"] for a in obj["attributes"]}
64
+ if attrs.get("prompt"):
65
+ print(f"\nPrompt: {attrs['prompt'][:200]}")
66
+ print(f"Completion: {attrs['completion'][:200]}")
67
+ break
68
+ ```
69
+
70
+ ## Step 4: Upload to Hugging Face (optional)
71
+
72
+ ```bash
73
+ ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
74
+ ```
75
+
76
+ This runs the full pipeline (generate + enrich + flatten + upload) and creates a dataset on HF Hub.
77
+
78
+ ## Step 5: Use custom domains (optional)
79
+
80
+ Define your own domains in a YAML file:
81
+
82
+ ```yaml
83
+ # my-domains.yaml
84
+ domains:
85
+ - name: "hr-onboarding"
86
+ description: "HR onboarding: collect docs, run checks, provision access"
87
+ pattern: "sequential"
88
+ runs: 30
89
+ noise: 0.15
90
+ seed: 50001
91
+ user_queries:
92
+ - "New hire John Smith starting March 15 as Senior Engineer"
93
+ - "Onboard contractor Maria Garcia for 6-month engagement"
94
+ agent_personas:
95
+ researcher: "You are an HR coordinator collecting new hire documentation"
96
+ analyst: "You are a compliance officer verifying background checks"
97
+ summarizer: "You are an IT provisioner setting up accounts and access"
98
+ tool_descriptions:
99
+ web_search: "Search HR knowledge base for onboarding checklists"
100
+ file_reader: "Read employee records and compliance documents"
101
+ ```
102
+
103
+ Then pass it with `--config`:
104
+
105
+ ```bash
106
+ ocelgen enrich output.jsonocel --domain hr-onboarding --config my-domains.yaml
107
+ ocelgen pipeline --domain hr-onboarding --config my-domains.yaml --namespace your-hf-username
108
+ ```
109
+
110
+ Custom domains merge with the 10 built-ins. To override a built-in, use the same `name`.
111
+
112
+ ## Next steps
113
+
114
+ - Read the [User Guide](user-guide.md) for detailed configuration options
115
+ - Try different [workflow patterns](user-guide.md#workflow-patterns): `sequential`, `supervisor`, `parallel`
116
+ - Explore all 10 [built-in domains](user-guide.md#domains) or [define your own](user-guide.md#custom-domains)
117
+ - Use the [pre-built dataset](https://huggingface.co/datasets/juliensimon/open-agent-traces) directly