open-agent-traces 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_agent_traces-0.1.0/.github/workflows/ci.yml +35 -0
- open_agent_traces-0.1.0/.github/workflows/publish.yml +73 -0
- open_agent_traces-0.1.0/.gitignore +16 -0
- open_agent_traces-0.1.0/.python-version +1 -0
- open_agent_traces-0.1.0/LICENSE +21 -0
- open_agent_traces-0.1.0/PKG-INFO +153 -0
- open_agent_traces-0.1.0/README.md +113 -0
- open_agent_traces-0.1.0/docs/announcements/linkedin.md +18 -0
- open_agent_traces-0.1.0/docs/announcements/x.md +7 -0
- open_agent_traces-0.1.0/docs/parallel-workflow-example.png +0 -0
- open_agent_traces-0.1.0/docs/quickstart.md +117 -0
- open_agent_traces-0.1.0/docs/user-guide.md +328 -0
- open_agent_traces-0.1.0/examples/basic_generation.py +59 -0
- open_agent_traces-0.1.0/examples/conformance_demo.py +49 -0
- open_agent_traces-0.1.0/pyproject.toml +95 -0
- open_agent_traces-0.1.0/schemas/ocel20-schema-json.json +117 -0
- open_agent_traces-0.1.0/src/ocelgen/__init__.py +3 -0
- open_agent_traces-0.1.0/src/ocelgen/cli.py +421 -0
- open_agent_traces-0.1.0/src/ocelgen/deviations/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/deviations/injector.py +58 -0
- open_agent_traces-0.1.0/src/ocelgen/deviations/registry.py +39 -0
- open_agent_traces-0.1.0/src/ocelgen/deviations/strategies.py +737 -0
- open_agent_traces-0.1.0/src/ocelgen/deviations/types.py +55 -0
- open_agent_traces-0.1.0/src/ocelgen/enrichment/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/enrichment/client.py +129 -0
- open_agent_traces-0.1.0/src/ocelgen/enrichment/enricher.py +475 -0
- open_agent_traces-0.1.0/src/ocelgen/enrichment/prompts.py +71 -0
- open_agent_traces-0.1.0/src/ocelgen/export/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/export/manifest.py +71 -0
- open_agent_traces-0.1.0/src/ocelgen/export/normative.py +22 -0
- open_agent_traces-0.1.0/src/ocelgen/export/ocel_json.py +22 -0
- open_agent_traces-0.1.0/src/ocelgen/generation/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/generation/attributes.py +81 -0
- open_agent_traces-0.1.0/src/ocelgen/generation/engine.py +125 -0
- open_agent_traces-0.1.0/src/ocelgen/generation/run_simulator.py +579 -0
- open_agent_traces-0.1.0/src/ocelgen/generation/timestamp.py +44 -0
- open_agent_traces-0.1.0/src/ocelgen/models/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/models/langchain.py +42 -0
- open_agent_traces-0.1.0/src/ocelgen/models/ocel.py +130 -0
- open_agent_traces-0.1.0/src/ocelgen/models/workflow.py +121 -0
- open_agent_traces-0.1.0/src/ocelgen/patterns/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/patterns/base.py +25 -0
- open_agent_traces-0.1.0/src/ocelgen/patterns/parallel.py +89 -0
- open_agent_traces-0.1.0/src/ocelgen/patterns/sequential.py +65 -0
- open_agent_traces-0.1.0/src/ocelgen/patterns/supervisor.py +87 -0
- open_agent_traces-0.1.0/src/ocelgen/scenarios/__init__.py +13 -0
- open_agent_traces-0.1.0/src/ocelgen/scenarios/domain.py +22 -0
- open_agent_traces-0.1.0/src/ocelgen/scenarios/loader.py +115 -0
- open_agent_traces-0.1.0/src/ocelgen/scenarios/registry.py +355 -0
- open_agent_traces-0.1.0/src/ocelgen/upload/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/upload/flatten.py +113 -0
- open_agent_traces-0.1.0/src/ocelgen/upload/hf_upload.py +215 -0
- open_agent_traces-0.1.0/src/ocelgen/upload/readme.py +266 -0
- open_agent_traces-0.1.0/src/ocelgen/validation/__init__.py +1 -0
- open_agent_traces-0.1.0/src/ocelgen/validation/schema.py +42 -0
- open_agent_traces-0.1.0/tests/__init__.py +0 -0
- open_agent_traces-0.1.0/tests/test_cli_new.py +61 -0
- open_agent_traces-0.1.0/tests/test_deviations.py +265 -0
- open_agent_traces-0.1.0/tests/test_enrichment.py +390 -0
- open_agent_traces-0.1.0/tests/test_flatten.py +63 -0
- open_agent_traces-0.1.0/tests/test_integration.py +242 -0
- open_agent_traces-0.1.0/tests/test_ocel_models.py +199 -0
- open_agent_traces-0.1.0/tests/test_patterns.py +139 -0
- open_agent_traces-0.1.0/tests/test_scenarios.py +105 -0
- open_agent_traces-0.1.0/tests/test_sequential.py +148 -0
- open_agent_traces-0.1.0/tests/test_upload.py +122 -0
- open_agent_traces-0.1.0/tests/test_yaml_loader.py +276 -0
- open_agent_traces-0.1.0/uv.lock +2074 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
run: uv python install ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: uv sync --extra dev
|
|
27
|
+
|
|
28
|
+
- name: Lint
|
|
29
|
+
run: uv run ruff check src tests
|
|
30
|
+
|
|
31
|
+
- name: Type check
|
|
32
|
+
run: uv run mypy src
|
|
33
|
+
|
|
34
|
+
- name: Test
|
|
35
|
+
run: uv run pytest tests/ -v --tb=short
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.11", "3.12"]
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
run: uv python install ${{ matrix.python-version }}
|
|
22
|
+
|
|
23
|
+
- name: Install dependencies
|
|
24
|
+
run: uv sync --extra dev
|
|
25
|
+
|
|
26
|
+
- name: Lint
|
|
27
|
+
run: uv run ruff check src tests
|
|
28
|
+
|
|
29
|
+
- name: Type check
|
|
30
|
+
run: uv run mypy src
|
|
31
|
+
|
|
32
|
+
- name: Test
|
|
33
|
+
run: uv run pytest tests/ -v --tb=short
|
|
34
|
+
|
|
35
|
+
publish-testpypi:
|
|
36
|
+
needs: test
|
|
37
|
+
if: contains(github.ref_name, 'rc') || contains(github.ref_name, 'dev')
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
environment: release
|
|
40
|
+
permissions:
|
|
41
|
+
id-token: write
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/checkout@v4
|
|
44
|
+
|
|
45
|
+
- name: Install uv
|
|
46
|
+
uses: astral-sh/setup-uv@v4
|
|
47
|
+
|
|
48
|
+
- name: Build package
|
|
49
|
+
run: uv build
|
|
50
|
+
|
|
51
|
+
- name: Publish to TestPyPI
|
|
52
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
53
|
+
with:
|
|
54
|
+
repository-url: https://test.pypi.org/legacy/
|
|
55
|
+
|
|
56
|
+
publish-pypi:
|
|
57
|
+
needs: test
|
|
58
|
+
if: "!contains(github.ref_name, 'rc') && !contains(github.ref_name, 'dev')"
|
|
59
|
+
runs-on: ubuntu-latest
|
|
60
|
+
environment: release
|
|
61
|
+
permissions:
|
|
62
|
+
id-token: write
|
|
63
|
+
steps:
|
|
64
|
+
- uses: actions/checkout@v4
|
|
65
|
+
|
|
66
|
+
- name: Install uv
|
|
67
|
+
uses: astral-sh/setup-uv@v4
|
|
68
|
+
|
|
69
|
+
- name: Build package
|
|
70
|
+
run: uv build
|
|
71
|
+
|
|
72
|
+
- name: Publish to PyPI
|
|
73
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Julien Simon
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: open-agent-traces
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Mock OCEL 2.0 event log generator for LangChain multi-agent runs
|
|
5
|
+
Project-URL: Homepage, https://github.com/juliensimon/ocel-generator
|
|
6
|
+
Project-URL: Documentation, https://github.com/juliensimon/ocel-generator/tree/main/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/juliensimon/ocel-generator
|
|
8
|
+
Project-URL: Issues, https://github.com/juliensimon/ocel-generator/issues
|
|
9
|
+
Project-URL: Dataset, https://huggingface.co/datasets/juliensimon/open-agent-traces
|
|
10
|
+
Author-email: Julien Simon <julien@arcee.ai>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Keywords: agent-traces,ai-agents,anomaly-detection,langchain,llm,multi-agent,observability,ocel,process-mining,synthetic-data
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Testing :: Traffic Generation
|
|
22
|
+
Requires-Python: >=3.11
|
|
23
|
+
Requires-Dist: faker>=24.0
|
|
24
|
+
Requires-Dist: huggingface-hub>=0.20
|
|
25
|
+
Requires-Dist: jsonschema>=4.21
|
|
26
|
+
Requires-Dist: openai>=1.0
|
|
27
|
+
Requires-Dist: pyarrow>=15.0
|
|
28
|
+
Requires-Dist: pydantic>=2.6
|
|
29
|
+
Requires-Dist: pyyaml>=6.0
|
|
30
|
+
Requires-Dist: rich>=13.0
|
|
31
|
+
Requires-Dist: typer>=0.12
|
|
32
|
+
Provides-Extra: conformance
|
|
33
|
+
Requires-Dist: pm4py>=2.7; extra == 'conformance'
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# ocelgen — Open Agent Traces Dataset Generator
|
|
42
|
+
|
|
43
|
+
Generate realistic multi-agent workflow trace datasets with LLM-enriched content. Built for the AI agent ecosystem.
|
|
44
|
+
|
|
45
|
+
[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)
|
|
46
|
+
[PyPI package](https://pypi.org/project/open-agent-traces/)
|
|
47
|
+
[CI status](https://github.com/juliensimon/ocel-generator/actions)
|
|
48
|
+
[License: MIT](LICENSE)
|
|
49
|
+
[Python 3.11+](https://python.org)
|
|
50
|
+
[OCEL 2.0 standard](https://www.ocel-standard.org/)
|
|
51
|
+
[Model and endpoint configuration](docs/user-guide.md#model-and-endpoint-configuration)
|
|
52
|
+
|
|
53
|
+

|
|
54
|
+
|
|
55
|
+
## The problem
|
|
56
|
+
|
|
57
|
+
Real agent traces are scarce. Production multi-agent systems generate rich execution data — LLM prompts, tool calls, agent reasoning, handoff messages — but these traces are proprietary and rarely shared. Teams building agent observability, evaluation, and debugging tools lack open datasets to develop against.
|
|
58
|
+
|
|
59
|
+
## The solution
|
|
60
|
+
|
|
61
|
+
ocelgen generates **structurally valid, semantically rich** agent traces that look and feel like real multi-agent executions:
|
|
62
|
+
|
|
63
|
+
- **Full trace content** — LLM prompts and completions, tool call inputs/outputs, agent reasoning, inter-agent messages
|
|
64
|
+
- **10 enterprise domains** — customer support, code review, incident response, financial analysis, and 6 more (plus custom domains via YAML)
|
|
65
|
+
- **3 workflow patterns** — sequential, supervisor/worker, parallel fan-out/fan-in
|
|
66
|
+
- **Labeled deviations** — 10 types of anomalies (wrong tools, skipped steps, timeouts) with ground-truth annotations
|
|
67
|
+
- **OCEL 2.0 standard** — compatible with process mining tools (PM4Py, Celonis)
|
|
68
|
+
- **Any LLM backend** — OpenRouter, OpenAI, Anthropic, local models via OpenAI-compatible API
|
|
69
|
+
|
|
70
|
+
## Quick start
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install open-agent-traces
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Development setup
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git clone https://github.com/juliensimon/ocel-generator.git && cd ocel-generator
|
|
80
|
+
uv sync --extra dev
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### LLM setup
|
|
84
|
+
|
|
85
|
+
Enrichment requires an OpenAI-compatible endpoint. Pick one:
|
|
86
|
+
|
|
87
|
+
**Cloud (OpenRouter, OpenAI, etc.)**
|
|
88
|
+
```bash
|
|
89
|
+
export OPENAI_API_KEY="your-key"
|
|
90
|
+
# Default: OpenRouter with Gemini Flash. Override with --model:
|
|
91
|
+
ocelgen enrich output.jsonocel -d customer-support-triage --model anthropic/claude-sonnet-4
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Local (llama.cpp, Ollama, vLLM, etc.)**
|
|
95
|
+
```bash
|
|
96
|
+
# Example: start llama.cpp with auto-download from Hugging Face
|
|
97
|
+
llama-server -hfr unsloth/Qwen3-30B-A3B-GGUF:Q6_K -ngl 99 -c 4096
|
|
98
|
+
|
|
99
|
+
# Point ocelgen at the local endpoint (no API key needed)
|
|
100
|
+
ocelgen enrich output.jsonocel -d customer-support-triage \
|
|
101
|
+
--model unsloth/Qwen3-30B-A3B-GGUF:Q6_K \
|
|
102
|
+
--base-url http://localhost:8080/v1
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Generate and enrich
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Generate traces
|
|
109
|
+
ocelgen generate --pattern sequential --runs 50 --noise 0.2
|
|
110
|
+
|
|
111
|
+
# Enrich with LLM-generated content
|
|
112
|
+
ocelgen enrich output.jsonocel --domain customer-support-triage
|
|
113
|
+
|
|
114
|
+
# Or run the full pipeline (generate + enrich + upload to HF)
|
|
115
|
+
ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
|
|
116
|
+
|
|
117
|
+
# Use custom domains defined in YAML
|
|
118
|
+
ocelgen pipeline --domain my-domain --config domains.yaml --namespace your-hf-username
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Use the pre-built dataset
|
|
122
|
+
|
|
123
|
+
Skip generation — load the dataset directly from Hugging Face:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from datasets import load_dataset
|
|
127
|
+
|
|
128
|
+
ds = load_dataset("juliensimon/open-agent-traces", "incident-response")
|
|
129
|
+
|
|
130
|
+
for event in ds["train"]:
|
|
131
|
+
if event["run_id"] == "run-0000":
|
|
132
|
+
print(f"{event['event_type']:25s} | {event['agent_role']:12s} | {event['reasoning'][:60] if event['reasoning'] else ''}")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
10 domains available: `customer-support-triage` · `code-review-pipeline` · `market-research` · `legal-document-analysis` · `data-pipeline-debugging` · `content-generation` · `financial-analysis` · `incident-response` · `academic-paper-review` · `ecommerce-product-enrichment`
|
|
136
|
+
|
|
137
|
+
## Who is this for?
|
|
138
|
+
|
|
139
|
+
- **Agent observability teams** — build dashboards with realistic trace data (timestamps, token counts, costs)
|
|
140
|
+
- **ML researchers** — train anomaly detectors on labeled conformant vs deviant traces
|
|
141
|
+
- **Process mining researchers** — apply OCEL 2.0 conformance checking to agent workflows
|
|
142
|
+
- **Agent framework developers** — test LangGraph, CrewAI, AutoGen, Smolagents against realistic traces
|
|
143
|
+
- **Evaluation teams** — benchmark agent reasoning quality across domains and architectures
|
|
144
|
+
|
|
145
|
+
## Documentation
|
|
146
|
+
|
|
147
|
+
- **[Quick Start](docs/quickstart.md)** — first dataset in 5 minutes
|
|
148
|
+
- **[User Guide](docs/user-guide.md)** — CLI reference, patterns, domains, custom YAML config, model configuration
|
|
149
|
+
- **[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)** — 17,000+ events, ready to use
|
|
150
|
+
|
|
151
|
+
## License
|
|
152
|
+
|
|
153
|
+
MIT
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# ocelgen — Open Agent Traces Dataset Generator
|
|
2
|
+
|
|
3
|
+
Generate realistic multi-agent workflow trace datasets with LLM-enriched content. Built for the AI agent ecosystem.
|
|
4
|
+
|
|
5
|
+
[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)
|
|
6
|
+
[PyPI package](https://pypi.org/project/open-agent-traces/)
|
|
7
|
+
[CI status](https://github.com/juliensimon/ocel-generator/actions)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
[Python 3.11+](https://python.org)
|
|
10
|
+
[OCEL 2.0 standard](https://www.ocel-standard.org/)
|
|
11
|
+
[Model and endpoint configuration](docs/user-guide.md#model-and-endpoint-configuration)
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
|
|
15
|
+
## The problem
|
|
16
|
+
|
|
17
|
+
Real agent traces are scarce. Production multi-agent systems generate rich execution data — LLM prompts, tool calls, agent reasoning, handoff messages — but these traces are proprietary and rarely shared. Teams building agent observability, evaluation, and debugging tools lack open datasets to develop against.
|
|
18
|
+
|
|
19
|
+
## The solution
|
|
20
|
+
|
|
21
|
+
ocelgen generates **structurally valid, semantically rich** agent traces that look and feel like real multi-agent executions:
|
|
22
|
+
|
|
23
|
+
- **Full trace content** — LLM prompts and completions, tool call inputs/outputs, agent reasoning, inter-agent messages
|
|
24
|
+
- **10 enterprise domains** — customer support, code review, incident response, financial analysis, and 6 more (plus custom domains via YAML)
|
|
25
|
+
- **3 workflow patterns** — sequential, supervisor/worker, parallel fan-out/fan-in
|
|
26
|
+
- **Labeled deviations** — 10 types of anomalies (wrong tools, skipped steps, timeouts) with ground-truth annotations
|
|
27
|
+
- **OCEL 2.0 standard** — compatible with process mining tools (PM4Py, Celonis)
|
|
28
|
+
- **Any LLM backend** — OpenRouter, OpenAI, Anthropic, local models via OpenAI-compatible API
|
|
29
|
+
|
|
30
|
+
## Quick start
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install open-agent-traces
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Development setup
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git clone https://github.com/juliensimon/ocel-generator.git && cd ocel-generator
|
|
40
|
+
uv sync --extra dev
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### LLM setup
|
|
44
|
+
|
|
45
|
+
Enrichment requires an OpenAI-compatible endpoint. Pick one:
|
|
46
|
+
|
|
47
|
+
**Cloud (OpenRouter, OpenAI, etc.)**
|
|
48
|
+
```bash
|
|
49
|
+
export OPENAI_API_KEY="your-key"
|
|
50
|
+
# Default: OpenRouter with Gemini Flash. Override with --model:
|
|
51
|
+
ocelgen enrich output.jsonocel -d customer-support-triage --model anthropic/claude-sonnet-4
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Local (llama.cpp, Ollama, vLLM, etc.)**
|
|
55
|
+
```bash
|
|
56
|
+
# Example: start llama.cpp with auto-download from Hugging Face
|
|
57
|
+
llama-server -hfr unsloth/Qwen3-30B-A3B-GGUF:Q6_K -ngl 99 -c 4096
|
|
58
|
+
|
|
59
|
+
# Point ocelgen at the local endpoint (no API key needed)
|
|
60
|
+
ocelgen enrich output.jsonocel -d customer-support-triage \
|
|
61
|
+
--model unsloth/Qwen3-30B-A3B-GGUF:Q6_K \
|
|
62
|
+
--base-url http://localhost:8080/v1
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Generate and enrich
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Generate traces
|
|
69
|
+
ocelgen generate --pattern sequential --runs 50 --noise 0.2
|
|
70
|
+
|
|
71
|
+
# Enrich with LLM-generated content
|
|
72
|
+
ocelgen enrich output.jsonocel --domain customer-support-triage
|
|
73
|
+
|
|
74
|
+
# Or run the full pipeline (generate + enrich + upload to HF)
|
|
75
|
+
ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
|
|
76
|
+
|
|
77
|
+
# Use custom domains defined in YAML
|
|
78
|
+
ocelgen pipeline --domain my-domain --config domains.yaml --namespace your-hf-username
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Use the pre-built dataset
|
|
82
|
+
|
|
83
|
+
Skip generation — load the dataset directly from Hugging Face:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from datasets import load_dataset
|
|
87
|
+
|
|
88
|
+
ds = load_dataset("juliensimon/open-agent-traces", "incident-response")
|
|
89
|
+
|
|
90
|
+
for event in ds["train"]:
|
|
91
|
+
if event["run_id"] == "run-0000":
|
|
92
|
+
print(f"{event['event_type']:25s} | {event['agent_role']:12s} | {event['reasoning'][:60] if event['reasoning'] else ''}")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
10 domains available: `customer-support-triage` · `code-review-pipeline` · `market-research` · `legal-document-analysis` · `data-pipeline-debugging` · `content-generation` · `financial-analysis` · `incident-response` · `academic-paper-review` · `ecommerce-product-enrichment`
|
|
96
|
+
|
|
97
|
+
## Who is this for?
|
|
98
|
+
|
|
99
|
+
- **Agent observability teams** — build dashboards with realistic trace data (timestamps, token counts, costs)
|
|
100
|
+
- **ML researchers** — train anomaly detectors on labeled conformant vs deviant traces
|
|
101
|
+
- **Process mining researchers** — apply OCEL 2.0 conformance checking to agent workflows
|
|
102
|
+
- **Agent framework developers** — test LangGraph, CrewAI, AutoGen, Smolagents against realistic traces
|
|
103
|
+
- **Evaluation teams** — benchmark agent reasoning quality across domains and architectures
|
|
104
|
+
|
|
105
|
+
## Documentation
|
|
106
|
+
|
|
107
|
+
- **[Quick Start](docs/quickstart.md)** — first dataset in 5 minutes
|
|
108
|
+
- **[User Guide](docs/user-guide.md)** — CLI reference, patterns, domains, custom YAML config, model configuration
|
|
109
|
+
- **[Dataset on Hugging Face](https://huggingface.co/datasets/juliensimon/open-agent-traces)** — 17,000+ events, ready to use
|
|
110
|
+
|
|
111
|
+
## License
|
|
112
|
+
|
|
113
|
+
MIT
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Real agent traces are scarce. If you're building observability, evaluation, or debugging tools for multi-agent systems, you know the pain — production traces are proprietary, and toy examples don't cut it.
|
|
2
|
+
|
|
3
|
+
I built open-agent-traces to fix this. It generates structurally valid, semantically rich execution traces that look and feel like real multi-agent workflows:
|
|
4
|
+
|
|
5
|
+
- 10 enterprise domains (customer support, code review, incident response, financial analysis...)
|
|
6
|
+
- 3 workflow patterns (sequential, supervisor/worker, parallel fan-out)
|
|
7
|
+
- LLM-enriched content — real prompts, completions, tool calls, agent reasoning
|
|
8
|
+
- Labeled anomalies for training detectors (wrong tools, skipped steps, timeouts)
|
|
9
|
+
- OCEL 2.0 standard — works with PM4Py, Celonis, and other process mining tools
|
|
10
|
+
|
|
11
|
+
pip install open-agent-traces
|
|
12
|
+
|
|
13
|
+
Pre-built dataset on Hugging Face: https://huggingface.co/datasets/juliensimon/open-agent-traces
|
|
14
|
+
Code: https://github.com/juliensimon/ocel-generator
|
|
15
|
+
|
|
16
|
+
MIT licensed. Contributions welcome.
|
|
17
|
+
|
|
18
|
+
#AI #agents #opensource #processmining
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
No open trace data for multi-agent systems? I built a fix.
|
|
2
|
+
|
|
3
|
+
open-agent-traces generates realistic LLM-enriched execution traces — 10 domains, 3 workflow patterns, labeled anomalies, OCEL 2.0 compliant.
|
|
4
|
+
|
|
5
|
+
pip install open-agent-traces
|
|
6
|
+
|
|
7
|
+
https://github.com/juliensimon/ocel-generator
|
|
Binary file
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Quick Start
|
|
2
|
+
|
|
3
|
+
Generate your first synthetic agent traces dataset in under 5 minutes.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- Python 3.11+
|
|
8
|
+
- [uv](https://docs.astral.sh/uv/) (recommended) or pip
|
|
9
|
+
- An API key for an OpenAI-compatible endpoint such as [OpenRouter](https://openrouter.ai) (for LLM enrichment; not required when using a local model server)
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
git clone https://github.com/juliensimon/ocel-generator.git
|
|
15
|
+
cd ocel-generator
|
|
16
|
+
uv sync
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Step 1: Generate structural traces
|
|
20
|
+
|
|
21
|
+
Generate 20 sequential workflow runs with 20% noise (deviations):
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
ocelgen generate --pattern sequential --runs 20 --noise 0.2 --seed 42
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
This creates three files:
|
|
28
|
+
- `output.jsonocel` — the OCEL 2.0 event log
|
|
29
|
+
- `normative_model.json` — the expected workflow template
|
|
30
|
+
- `manifest.json` — generation metadata and injected deviations
|
|
31
|
+
|
|
32
|
+
## Step 2: Enrich with LLM content
|
|
33
|
+
|
|
34
|
+
Set your OpenRouter API key in the `OPENAI_API_KEY` environment variable:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
export OPENAI_API_KEY="sk-or-v1-your-key-here"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Enrich the traces with realistic prompts, completions, and tool I/O:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
ocelgen enrich output.jsonocel --domain customer-support-triage
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
This produces `enriched-output.jsonocel` with LLM-generated content for each agent step.
|
|
47
|
+
|
|
48
|
+
## Step 3: Explore the data
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import json
|
|
52
|
+
|
|
53
|
+
with open("enriched-output.jsonocel") as f:
|
|
54
|
+
log = json.load(f)
|
|
55
|
+
|
|
56
|
+
# See what's inside
|
|
57
|
+
print(f"Events: {len(log['events'])}")
|
|
58
|
+
print(f"Objects: {len(log['objects'])}")
|
|
59
|
+
|
|
60
|
+
# Look at an enriched LLM call
|
|
61
|
+
for obj in log["objects"]:
|
|
62
|
+
if obj["type"] == "llm_call":
|
|
63
|
+
attrs = {a["name"]: a["value"] for a in obj["attributes"]}
|
|
64
|
+
if attrs.get("prompt"):
|
|
65
|
+
print(f"\nPrompt: {attrs['prompt'][:200]}")
|
|
66
|
+
print(f"Completion: {attrs['completion'][:200]}")
|
|
67
|
+
break
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Step 4: Upload to Hugging Face (optional)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
ocelgen pipeline --domain customer-support-triage --namespace your-hf-username
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
This runs the full pipeline (generate + enrich + flatten + upload) and creates a dataset on HF Hub.
|
|
77
|
+
|
|
78
|
+
## Step 5: Use custom domains (optional)
|
|
79
|
+
|
|
80
|
+
Define your own domains in a YAML file:
|
|
81
|
+
|
|
82
|
+
```yaml
|
|
83
|
+
# my-domains.yaml
|
|
84
|
+
domains:
|
|
85
|
+
- name: "hr-onboarding"
|
|
86
|
+
description: "HR onboarding: collect docs, run checks, provision access"
|
|
87
|
+
pattern: "sequential"
|
|
88
|
+
runs: 30
|
|
89
|
+
noise: 0.15
|
|
90
|
+
seed: 50001
|
|
91
|
+
user_queries:
|
|
92
|
+
- "New hire John Smith starting March 15 as Senior Engineer"
|
|
93
|
+
- "Onboard contractor Maria Garcia for 6-month engagement"
|
|
94
|
+
agent_personas:
|
|
95
|
+
researcher: "You are an HR coordinator collecting new hire documentation"
|
|
96
|
+
analyst: "You are a compliance officer verifying background checks"
|
|
97
|
+
summarizer: "You are an IT provisioner setting up accounts and access"
|
|
98
|
+
tool_descriptions:
|
|
99
|
+
web_search: "Search HR knowledge base for onboarding checklists"
|
|
100
|
+
file_reader: "Read employee records and compliance documents"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Then pass it with `--config`:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
ocelgen enrich output.jsonocel --domain hr-onboarding --config my-domains.yaml
|
|
107
|
+
ocelgen pipeline --domain hr-onboarding --config my-domains.yaml --namespace your-hf-username
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Custom domains merge with the 10 built-ins. To override a built-in, use the same `name`.
|
|
111
|
+
|
|
112
|
+
## Next steps
|
|
113
|
+
|
|
114
|
+
- Read the [User Guide](user-guide.md) for detailed configuration options
|
|
115
|
+
- Try different [workflow patterns](user-guide.md#workflow-patterns): `sequential`, `supervisor`, `parallel`
|
|
116
|
+
- Explore all 10 [built-in domains](user-guide.md#domains) or [define your own](user-guide.md#custom-domains)
|
|
117
|
+
- Use the [pre-built dataset](https://huggingface.co/datasets/juliensimon/open-agent-traces) directly
|