clustertrace 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/bug.yml +45 -0
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/feature.yml +23 -0
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_reorder_clustering.md +42 -0
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_streaming_capture.md +32 -0
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_wrap_bedrock.md +27 -0
- clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_wrap_gemini.md +30 -0
- clustertrace-0.5.0/.github/pull_request_template.md +22 -0
- clustertrace-0.5.0/.github/workflows/publish.yml +66 -0
- clustertrace-0.5.0/.github/workflows/release.yml +65 -0
- clustertrace-0.5.0/.github/workflows/test.yml +40 -0
- clustertrace-0.5.0/.gitignore +30 -0
- clustertrace-0.5.0/ARCHITECTURE.md +105 -0
- clustertrace-0.5.0/CHANGELOG.md +122 -0
- clustertrace-0.5.0/CONTRIBUTING.md +88 -0
- clustertrace-0.5.0/LICENSE +21 -0
- clustertrace-0.5.0/PKG-INFO +224 -0
- clustertrace-0.5.0/README.md +192 -0
- clustertrace-0.5.0/SECURITY.md +30 -0
- clustertrace-0.5.0/STATUS.md +24 -0
- clustertrace-0.5.0/docs/hero.svg +94 -0
- clustertrace-0.5.0/examples/agents.py +250 -0
- clustertrace-0.5.0/examples/benchmark.py +119 -0
- clustertrace-0.5.0/examples/generate_demo_data.py +105 -0
- clustertrace-0.5.0/examples/langchain_example.py +65 -0
- clustertrace-0.5.0/examples/llamaindex_example.py +59 -0
- clustertrace-0.5.0/examples/sample-trace.html +193 -0
- clustertrace-0.5.0/pyproject.toml +64 -0
- clustertrace-0.5.0/scripts/preview_dashboard.py +8 -0
- clustertrace-0.5.0/src/clustertrace/__init__.py +58 -0
- clustertrace-0.5.0/src/clustertrace/_ctx.py +16 -0
- clustertrace-0.5.0/src/clustertrace/anthropic.py +197 -0
- clustertrace-0.5.0/src/clustertrace/cli.py +228 -0
- clustertrace-0.5.0/src/clustertrace/cluster.py +311 -0
- clustertrace-0.5.0/src/clustertrace/cost.py +132 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/__init__.py +0 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/app.py +404 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/static/style.css +397 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/base.html +47 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/clusters.html +148 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/failures.html +159 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/index.html +142 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/metrics.html +77 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/search.html +55 -0
- clustertrace-0.5.0/src/clustertrace/dashboard/templates/trace.html +85 -0
- clustertrace-0.5.0/src/clustertrace/data/demo-traces.jsonl +61 -0
- clustertrace-0.5.0/src/clustertrace/export.py +163 -0
- clustertrace-0.5.0/src/clustertrace/maintenance.py +101 -0
- clustertrace-0.5.0/src/clustertrace/openai.py +198 -0
- clustertrace-0.5.0/src/clustertrace/otel.py +166 -0
- clustertrace-0.5.0/src/clustertrace/py.typed +0 -0
- clustertrace-0.5.0/src/clustertrace/replay.py +81 -0
- clustertrace-0.5.0/src/clustertrace/snapshot.py +179 -0
- clustertrace-0.5.0/src/clustertrace/storage.py +434 -0
- clustertrace-0.5.0/src/clustertrace/trace.py +426 -0
- clustertrace-0.5.0/tests/conftest.py +13 -0
- clustertrace-0.5.0/tests/test_cluster.py +123 -0
- clustertrace-0.5.0/tests/test_cost.py +95 -0
- clustertrace-0.5.0/tests/test_dashboard.py +166 -0
- clustertrace-0.5.0/tests/test_export_snapshot_replay.py +109 -0
- clustertrace-0.5.0/tests/test_metric.py +36 -0
- clustertrace-0.5.0/tests/test_migrations.py +87 -0
- clustertrace-0.5.0/tests/test_otel.py +203 -0
- clustertrace-0.5.0/tests/test_search.py +62 -0
- clustertrace-0.5.0/tests/test_tags.py +52 -0
- clustertrace-0.5.0/tests/test_trace_async.py +64 -0
- clustertrace-0.5.0/tests/test_trace_sync.py +90 -0
- clustertrace-0.5.0/tests/test_truncation.py +35 -0
- clustertrace-0.5.0/tests/test_v04_features.py +343 -0
- clustertrace-0.5.0/tests/test_wrap_anthropic.py +105 -0
- clustertrace-0.5.0/tests/test_wrap_openai.py +141 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Bug report
|
|
2
|
+
description: Something doesn't work the way the README says it does.
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: what-happened
|
|
7
|
+
attributes:
|
|
8
|
+
label: What happened?
|
|
9
|
+
description: Quick description of the broken behaviour and what you expected.
|
|
10
|
+
placeholder: "@clustertrace.trace on my async function logs duration=None instead of the actual ms."
|
|
11
|
+
validations:
|
|
12
|
+
required: true
|
|
13
|
+
- type: textarea
|
|
14
|
+
id: repro
|
|
15
|
+
attributes:
|
|
16
|
+
label: Minimal reproduction
|
|
17
|
+
description: A code snippet that triggers it. Include any flags / env vars (`CLUSTERTRACE_DB`, etc.).
|
|
18
|
+
render: python
|
|
19
|
+
validations:
|
|
20
|
+
required: true
|
|
21
|
+
- type: input
|
|
22
|
+
id: version
|
|
23
|
+
attributes:
|
|
24
|
+
label: clustertrace version
|
|
25
|
+
description: Output of `clustertrace --version`.
|
|
26
|
+
placeholder: "0.5.0"
|
|
27
|
+
validations:
|
|
28
|
+
required: true
|
|
29
|
+
- type: input
|
|
30
|
+
id: python
|
|
31
|
+
attributes:
|
|
32
|
+
label: Python version
|
|
33
|
+
placeholder: "3.12.4"
|
|
34
|
+
validations:
|
|
35
|
+
required: true
|
|
36
|
+
- type: input
|
|
37
|
+
id: os
|
|
38
|
+
attributes:
|
|
39
|
+
label: OS
|
|
40
|
+
placeholder: "macOS 14 / Ubuntu 22.04 / Windows 11"
|
|
41
|
+
- type: textarea
|
|
42
|
+
id: extras
|
|
43
|
+
attributes:
|
|
44
|
+
label: Anything else?
|
|
45
|
+
description: Logs, stack traces, full trace IDs, anything that helps narrow it down.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Feature request
|
|
2
|
+
description: Propose a new feature or behaviour change.
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: problem
|
|
7
|
+
attributes:
|
|
8
|
+
label: What problem are you trying to solve?
|
|
9
|
+
description: The concrete situation that made you wish clustertrace did this. Skip "wouldn't it be cool if…".
|
|
10
|
+
validations:
|
|
11
|
+
required: true
|
|
12
|
+
- type: textarea
|
|
13
|
+
id: proposal
|
|
14
|
+
attributes:
|
|
15
|
+
label: Proposed shape of the fix
|
|
16
|
+
description: API sketch, screenshot, or rough mechanism. Doesn't need to be final.
|
|
17
|
+
validations:
|
|
18
|
+
required: true
|
|
19
|
+
- type: textarea
|
|
20
|
+
id: alternatives
|
|
21
|
+
attributes:
|
|
22
|
+
label: What would you do instead today?
|
|
23
|
+
description: Workarounds, other tools that have it, why the workarounds aren't enough.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: 'Wanted: tree-edit-distance clustering mode'
|
|
3
|
+
about: Help wanted — better clustering for traces that differ by reorderings
|
|
4
|
+
title: 'Add mode=''tree_edit'' to signature_for_spans (reorder + insertion tolerance)'
|
|
5
|
+
labels: enhancement, help wanted, algorithm
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What
|
|
10
|
+
|
|
11
|
+
Today clustertrace has two clustering modes:
|
|
12
|
+
- `mode='ordered'` — exact-string equality on RLE-collapsed sequence
|
|
13
|
+
- `mode='set'` — sorted unique pairs (collapses all reorderings)
|
|
14
|
+
|
|
15
|
+
Both are extremes. We want a middle ground: a signature mode that tolerates small reorderings and insertions (e.g. one extra retry, two tool calls swapped) but still distinguishes structurally different paths.
|
|
16
|
+
|
|
17
|
+
## Why
|
|
18
|
+
|
|
19
|
+
Real agents are noisy. With `mode='ordered'` you get 100+ clusters from 1000 traces (one extra retry creates a new cluster). With `mode='set'` you collapse everything that has the same step inventory regardless of order — too coarse.
|
|
20
|
+
|
|
21
|
+
Tree-edit-distance or Levenshtein on the step sequence would give a tunable threshold.
|
|
22
|
+
|
|
23
|
+
## Design sketch
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
def signature_for_spans(spans, mode='ordered', edit_distance_threshold=None):
|
|
27
|
+
if mode == 'tree_edit':
|
|
28
|
+
# Compute a normalized step sequence
|
|
29
|
+
# Bucket traces whose pairwise edit distance < threshold
|
|
30
|
+
...
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Storage implication: the canonical signature for a cluster becomes the "representative" sequence; non-canonical traces store a reference. This needs a `signature_canonical_id` column on traces (v4 migration).
|
|
34
|
+
|
|
35
|
+
## Pointers
|
|
36
|
+
|
|
37
|
+
- [`src/clustertrace/cluster.py`](../../src/clustertrace/cluster.py).
|
|
38
|
+
- [Wagner-Fischer algorithm](https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm) is the obvious starting point.
|
|
39
|
+
- [Zhang-Shasha](https://epubs.siam.org/doi/10.1137/0218082) for the tree-edit-distance variant if you want hierarchical structure too.
|
|
40
|
+
- This is real algorithmic work; not a 4-hour PR.
|
|
41
|
+
|
|
42
|
+
Estimated effort: a weekend of focused work + a benchmark showing it's better than `mode='set'` on real data.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: 'Wanted: streaming chunk capture'
|
|
3
|
+
about: Help wanted — capture streaming response chunks as they arrive
|
|
4
|
+
title: 'Capture streaming response chunks instead of completion-only'
|
|
5
|
+
labels: enhancement, help wanted
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What
|
|
10
|
+
|
|
11
|
+
Today, when you call `messages.create(stream=True)` or `chat.completions.create(stream=True)`, clustertrace records the span on completion only and tags it with `streaming: true`. It does **not** capture the intermediate chunks.
|
|
12
|
+
|
|
13
|
+
We want: chunk-by-chunk capture so debugging an agent that gets stuck mid-stream (or fails partway through) is possible.
|
|
14
|
+
|
|
15
|
+
## Why
|
|
16
|
+
|
|
17
|
+
Streaming is the default for most production agents. Without chunk capture, clustertrace is invisible to that workflow.
|
|
18
|
+
|
|
19
|
+
## Design sketch
|
|
20
|
+
|
|
21
|
+
- Wrap the returned stream/iterator in a `_StreamingResponseWrapper`.
|
|
22
|
+
- On each `__next__`, append to an in-memory list on the wrapper.
|
|
23
|
+
- On `__exit__` / `StopIteration`, write the accumulated chunks into `spans.output_json` as a list, plus a `chunks_received: N` attr.
|
|
24
|
+
- Be careful with the OpenAI streaming context manager (`with client.chat.completions.create(stream=True) as stream:`) — needs `__enter__` / `__exit__` proxies.
|
|
25
|
+
- Handle async streams (`async for chunk in stream`) too — `__aiter__` / `__anext__`.
|
|
26
|
+
|
|
27
|
+
## Pointers
|
|
28
|
+
|
|
29
|
+
- [`src/clustertrace/anthropic.py`](../../src/clustertrace/anthropic.py) `_WrappedMessages.create`.
|
|
30
|
+
- [`src/clustertrace/openai.py`](../../src/clustertrace/openai.py) `_WrappedCompletions.create`.
|
|
31
|
+
|
|
32
|
+
Estimated effort: 300 LOC + 200 LOC tests, a full day. The fiddly bits are sync vs async + context-manager vs iterator semantics.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: 'Wanted: native wrap_bedrock'
|
|
3
|
+
about: Help wanted — native AWS Bedrock wrapper for non-Anthropic-SDK paths
|
|
4
|
+
title: 'Add wrap_bedrock for the boto3 Bedrock runtime client'
|
|
5
|
+
labels: enhancement, good first issue, help wanted
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What
|
|
10
|
+
|
|
11
|
+
A native `clustertrace.wrap_bedrock(client)` that wraps `boto3.client("bedrock-runtime")` and logs `invoke_model` / `invoke_model_with_response_stream` calls.
|
|
12
|
+
|
|
13
|
+
## Why
|
|
14
|
+
|
|
15
|
+
Today, Bedrock works through `wrap_anthropic(AnthropicBedrock())` — but a lot of users hit Bedrock via the raw `boto3` client, which doesn't expose `.messages.create`. A native wrapper closes the gap and broadens addressable users meaningfully.
|
|
16
|
+
|
|
17
|
+
## Pointers
|
|
18
|
+
|
|
19
|
+
- Mirror the pattern in [`src/clustertrace/anthropic.py`](../../src/clustertrace/anthropic.py): `_record_*_span`, `_finish`, `_WrappedClient`.
|
|
20
|
+
- Parse Bedrock's `body` (it's a JSON-encoded string) to extract the model, input, output, and usage.
|
|
21
|
+
- Add `boto3` as an optional install extra: `clustertrace[bedrock]`.
|
|
22
|
+
- Add a price entry in [`src/clustertrace/cost.py`](../../src/clustertrace/cost.py) for the Bedrock model IDs you support (`anthropic.claude-*-v2`, `meta.llama3-*`, etc.).
|
|
23
|
+
- Write `tests/test_wrap_bedrock.py` against a `FakeBedrockClient` (no AWS credentials needed).
|
|
24
|
+
|
|
25
|
+
[CONTRIBUTING.md](../../CONTRIBUTING.md) has the step-by-step recipe.
|
|
26
|
+
|
|
27
|
+
Estimated effort: 250 LOC code + 150 LOC tests, half a day.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: 'Wanted: native wrap_gemini'
|
|
3
|
+
about: Help wanted — native Google Gemini wrapper
|
|
4
|
+
title: 'Add wrap_gemini for google-genai SDK'
|
|
5
|
+
labels: enhancement, good first issue, help wanted
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What
|
|
10
|
+
|
|
11
|
+
`clustertrace.wrap_gemini(client)` wrapping the `google.genai.Client.models.generate_content` (and the streaming + async variants).
|
|
12
|
+
|
|
13
|
+
## Why
|
|
14
|
+
|
|
15
|
+
Gemini works through OpenTelemetry today, but a native wrapper:
|
|
16
|
+
- captures fields the OTel auto-instrumentor doesn't (function-calling tool args, safety settings)
|
|
17
|
+
- lands costs from the bundled pricing table without needing the OTel `gen_ai.*` attribute mapping
|
|
18
|
+
- gives a more accurate `messages.create`-style span shape
|
|
19
|
+
|
|
20
|
+
## Pointers
|
|
21
|
+
|
|
22
|
+
- Mirror [`src/clustertrace/openai.py`](../../src/clustertrace/openai.py).
|
|
23
|
+
- Watch out for sync vs async (`Client` vs `AsyncClient`).
|
|
24
|
+
- Add `google-genai` as an extra: `clustertrace[gemini]`.
|
|
25
|
+
- Add prices to [`src/clustertrace/cost.py`](../../src/clustertrace/cost.py) — `gemini-2.5-pro`, `gemini-2.5-flash`, etc.
|
|
26
|
+
- Tests in `tests/test_wrap_gemini.py` against a `FakeGeminiClient`.
|
|
27
|
+
|
|
28
|
+
[CONTRIBUTING.md](../../CONTRIBUTING.md) has the recipe.
|
|
29
|
+
|
|
30
|
+
Estimated effort: 250 LOC code + 150 LOC tests, half a day.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
<!-- Thanks for sending a PR. Keep PRs small — one logical change per PR. -->
|
|
2
|
+
|
|
3
|
+
## What this changes
|
|
4
|
+
|
|
5
|
+
<!-- One or two sentences. -->
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
<!-- What problem does this solve? Link issue if relevant. -->
|
|
10
|
+
|
|
11
|
+
## How
|
|
12
|
+
|
|
13
|
+
<!-- Brief overview of the implementation choice and any trade-offs. -->
|
|
14
|
+
|
|
15
|
+
## Checklist
|
|
16
|
+
|
|
17
|
+
- [ ] `ruff check .` is clean
|
|
18
|
+
- [ ] `pytest -q` is green
|
|
19
|
+
- [ ] Added/updated tests for new behaviour
|
|
20
|
+
- [ ] Updated README if the public API or pitch changed
|
|
21
|
+
- [ ] If adding a new SDK wrapper, followed the steps in CONTRIBUTING.md
|
|
22
|
+
- [ ] Appended a line to `STATUS.md`
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: publish to pypi
|
|
2
|
+
|
|
3
|
+
# Triggers on any v* tag push (or a manual workflow_dispatch run). Uses PyPI's OIDC trusted-publisher flow —
|
|
4
|
+
# no API tokens stored in the repo. Set up once at
|
|
5
|
+
# https://pypi.org/manage/account/publishing/ with:
|
|
6
|
+
# owner=harrywinter06 repo=clustertrace
|
|
7
|
+
# workflow=publish.yml environment=pypi project=clustertrace
|
|
8
|
+
|
|
9
|
+
on:
|
|
10
|
+
push:
|
|
11
|
+
tags:
|
|
12
|
+
- "v*"
|
|
13
|
+
workflow_dispatch:
|
|
14
|
+
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
build:
|
|
20
|
+
name: Build wheel + sdist
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: astral-sh/setup-uv@v3
|
|
26
|
+
with:
|
|
27
|
+
enable-cache: false # uv.lock is gitignored
|
|
28
|
+
|
|
29
|
+
- name: Set up Python
|
|
30
|
+
run: uv python install 3.12
|
|
31
|
+
|
|
32
|
+
- name: Build sdist + wheel
|
|
33
|
+
# `uv build` produces both without needing a venv or --system.
|
|
34
|
+
run: uv build
|
|
35
|
+
|
|
36
|
+
- name: Show artifacts
|
|
37
|
+
run: ls -la dist/
|
|
38
|
+
|
|
39
|
+
- name: Upload artifacts
|
|
40
|
+
uses: actions/upload-artifact@v4
|
|
41
|
+
with:
|
|
42
|
+
name: dist
|
|
43
|
+
path: dist/
|
|
44
|
+
|
|
45
|
+
publish:
|
|
46
|
+
name: Publish to PyPI
|
|
47
|
+
needs: build
|
|
48
|
+
runs-on: ubuntu-latest
|
|
49
|
+
# Restrict OIDC to a named environment so a stolen token can't
|
|
50
|
+
# publish without an approval step on protected releases.
|
|
51
|
+
environment:
|
|
52
|
+
name: pypi
|
|
53
|
+
url: https://pypi.org/project/clustertrace/
|
|
54
|
+
permissions:
|
|
55
|
+
id-token: write # required for OIDC
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/download-artifact@v4
|
|
58
|
+
with:
|
|
59
|
+
name: dist
|
|
60
|
+
path: dist/
|
|
61
|
+
|
|
62
|
+
- name: Publish to PyPI
|
|
63
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
64
|
+
# No username/password — OIDC token is exchanged automatically.
|
|
65
|
+
# PyPI verifies the workflow filename + repo + environment match
|
|
66
|
+
# what you configured on the trusted-publisher page.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
name: github release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag:
|
|
10
|
+
description: "Tag name (e.g. v0.5.0)"
|
|
11
|
+
required: true
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: write # required to create releases
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
release:
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
with:
|
|
22
|
+
fetch-depth: 0 # need the full history for prev-tag lookup
|
|
23
|
+
|
|
24
|
+
- name: Extract release notes from CHANGELOG
|
|
25
|
+
id: notes
|
|
26
|
+
run: |
|
|
27
|
+
set -euo pipefail
|
|
28
|
+
if [ -n "${{ inputs.tag }}" ]; then
|
|
29
|
+
TAG="${{ inputs.tag }}"
|
|
30
|
+
else
|
|
31
|
+
TAG="${GITHUB_REF#refs/tags/}"
|
|
32
|
+
fi
|
|
33
|
+
VERSION="${TAG#v}"
|
|
34
|
+
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
|
35
|
+
# Pull the section between "## [<version>]" and the next "## [" header.
|
|
36
|
+
NOTES=$(awk -v ver="$VERSION" '
|
|
37
|
+
/^## \[/ {
|
|
38
|
+
if (in_section) exit
|
|
39
|
+
if ($0 ~ "## \\["ver"\\]") { in_section=1; next }
|
|
40
|
+
}
|
|
41
|
+
in_section { print }
|
|
42
|
+
' CHANGELOG.md)
|
|
43
|
+
if [ -z "$NOTES" ]; then
|
|
44
|
+
echo "no CHANGELOG section found for $VERSION; falling back to tag message"
|
|
45
|
+
NOTES=$(git tag -l --format="%(contents)" "$TAG")
|
|
46
|
+
fi
|
|
47
|
+
{
|
|
48
|
+
echo "notes<<EOF"
|
|
49
|
+
echo "$NOTES"
|
|
50
|
+
echo ""
|
|
51
|
+
echo "---"
|
|
52
|
+
echo ""
|
|
53
|
+
echo "Install: \`pip install clustertrace==$VERSION\`"
|
|
54
|
+
echo "Try: \`clustertrace demo\`"
|
|
55
|
+
echo "EOF"
|
|
56
|
+
} >> "$GITHUB_OUTPUT"
|
|
57
|
+
|
|
58
|
+
- name: Create GitHub Release
|
|
59
|
+
uses: softprops/action-gh-release@v2
|
|
60
|
+
with:
|
|
61
|
+
tag_name: ${{ steps.notes.outputs.tag || github.ref_name }}
|
|
62
|
+
name: ${{ steps.notes.outputs.tag || github.ref_name }}
|
|
63
|
+
body: ${{ steps.notes.outputs.notes }}
|
|
64
|
+
draft: false
|
|
65
|
+
prerelease: ${{ contains(github.ref_name, 'rc') || contains(github.ref_name, 'beta') || contains(github.ref_name, 'alpha') }}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
name: Python ${{ matrix.python }} on ${{ matrix.os }}
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
fail-fast: false
|
|
15
|
+
matrix:
|
|
16
|
+
python: ["3.11", "3.12", "3.13"]
|
|
17
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Install uv
|
|
22
|
+
uses: astral-sh/setup-uv@v3
|
|
23
|
+
with:
|
|
24
|
+
enable-cache: false # uv.lock is gitignored
|
|
25
|
+
|
|
26
|
+
- name: Set up Python ${{ matrix.python }}
|
|
27
|
+
run: uv python install ${{ matrix.python }}
|
|
28
|
+
|
|
29
|
+
- name: Create venv + install
|
|
30
|
+
# Use a venv (not --system) because the runner's host Python is
|
|
31
|
+
# externally managed (PEP 668) on Ubuntu/macOS.
|
|
32
|
+
run: |
|
|
33
|
+
uv venv --python ${{ matrix.python }}
|
|
34
|
+
uv pip install -e ".[anthropic,openai,dev]"
|
|
35
|
+
|
|
36
|
+
- name: Lint
|
|
37
|
+
run: uv run ruff check src tests examples
|
|
38
|
+
|
|
39
|
+
- name: Run tests
|
|
40
|
+
run: uv run pytest -q
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
*.pyd
|
|
5
|
+
.Python
|
|
6
|
+
*.egg-info/
|
|
7
|
+
.eggs/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
env/
|
|
13
|
+
.env
|
|
14
|
+
*.db
|
|
15
|
+
*.db-journal
|
|
16
|
+
*.db-wal
|
|
17
|
+
*.db-shm
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
.coverage
|
|
21
|
+
.idea/
|
|
22
|
+
.vscode/
|
|
23
|
+
*.log
|
|
24
|
+
.DS_Store
|
|
25
|
+
Thumbs.db
|
|
26
|
+
uv.lock
|
|
27
|
+
demo-traces/
|
|
28
|
+
|
|
29
|
+
# Internal strategy docs - not for public consumption
|
|
30
|
+
_internal/
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# ARCHITECTURE
|
|
2
|
+
|
|
3
|
+
This document explains the design choices in clustertrace. It's intentionally short — the codebase is small enough that you can read it end-to-end in an hour. Read this first.
|
|
4
|
+
|
|
5
|
+
## Storage
|
|
6
|
+
|
|
7
|
+
One SQLite file, WAL mode, schema versioned by a `schema_meta.version` row and a list of migration scripts in `storage._MIGRATIONS`. Adding a column or table = append a new script and bump `_SCHEMA_VERSION`. Migrations are applied in order, exactly once, by `_ensure_initialized` on first connect.
|
|
8
|
+
|
|
9
|
+
There are five tables:
|
|
10
|
+
|
|
11
|
+
| Table | Purpose |
|
|
12
|
+
|---|---|
|
|
13
|
+
| `traces` | one row per top-level trace (status, duration, signature, cost) |
|
|
14
|
+
| `spans` | every span in every trace, parent-linked |
|
|
15
|
+
| `trace_tags` | key/value tags filterable in the dashboard |
|
|
16
|
+
| `trace_metrics` | numeric metrics from `clustertrace.metric()` for time-series charts |
|
|
17
|
+
| `spans_fts` | FTS5 virtual table mirroring span name + I/O + error_message |
|
|
18
|
+
|
|
19
|
+
The FTS5 table is kept in sync via three triggers (insert/update/delete on `spans`). When migration v3 runs on an existing DB, the table is backfilled inline.
|
|
20
|
+
|
|
21
|
+
## Tracing
|
|
22
|
+
|
|
23
|
+
`@clustertrace.trace` opens a span when the function is called and closes it on return or exception. Parent linkage uses `contextvars` — a `ContextVar` for `trace_id` and one for `span_id`. This is what makes async work without manual plumbing: `contextvars` propagate across `await` boundaries and `asyncio.gather`, so a span opened in a parent task is visible to spans opened in child tasks.
|
|
24
|
+
|
|
25
|
+
The decorator distinguishes sync vs. async functions via `inspect.iscoroutinefunction(fn)` and dispatches to different code paths (`_sync_call_span` and `_AsyncCallSpan`) so that we never accidentally `await` a non-coroutine or block on a coroutine.
|
|
26
|
+
|
|
27
|
+
## Signatures and clustering
|
|
28
|
+
|
|
29
|
+
The differentiator. A trace's *signature* is the ordered sequence of `(normalized_name, status)` pairs from its non-root spans, with consecutive duplicates collapsed (RLE). Two traces with the same signature took the same execution path. Clustering by signature reveals the distinct ways an agent runs.
|
|
30
|
+
|
|
31
|
+
Three design choices worth justifying:
|
|
32
|
+
|
|
33
|
+
**Why exact-string equality, not fuzzy?** Trade-offs in v0.3:
|
|
34
|
+
|
|
35
|
+
| Approach | Pros | Cons |
|
|
36
|
+
|---|---|---|
|
|
37
|
+
| Exact ordered + RLE | Cheap, deterministic, easy to explain | Reorderings split clusters |
|
|
38
|
+
| Set-of-edges | Reorder-insensitive | Loses order information that's often diagnostic |
|
|
39
|
+
| Tree-edit distance | Captures intuitive similarity | O(n²) per pair, expensive at scale |
|
|
40
|
+
| Learned embeddings | Captures semantic similarity | Adds ML dep, opaque, harder to debug |
|
|
41
|
+
|
|
42
|
+
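For v0.3 we went with the simple approach (exact ordered + RLE), which remains the default `mode='ordered'`. Reorder-insensitive clustering was a real gap and is now covered by `mode='set'` (sorted unique pairs). Fuzzy clustering on top of the exact signatures is straightforward to layer in later.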
|
|
43
|
+
|
|
44
|
+
**Why normalize LLM-call names by provider, not by model?** Because changing models is the most common thing an agent dev does, and we don't want every prompt-tuning iteration to fragment your clusters into N×(number of models) groups. The model is preserved in span attributes; clustering uses the provider prefix only.
|
|
45
|
+
|
|
46
|
+
**Why collapse consecutive duplicates (RLE)?** Agents loop. "Fetched 3 papers" and "fetched 5 papers" are the same execution shape. RLE collapse keeps the cluster count manageable.
|
|
47
|
+
|
|
48
|
+
The cost: RLE means "loop ran 3 times" and "loop ran 30 times" look identical. There's no flag to disable it yet; if you need to distinguish, the per-span data is still in the spans table.
|
|
49
|
+
|
|
50
|
+
## OpenTelemetry ingestion
|
|
51
|
+
|
|
52
|
+
`clustertrace.otel.ClustertraceSpanExporter` implements the OTel exporter protocol (`export`, `shutdown`, `force_flush`) and writes received spans directly into the SQLite store. The mapping:
|
|
53
|
+
|
|
54
|
+
- OTel `trace_id` (16 bytes → 32 hex chars) → `traces.id`
|
|
55
|
+
- OTel `span_id` (8 bytes → 16 hex chars) → `spans.id`
|
|
56
|
+
- OTel `attributes` → `spans.attrs_json`
|
|
57
|
+
- OTel `gen_ai.*` / `llm.*` attributes → mapped onto clustertrace's conventions so the cost module works
|
|
58
|
+
- OTel `events` with `name=exception` → `spans.error_type` / `error_message`
|
|
59
|
+
- OTel parent context → `spans.parent_id`
|
|
60
|
+
|
|
61
|
+
Why ingestion, not export? The clustering page only works if all your traces live in one store. We let users keep their existing OTel instrumentation and route it to us.
|
|
62
|
+
|
|
63
|
+
## Cost
|
|
64
|
+
|
|
65
|
+
`clustertrace.cost.PRICING` is a dict keyed by model id mapping to `(input_per_million, output_per_million)` in USD. `estimate_span_cost(attrs)` looks up the model (exact match → date-suffix strip → longest-prefix match) and multiplies by the captured input/output token counts.
|
|
66
|
+
|
|
67
|
+
`cost.backfill()` walks every `kind='llm_call'` span, computes its cost, writes it to `spans.cost_usd`, then rolls up per trace into `traces.cost_usd`. The dashboard surfaces these in the recent-traces list, the trace detail page, the snapshot HTML, and the header.
|
|
68
|
+
|
|
69
|
+
Users can override or extend the pricing table at runtime via `$CLUSTERTRACE_PRICING_JSON='{"some-model": [2.0, 10.0]}'`.
|
|
70
|
+
|
|
71
|
+
## Replay
|
|
72
|
+
|
|
73
|
+
`clustertrace replay <trace_id> --entry module:function` imports the named entrypoint, reads the captured `args`/`kwargs` from the trace's root span input, and re-invokes the function. The new trace is tagged `replay_of=<original_id>` so the dashboard can pair them.
|
|
74
|
+
|
|
75
|
+
Limitations are intentional:
|
|
76
|
+
|
|
77
|
+
- The entrypoint module must be importable in the current Python env (you can't replay a trace from a different codebase without putting that code on the path).
|
|
78
|
+
- Non-JSON-serializable args (file handles, sockets) don't round-trip.
|
|
79
|
+
- Truncated root inputs (the `__truncated` marker) refuse to replay because the result wouldn't match the original.
|
|
80
|
+
|
|
81
|
+
Replay-with-modified-prompt is the natural extension — same machinery, but with a stage that lets you edit the captured kwargs before the re-invocation. v0.4 target.
|
|
82
|
+
|
|
83
|
+
## Wrappers
|
|
84
|
+
|
|
85
|
+
`wrap_anthropic` and `wrap_openai` are explicit: you pass an instance, you get a wrapped instance back. No global monkey-patching, no import-time side effects. The wrapper detects sync vs. async by class name (`AsyncAnthropic`, `AsyncOpenAI`) and exposes `.messages.create` (Anthropic) or `.chat.completions.create` (OpenAI). Anthropic's Bedrock and Vertex clients (`AnthropicBedrock`, `AnthropicVertex`) work through `wrap_anthropic` for free because they expose the same `.messages.create` interface.
|
|
86
|
+
|
|
87
|
+
## Dashboard
|
|
88
|
+
|
|
89
|
+
FastAPI + vanilla HTML + small inline JS. No build step, no framework. Static files served from `clustertrace/dashboard/static/`. Templates are Jinja2. The frontend uses `fetch()` for JSON endpoints and renders dynamically.
|
|
90
|
+
|
|
91
|
+
| Page | Purpose |
|
|
92
|
+
|---|---|
|
|
93
|
+
| `/` | recent traces with status/tag/name-search filters and live polling |
|
|
94
|
+
| `/clusters` | execution patterns, failure rates, common failure prefix |
|
|
95
|
+
| `/search` | FTS5 search over span I/O and errors |
|
|
96
|
+
| `/metrics` | per-metric aggregates and rolling-mean sparklines |
|
|
97
|
+
| `/failures` | per-span error-rate bars + force-directed call graph |
|
|
98
|
+
| `/trace/<id>` | Gantt timeline + expandable I/O + tags + metrics |
|
|
99
|
+
|
|
100
|
+
## What's intentionally absent
|
|
101
|
+
|
|
102
|
+
- **No auth.** Local-first means one user.
|
|
103
|
+
- **No retention policy.** Delete `~/.clustertrace/traces.db` when you want to start fresh; export with `clustertrace export --all` first if you want a backup.
|
|
104
|
+
- **No alerting.** Alerts belong with a production observability stack, not a debug tool.
|
|
105
|
+
- **No streaming chunk capture.** We log on completion. Streaming sequences add complexity that doesn't justify itself for debugging.
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to clustertrace. Format roughly follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); semver applies.
|
|
4
|
+
|
|
5
|
+
> **Renamed from `agentlog` to `clustertrace` in v0.5.0** — PyPI's name-similarity check rejected `agentlog` as too close to the existing `agentlogger` package. The new name lands the differentiator (clustering of traces) more directly anyway.
|
|
6
|
+
|
|
7
|
+
## [0.5.0] — 2026-05-20
|
|
8
|
+
|
|
9
|
+
### Breaking
|
|
10
|
+
- **Package renamed `agentlog` → `clustertrace`.** Update your imports: `from agentlog import ...` → `from clustertrace import ...`. The decorator (`@clustertrace.trace`), wrappers (`clustertrace.wrap_anthropic`, `clustertrace.wrap_openai`), exporter (`ClustertraceSpanExporter`), and CLI (`clustertrace dashboard`, `clustertrace demo`) all use the new name.
|
|
11
|
+
- **Environment variables renamed**: `AGENTLOG_DB` → `CLUSTERTRACE_DB`, `AGENTLOG_MAX_PAYLOAD_BYTES` → `CLUSTERTRACE_MAX_PAYLOAD_BYTES`, `AGENTLOG_PRICING_JSON` → `CLUSTERTRACE_PRICING_JSON`, `AGENTLOG_SAMPLE_RATE` → `CLUSTERTRACE_SAMPLE_RATE`.
|
|
12
|
+
- **Default DB path** moved from `~/.agentlog/traces.db` to `~/.clustertrace/traces.db`. If you have existing traces, set `CLUSTERTRACE_DB=~/.agentlog/traces.db` or `mv ~/.agentlog ~/.clustertrace`.
|
|
13
|
+
- **Export format header key** changed from `_agentlog_header` to `_clustertrace_header`. The bundled demo dataset was re-exported under the new key. Old export files will fail to import — re-export from the old version if you need them.
|
|
14
|
+
|
|
15
|
+
### Why the rename
|
|
16
|
+
PyPI's ultranormalization rejected `agentlog` as too similar to the existing `agentlogger` (0.1.2) package. Rather than fight the similarity check, we picked `clustertrace` — a name that lands the actual differentiator (structural clustering of traces) more clearly. PyPI confirmed `clustertrace` is free.
|
|
17
|
+
|
|
18
|
+
### Same
|
|
19
|
+
Everything functional from v0.4.2 is unchanged — same `@trace` decorator API, same dashboard pages, same clustering algorithm, same OpenTelemetry exporter, same wrappers, same 95-test suite (still 95/95 passing under the new name).
|
|
20
|
+
|
|
21
|
+
## [0.4.2] — 2026-05-20
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
- **Windows cp1252 crash on `clustertrace demo`** — the CLI used Unicode arrows (`→`) and bullets (`·`) in its output, which crashed on a fresh Windows install where Python's stdout defaults to cp1252. Found in a first-5-minutes UX test. CLI output is now ASCII-only; regression test enforces this.
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- **`publish.yml` workflow** — triggered by a tag push; builds the wheel + sdist and publishes to PyPI via an OIDC trusted publisher (no API token stored). After one-time setup at https://pypi.org/manage/account/publishing/, `git push --tags` cuts a release.
|
|
28
|
+
- **`release.yml` workflow** — same trigger creates a GitHub Release with notes auto-extracted from the matching `CHANGELOG.md` section.
|
|
29
|
+
- **`OUTREACH.md`** — researched, named-target list: 6 awesome-lists with categories, 9 AI infra bloggers/outlets with specific pitch angles, 9 Slack/Discord communities with channel names, named individuals worth engaging.
|
|
30
|
+
- **README hero restructured** to match the patterns of higher-star competitors (Langfuse 27k, Phoenix 9.8k, Helicone 5.7k): centered banner image, tagline, badges row including PyPI; install command before features.
|
|
31
|
+
|
|
32
|
+
### Improved
|
|
33
|
+
- `LAUNCH.md` updated with researched data: 605-post Show HN survival study (1% survive 7 days on front page), 23,000-post timing analysis confirming weekday US morning slots, plus the contrarian Sunday-late-evening option. Specific times now grounded, not generic.
|
|
34
|
+
|
|
35
|
+
## [0.4.1] — 2026-05-20
|
|
36
|
+
|
|
37
|
+
Launch-readiness release. Code is stable from 0.4.0; new content + integration examples.
|
|
38
|
+
|
|
39
|
+
### Added
|
|
40
|
+
- `examples/langchain_example.py` — five-line OpenTelemetry path showing LangChain → clustertrace.
|
|
41
|
+
- `examples/llamaindex_example.py` — same for LlamaIndex.
|
|
42
|
+
- `examples/benchmark.py` — overhead-per-trace measurements; prints a Markdown table you can paste.
|
|
43
|
+
- `docs/hero.svg` — inline-renderable hero image for the README (no external host).
|
|
44
|
+
- `LAUNCH.md` — internal launch playbook with Show HN draft, Twitter thread, outreach templates.
|
|
45
|
+
- Pinned `good first issue` templates for `wrap_bedrock`, `wrap_gemini`, streaming-chunk capture, and tree-edit-distance clustering.
|
|
46
|
+
- "Overhead" section in the README with honest numbers and the sampling/skip escape hatches.
|
|
47
|
+
|
|
48
|
+
## [0.4.0] — 2026-05-20
|
|
49
|
+
|
|
50
|
+
### Performance
|
|
51
|
+
- **Thread-local connection pool** — 200 concurrent async traces now finish in ~9s instead of ~55s (6× speedup). Previously each storage helper opened a fresh SQLite connection with PRAGMA setup; now the connection is reused within a thread.
|
|
52
|
+
|
|
53
|
+
### Added
|
|
54
|
+
- **`signature_for_spans(mode='set')`** — reorder-insensitive cluster signature. `A→B` and `B→A` collapse to one cluster. Dashboard `/clusters` page now has a mode toggle.
|
|
55
|
+
- **Pagination on `/api/clusters`** — `limit` + `offset` parameters; "Load more" button on the page.
|
|
56
|
+
- **Auto-cost** — `finish_span` now estimates and stores `cost_usd` automatically for any span with a known LLM model + token counts. `finish_trace` rolls up the per-span costs into the trace's `cost_usd`. No more manual `clustertrace backfill-cost`.
|
|
57
|
+
- **`@trace(sample=0.1)` + `@trace(skip=True)`** — production-grade sampling. `skip=True` returns the original function unwrapped (zero overhead). `sample` accepts a float in (0, 1] or reads `$CLUSTERTRACE_SAMPLE_RATE`. Sampling is bypassed inside an active trace so child spans are always recorded.
|
|
58
|
+
- **`clustertrace.flush()` + `atexit` hook** — orphan `running` traces from crashes/kills are cleaned up automatically on process exit.
|
|
59
|
+
- **`clustertrace cleanup --stale-after 5m`** — explicit CLI for the same.
|
|
60
|
+
- **`clustertrace vacuum --older-than 30d [--dry-run]`** — retention policy. `ON DELETE CASCADE` removes spans, tags, metrics; `VACUUM` reclaims disk space.
|
|
61
|
+
- **Per-tag failure-prefix mining** — `failure_summary(group_by_tag='agent')` returns a separate longest-common-prefix per tag value. When you have multiple agents in the same DB, the global prefix is empty but the per-tag ones are diagnostic. The `/clusters` page shows them as labelled chip rows.
|
|
62
|
+
- **Streaming-aware attrs** — when you call `messages.create(stream=True)` or `chat.completions.create(stream=True)`, the span's attrs include `streaming: true` so you can filter or cluster on it. (Full chunk-by-chunk capture is v0.5.)
|
|
63
|
+
- **Versioned JSON exports** — `clustertrace export` emits a header line with `clustertrace_version` and `export_format_version`. `clustertrace import` refuses streams from a newer format than it supports.
|
|
64
|
+
|
|
65
|
+
### Fixed
|
|
66
|
+
- **OTel ingestion**: child-span errors now propagate to trace-level status. Previously OTel-ingested traces where a child errored but the root ended OK were silently classified as successful and excluded from the failure clusters.
|
|
67
|
+
|
|
68
|
+
### Tests
|
|
69
|
+
- 94 tests (was 75). New suite `tests/test_v04_features.py` covers connection pooling, auto-cost, set-mode clustering, sampling, per-tag prefix, streaming attrs, flush + cleanup, vacuum + duration parsing, versioned export.
|
|
70
|
+
- 88% line coverage maintained. Pyright clean. Ruff clean.
|
|
71
|
+
|
|
72
|
+
## [0.3.1] — 2026-05-20
|
|
73
|
+
|
|
74
|
+
### Added
|
|
75
|
+
- `clustertrace demo` command — zero-config trial: imports 60 bundled traces and launches the dashboard. No API key needed.
|
|
76
|
+
- 60-trace `demo-traces.jsonl` bundled in the wheel (3 agent topologies, real failure clustering).
|
|
77
|
+
- Empty-state guidance on the dashboard when there are no traces: `clustertrace demo`, instrument-your-code snippet, and OpenTelemetry one-liner.
|
|
78
|
+
- `py.typed` marker so type checkers respect clustertrace's type hints.
|
|
79
|
+
- GitHub Actions CI on push and PR — Python 3.11/3.12/3.13 × ubuntu/macOS/windows.
|
|
80
|
+
- Issue and PR templates.
|
|
81
|
+
- `CHANGELOG.md`, `SECURITY.md`.
|
|
82
|
+
|
|
83
|
+
### Documentation
|
|
84
|
+
- README hero ASCII cluster view + badges + comparison table + FAQ.
|
|
85
|
+
- Install-from-git instructions for the pre-PyPI window.
|
|
86
|
+
|
|
87
|
+
## [0.3.0] — 2026-05-20
|
|
88
|
+
|
|
89
|
+
### Added
|
|
90
|
+
- **OpenTelemetry exporter** (`clustertrace.otel.ClustertraceSpanExporter`) — any OTel-instrumented app pipes spans into clustertrace with one line. Maps `gen_ai.*` and `llm.*` attribute conventions.
|
|
91
|
+
- **Cost tracking** — pricing table for Anthropic, OpenAI, Gemini with date-suffix stripping. `$CLUSTERTRACE_PRICING_JSON` override. Cached per span + rolled up per trace.
|
|
92
|
+
- **Replay** — `clustertrace replay <id> --entry mod:fn` re-runs a trace with captured args. New trace tagged `replay_of=<original>`.
|
|
93
|
+
- **FTS5 search** across span name + input + output + error_message. Supports phrases, OR, NEAR. `/search` page.
|
|
94
|
+
- **`clustertrace.metric(name, value)`** — numeric metrics per trace, time-series sparkline charts on `/metrics`.
|
|
95
|
+
- **JSONL export/import** — `clustertrace export [--all]` and `clustertrace import`.
|
|
96
|
+
- **Self-contained HTML snapshots** — `clustertrace snapshot <id>` produces a single shareable file with no external assets.
|
|
97
|
+
- Schema migration v3: `trace_metrics` table, `cost_usd` columns, `spans_fts` virtual table with sync triggers.
|
|
98
|
+
- `ARCHITECTURE.md` (design choices and trade-offs), `CONTRIBUTING.md` (how to add an SDK wrapper).
|
|
99
|
+
|
|
100
|
+
### Changed
|
|
101
|
+
- README leads with the differentiator + cost + OTel + replay.
|
|
102
|
+
|
|
103
|
+
## [0.2.0] — 2026-05-20
|
|
104
|
+
|
|
105
|
+
### Added
|
|
106
|
+
- **Trace clustering by structural signature** — RLE-collapsed sequence of `(name, status)` pairs. `/clusters` page.
|
|
107
|
+
- **Longest common failure prefix** mining across failed traces.
|
|
108
|
+
- `clustertrace.tag(key, value)` and `@trace(tags={...})` for filterable tags.
|
|
109
|
+
- `wrap_openai` for sync + async OpenAI clients.
|
|
110
|
+
- Filter UI on the recent-traces page (status, tag, name search) with live polling.
|
|
111
|
+
- Per-field payload truncation (`CLUSTERTRACE_MAX_PAYLOAD_BYTES`, default 32 KB).
|
|
112
|
+
- Real schema migration runner (`schema_meta.version` + ordered DDL list). v1 → v2 adds `trace_tags`, `traces.signature`.
|
|
113
|
+
|
|
114
|
+
## [0.1.0] — 2026-05-20
|
|
115
|
+
|
|
116
|
+
### Added
|
|
117
|
+
- Initial release.
|
|
118
|
+
- `@clustertrace.trace` decorator (sync + async), `clustertrace.span(...)`, `clustertrace.tool_call(...)`.
|
|
119
|
+
- `wrap_anthropic` for sync + async Anthropic clients (works with `AnthropicBedrock` and `AnthropicVertex` via the shared `.messages.create` interface).
|
|
120
|
+
- SQLite storage at `~/.clustertrace/traces.db`, `CLUSTERTRACE_DB` override, WAL mode.
|
|
121
|
+
- FastAPI dashboard on port 7777: recent traces, trace timeline, failure-cluster visualization (per-span error rate + step-of-failure histogram + force-directed call graph).
|
|
122
|
+
- 18 tests.
|