clustertrace 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/bug.yml +45 -0
  2. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/feature.yml +23 -0
  3. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_reorder_clustering.md +42 -0
  4. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_streaming_capture.md +32 -0
  5. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_wrap_bedrock.md +27 -0
  6. clustertrace-0.5.0/.github/ISSUE_TEMPLATE/good_first_issue_wrap_gemini.md +30 -0
  7. clustertrace-0.5.0/.github/pull_request_template.md +22 -0
  8. clustertrace-0.5.0/.github/workflows/publish.yml +66 -0
  9. clustertrace-0.5.0/.github/workflows/release.yml +65 -0
  10. clustertrace-0.5.0/.github/workflows/test.yml +40 -0
  11. clustertrace-0.5.0/.gitignore +30 -0
  12. clustertrace-0.5.0/ARCHITECTURE.md +105 -0
  13. clustertrace-0.5.0/CHANGELOG.md +122 -0
  14. clustertrace-0.5.0/CONTRIBUTING.md +88 -0
  15. clustertrace-0.5.0/LICENSE +21 -0
  16. clustertrace-0.5.0/PKG-INFO +224 -0
  17. clustertrace-0.5.0/README.md +192 -0
  18. clustertrace-0.5.0/SECURITY.md +30 -0
  19. clustertrace-0.5.0/STATUS.md +24 -0
  20. clustertrace-0.5.0/docs/hero.svg +94 -0
  21. clustertrace-0.5.0/examples/agents.py +250 -0
  22. clustertrace-0.5.0/examples/benchmark.py +119 -0
  23. clustertrace-0.5.0/examples/generate_demo_data.py +105 -0
  24. clustertrace-0.5.0/examples/langchain_example.py +65 -0
  25. clustertrace-0.5.0/examples/llamaindex_example.py +59 -0
  26. clustertrace-0.5.0/examples/sample-trace.html +193 -0
  27. clustertrace-0.5.0/pyproject.toml +64 -0
  28. clustertrace-0.5.0/scripts/preview_dashboard.py +8 -0
  29. clustertrace-0.5.0/src/clustertrace/__init__.py +58 -0
  30. clustertrace-0.5.0/src/clustertrace/_ctx.py +16 -0
  31. clustertrace-0.5.0/src/clustertrace/anthropic.py +197 -0
  32. clustertrace-0.5.0/src/clustertrace/cli.py +228 -0
  33. clustertrace-0.5.0/src/clustertrace/cluster.py +311 -0
  34. clustertrace-0.5.0/src/clustertrace/cost.py +132 -0
  35. clustertrace-0.5.0/src/clustertrace/dashboard/__init__.py +0 -0
  36. clustertrace-0.5.0/src/clustertrace/dashboard/app.py +404 -0
  37. clustertrace-0.5.0/src/clustertrace/dashboard/static/style.css +397 -0
  38. clustertrace-0.5.0/src/clustertrace/dashboard/templates/base.html +47 -0
  39. clustertrace-0.5.0/src/clustertrace/dashboard/templates/clusters.html +148 -0
  40. clustertrace-0.5.0/src/clustertrace/dashboard/templates/failures.html +159 -0
  41. clustertrace-0.5.0/src/clustertrace/dashboard/templates/index.html +142 -0
  42. clustertrace-0.5.0/src/clustertrace/dashboard/templates/metrics.html +77 -0
  43. clustertrace-0.5.0/src/clustertrace/dashboard/templates/search.html +55 -0
  44. clustertrace-0.5.0/src/clustertrace/dashboard/templates/trace.html +85 -0
  45. clustertrace-0.5.0/src/clustertrace/data/demo-traces.jsonl +61 -0
  46. clustertrace-0.5.0/src/clustertrace/export.py +163 -0
  47. clustertrace-0.5.0/src/clustertrace/maintenance.py +101 -0
  48. clustertrace-0.5.0/src/clustertrace/openai.py +198 -0
  49. clustertrace-0.5.0/src/clustertrace/otel.py +166 -0
  50. clustertrace-0.5.0/src/clustertrace/py.typed +0 -0
  51. clustertrace-0.5.0/src/clustertrace/replay.py +81 -0
  52. clustertrace-0.5.0/src/clustertrace/snapshot.py +179 -0
  53. clustertrace-0.5.0/src/clustertrace/storage.py +434 -0
  54. clustertrace-0.5.0/src/clustertrace/trace.py +426 -0
  55. clustertrace-0.5.0/tests/conftest.py +13 -0
  56. clustertrace-0.5.0/tests/test_cluster.py +123 -0
  57. clustertrace-0.5.0/tests/test_cost.py +95 -0
  58. clustertrace-0.5.0/tests/test_dashboard.py +166 -0
  59. clustertrace-0.5.0/tests/test_export_snapshot_replay.py +109 -0
  60. clustertrace-0.5.0/tests/test_metric.py +36 -0
  61. clustertrace-0.5.0/tests/test_migrations.py +87 -0
  62. clustertrace-0.5.0/tests/test_otel.py +203 -0
  63. clustertrace-0.5.0/tests/test_search.py +62 -0
  64. clustertrace-0.5.0/tests/test_tags.py +52 -0
  65. clustertrace-0.5.0/tests/test_trace_async.py +64 -0
  66. clustertrace-0.5.0/tests/test_trace_sync.py +90 -0
  67. clustertrace-0.5.0/tests/test_truncation.py +35 -0
  68. clustertrace-0.5.0/tests/test_v04_features.py +343 -0
  69. clustertrace-0.5.0/tests/test_wrap_anthropic.py +105 -0
  70. clustertrace-0.5.0/tests/test_wrap_openai.py +141 -0
@@ -0,0 +1,45 @@
1
+ name: Bug report
2
+ description: Something doesn't work the way the README says it does.
3
+ labels: ["bug"]
4
+ body:
5
+ - type: textarea
6
+ id: what-happened
7
+ attributes:
8
+ label: What happened?
9
+ description: Quick description of the broken behaviour and what you expected.
10
+ placeholder: "@clustertrace.trace on my async function logs duration=None instead of the actual ms."
11
+ validations:
12
+ required: true
13
+ - type: textarea
14
+ id: repro
15
+ attributes:
16
+ label: Minimal reproduction
17
+ description: A code snippet that triggers it. Include any flags / env vars (`CLUSTERTRACE_DB`, etc.).
18
+ render: python
19
+ validations:
20
+ required: true
21
+ - type: input
22
+ id: version
23
+ attributes:
24
+ label: clustertrace version
25
+ description: Output of `clustertrace --version`.
26
+ placeholder: "0.5.0"
27
+ validations:
28
+ required: true
29
+ - type: input
30
+ id: python
31
+ attributes:
32
+ label: Python version
33
+ placeholder: "3.12.4"
34
+ validations:
35
+ required: true
36
+ - type: input
37
+ id: os
38
+ attributes:
39
+ label: OS
40
+ placeholder: "macOS 14 / Ubuntu 22.04 / Windows 11"
41
+ - type: textarea
42
+ id: extras
43
+ attributes:
44
+ label: Anything else?
45
+ description: Logs, stack traces, full trace IDs, anything that helps narrow it down.
@@ -0,0 +1,23 @@
1
+ name: Feature request
2
+ description: Propose a new feature or behaviour change.
3
+ labels: ["enhancement"]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: What problem are you trying to solve?
9
+ description: The concrete situation that made you wish clustertrace did this. Skip "wouldn't it be cool if…".
10
+ validations:
11
+ required: true
12
+ - type: textarea
13
+ id: proposal
14
+ attributes:
15
+ label: Proposed shape of the fix
16
+ description: API sketch, screenshot, or rough mechanism. Doesn't need to be final.
17
+ validations:
18
+ required: true
19
+ - type: textarea
20
+ id: alternatives
21
+ attributes:
22
+ label: What would you do instead today?
23
+ description: Workarounds, other tools that have it, why the workarounds aren't enough.
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: 'Wanted: tree-edit-distance clustering mode'
3
+ about: Help wanted — better clustering for traces that differ by reorderings
4
+ title: 'Add mode=''tree_edit'' to signature_for_spans (reorder + insertion tolerance)'
5
+ labels: enhancement, help wanted, algorithm
6
+ assignees: ''
7
+ ---
8
+
9
+ ## What
10
+
11
+ Today clustertrace has two clustering modes:
12
+ - `mode='ordered'` — exact-string equality on RLE-collapsed sequence
13
+ - `mode='set'` — sorted unique pairs (collapses all reorderings)
14
+
15
+ Both are extremes. We want a middle ground: a signature mode that tolerates small reorderings and insertions (e.g. one extra retry, two tool calls swapped) but still distinguishes structurally different paths.
16
+
17
+ ## Why
18
+
19
+ Real agents are noisy. With `mode='ordered'` you get 100+ clusters from 1000 traces (one extra retry creates a new cluster). With `mode='set'` you collapse everything that has the same step inventory regardless of order — too coarse.
20
+
21
+ Tree-edit-distance or Levenshtein on the step sequence would give a tunable threshold.
22
+
23
+ ## Design sketch
24
+
25
+ ```python
26
+ def signature_for_spans(spans, mode='ordered', edit_distance_threshold=None):
27
+ if mode == 'tree_edit':
28
+ # Compute a normalized step sequence
29
+ # Bucket traces whose pairwise edit distance < threshold
30
+ ...
31
+ ```
32
+
33
+ Storage implication: the canonical signature for a cluster becomes the "representative" sequence; non-canonical traces store a reference. This needs a `signature_canonical_id` column on traces (v4 migration).
34
+
35
+ ## Pointers
36
+
37
+ - [`src/clustertrace/cluster.py`](../../src/clustertrace/cluster.py).
38
+ - [Wagner-Fischer algorithm](https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm) is the obvious starting point.
39
+ - [Zhang-Shasha](https://epubs.siam.org/doi/10.1137/0218082) for the tree-edit-distance variant if you want hierarchical structure too.
40
+ - This is real algorithmic work; not a 4-hour PR.
41
+
42
+ Estimated effort: a weekend of focused work + a benchmark showing it's better than `mode='set'` on real data.
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: 'Wanted: streaming chunk capture'
3
+ about: Help wanted — capture streaming response chunks as they arrive
4
+ title: 'Capture streaming response chunks instead of completion-only'
5
+ labels: enhancement, help wanted
6
+ assignees: ''
7
+ ---
8
+
9
+ ## What
10
+
11
+ Today, when you call `messages.create(stream=True)` or `chat.completions.create(stream=True)`, clustertrace records the span on completion only and tags it with `streaming: true`. It does **not** capture the intermediate chunks.
12
+
13
+ We want: chunk-by-chunk capture so debugging an agent that gets stuck mid-stream (or fails partway through) is possible.
14
+
15
+ ## Why
16
+
17
+ Streaming is the default for most production agents. Without chunk capture, clustertrace is invisible to that workflow.
18
+
19
+ ## Design sketch
20
+
21
+ - Wrap the returned stream/iterator in a `_StreamingResponseWrapper`.
22
+ - On each `__next__`, append to an in-memory list on the wrapper.
23
+ - On `__exit__` / `StopIteration`, write the accumulated chunks into `spans.output_json` as a list, plus a `chunks_received: N` attr.
24
+ - Be careful with the OpenAI streaming context manager (`with client.chat.completions.create(stream=True) as stream:`) — needs `__enter__` / `__exit__` proxies.
25
+ - Handle async streams (`async for chunk in stream`) too — `__aiter__` / `__anext__`.
26
+
27
+ ## Pointers
28
+
29
+ - [`src/clustertrace/anthropic.py`](../../src/clustertrace/anthropic.py) `_WrappedMessages.create`.
30
+ - [`src/clustertrace/openai.py`](../../src/clustertrace/openai.py) `_WrappedCompletions.create`.
31
+
32
+ Estimated effort: 300 LOC + 200 LOC tests, a full day. The fiddly bits are sync vs async + context-manager vs iterator semantics.
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: 'Wanted: native wrap_bedrock'
3
+ about: Help wanted — native AWS Bedrock wrapper for non-Anthropic-SDK paths
4
+ title: 'Add wrap_bedrock for the boto3 Bedrock runtime client'
5
+ labels: enhancement, good first issue, help wanted
6
+ assignees: ''
7
+ ---
8
+
9
+ ## What
10
+
11
+ A native `clustertrace.wrap_bedrock(client)` that wraps `boto3.client("bedrock-runtime")` and logs `invoke_model` / `invoke_model_with_response_stream` calls.
12
+
13
+ ## Why
14
+
15
+ Today, Bedrock works through `wrap_anthropic(AnthropicBedrock())` — but a lot of users hit Bedrock via the raw `boto3` client, which doesn't expose `.messages.create`. A native wrapper closes the gap and broadens addressable users meaningfully.
16
+
17
+ ## Pointers
18
+
19
+ - Mirror the pattern in [`src/clustertrace/anthropic.py`](../../src/clustertrace/anthropic.py): `_record_*_span`, `_finish`, `_WrappedClient`.
20
+ - Parse Bedrock's `body` (it's a JSON-encoded string) to extract the model, input, output, and usage.
21
+ - Add `boto3` as an optional install extra: `clustertrace[bedrock]`.
22
+ - Add a price entry in [`src/clustertrace/cost.py`](../../src/clustertrace/cost.py) for the Bedrock model IDs you support (`anthropic.claude-*-v2`, `meta.llama3-*`, etc.).
23
+ - Write `tests/test_wrap_bedrock.py` against a `FakeBedrockClient` (no AWS credentials needed).
24
+
25
+ [CONTRIBUTING.md](../../CONTRIBUTING.md) has the step-by-step recipe.
26
+
27
+ Estimated effort: 250 LOC code + 150 LOC tests, half a day.
@@ -0,0 +1,30 @@
1
+ ---
2
+ name: 'Wanted: native wrap_gemini'
3
+ about: Help wanted — native Google Gemini wrapper
4
+ title: 'Add wrap_gemini for google-genai SDK'
5
+ labels: enhancement, good first issue, help wanted
6
+ assignees: ''
7
+ ---
8
+
9
+ ## What
10
+
11
+ A native `clustertrace.wrap_gemini(client)` that wraps `google.genai.Client.models.generate_content` (and the streaming + async variants).
12
+
13
+ ## Why
14
+
15
+ Gemini works through OpenTelemetry today, but a native wrapper:
16
+ - captures fields the OTel auto-instrumentor doesn't (function-calling tool args, safety settings)
17
+ - lands costs from the bundled pricing table without needing the OTel `gen_ai.*` attribute mapping
18
+ - gives a more accurate `messages.create`-style span shape
19
+
20
+ ## Pointers
21
+
22
+ - Mirror [`src/clustertrace/openai.py`](../../src/clustertrace/openai.py).
23
+ - Watch out for sync vs async (`Client` vs `AsyncClient`).
24
+ - Add `google-genai` as an extra: `clustertrace[gemini]`.
25
+ - Add prices to [`src/clustertrace/cost.py`](../../src/clustertrace/cost.py) — `gemini-2.5-pro`, `gemini-2.5-flash`, etc.
26
+ - Tests in `tests/test_wrap_gemini.py` against a `FakeGeminiClient`.
27
+
28
+ [CONTRIBUTING.md](../../CONTRIBUTING.md) has the recipe.
29
+
30
+ Estimated effort: 250 LOC code + 150 LOC tests, half a day.
@@ -0,0 +1,22 @@
1
+ <!-- Thanks for sending a PR. Keep PRs small — one logical change per PR. -->
2
+
3
+ ## What this changes
4
+
5
+ <!-- One or two sentences. -->
6
+
7
+ ## Why
8
+
9
+ <!-- What problem does this solve? Link issue if relevant. -->
10
+
11
+ ## How
12
+
13
+ <!-- Brief overview of the implementation choice and any trade-offs. -->
14
+
15
+ ## Checklist
16
+
17
+ - [ ] `ruff check .` is clean
18
+ - [ ] `pytest -q` is green
19
+ - [ ] Added/updated tests for new behaviour
20
+ - [ ] Updated README if the public API or pitch changed
21
+ - [ ] If adding a new SDK wrapper, followed the steps in CONTRIBUTING.md
22
+ - [ ] Appended a line to `STATUS.md`
@@ -0,0 +1,66 @@
1
+ name: publish to pypi
2
+
3
+ # Triggers on any v* tag push (or a manual workflow_dispatch run). Uses PyPI's OIDC trusted-publisher flow —
4
+ # no API tokens stored in the repo. Set up once at
5
+ # https://pypi.org/manage/account/publishing/ with:
6
+ # owner=harrywinter06 repo=clustertrace
7
+ # workflow=publish.yml environment=pypi project=clustertrace
8
+
9
+ on:
10
+ push:
11
+ tags:
12
+ - "v*"
13
+ workflow_dispatch:
14
+
15
+ permissions:
16
+ contents: read
17
+
18
+ jobs:
19
+ build:
20
+ name: Build wheel + sdist
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+
25
+ - uses: astral-sh/setup-uv@v3
26
+ with:
27
+ enable-cache: false # uv.lock is gitignored
28
+
29
+ - name: Set up Python
30
+ run: uv python install 3.12
31
+
32
+ - name: Build sdist + wheel
33
+ # `uv build` produces both without needing a venv or --system.
34
+ run: uv build
35
+
36
+ - name: Show artifacts
37
+ run: ls -la dist/
38
+
39
+ - name: Upload artifacts
40
+ uses: actions/upload-artifact@v4
41
+ with:
42
+ name: dist
43
+ path: dist/
44
+
45
+ publish:
46
+ name: Publish to PyPI
47
+ needs: build
48
+ runs-on: ubuntu-latest
49
+ # Restrict OIDC to a named environment so a stolen token can't
50
+ # publish without an approval step on protected releases.
51
+ environment:
52
+ name: pypi
53
+ url: https://pypi.org/project/clustertrace/
54
+ permissions:
55
+ id-token: write # required for OIDC
56
+ steps:
57
+ - uses: actions/download-artifact@v4
58
+ with:
59
+ name: dist
60
+ path: dist/
61
+
62
+ - name: Publish to PyPI
63
+ uses: pypa/gh-action-pypi-publish@release/v1
64
+ # No username/password — OIDC token is exchanged automatically.
65
+ # PyPI verifies the workflow filename + repo + environment match
66
+ # what you configured on the trusted-publisher page.
@@ -0,0 +1,65 @@
1
+ name: github release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+ inputs:
9
+ tag:
10
+ description: "Tag name (e.g. v0.5.0)"
11
+ required: true
12
+
13
+ permissions:
14
+ contents: write # required to create releases
15
+
16
+ jobs:
17
+ release:
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ with:
22
+ fetch-depth: 0 # need the full history for prev-tag lookup
23
+
24
+ - name: Extract release notes from CHANGELOG
25
+ id: notes
26
+ run: |
27
+ set -euo pipefail
28
+ if [ -n "${{ inputs.tag }}" ]; then
29
+ TAG="${{ inputs.tag }}"
30
+ else
31
+ TAG="${GITHUB_REF#refs/tags/}"
32
+ fi
33
+ VERSION="${TAG#v}"
34
+ echo "tag=$TAG" >> "$GITHUB_OUTPUT"
35
+ # Pull the section between "## [<version>]" and the next "## [" header.
36
+ NOTES=$(awk -v ver="$VERSION" '
37
+ /^## \[/ {
38
+ if (in_section) exit
39
+ if ($0 ~ "## \\["ver"\\]") { in_section=1; next }
40
+ }
41
+ in_section { print }
42
+ ' CHANGELOG.md)
43
+ if [ -z "$NOTES" ]; then
44
+ echo "no CHANGELOG section found for $VERSION; falling back to tag message"
45
+ NOTES=$(git tag -l --format="%(contents)" "$TAG")
46
+ fi
47
+ {
48
+ echo "notes<<EOF"
49
+ echo "$NOTES"
50
+ echo ""
51
+ echo "---"
52
+ echo ""
53
+ echo "Install: \`pip install clustertrace==$VERSION\`"
54
+ echo "Try: \`clustertrace demo\`"
55
+ echo "EOF"
56
+ } >> "$GITHUB_OUTPUT"
57
+
58
+ - name: Create GitHub Release
59
+ uses: softprops/action-gh-release@v2
60
+ with:
61
+ tag_name: ${{ steps.notes.outputs.tag || github.ref_name }}
62
+ name: ${{ steps.notes.outputs.tag || github.ref_name }}
63
+ body: ${{ steps.notes.outputs.notes }}
64
+ draft: false
65
+ prerelease: ${{ contains(github.ref_name, 'rc') || contains(github.ref_name, 'beta') || contains(github.ref_name, 'alpha') }}
@@ -0,0 +1,40 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ test:
11
+ name: Python ${{ matrix.python }} on ${{ matrix.os }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ python: ["3.11", "3.12", "3.13"]
17
+ os: [ubuntu-latest, macos-latest, windows-latest]
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Install uv
22
+ uses: astral-sh/setup-uv@v3
23
+ with:
24
+ enable-cache: false # uv.lock is gitignored
25
+
26
+ - name: Set up Python ${{ matrix.python }}
27
+ run: uv python install ${{ matrix.python }}
28
+
29
+ - name: Create venv + install
30
+ # Use a venv (not --system) because the runner's host Python is
31
+ # externally managed (PEP 668) on Ubuntu/macOS.
32
+ run: |
33
+ uv venv --python ${{ matrix.python }}
34
+ uv pip install -e ".[anthropic,openai,dev]"
35
+
36
+ - name: Lint
37
+ run: uv run ruff check src tests examples
38
+
39
+ - name: Run tests
40
+ run: uv run pytest -q
@@ -0,0 +1,30 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ *.egg-info/
7
+ .eggs/
8
+ dist/
9
+ build/
10
+ .venv/
11
+ venv/
12
+ env/
13
+ .env
14
+ *.db
15
+ *.db-journal
16
+ *.db-wal
17
+ *.db-shm
18
+ .pytest_cache/
19
+ .ruff_cache/
20
+ .coverage
21
+ .idea/
22
+ .vscode/
23
+ *.log
24
+ .DS_Store
25
+ Thumbs.db
26
+ uv.lock
27
+ demo-traces/
28
+
29
+ # Internal strategy docs - not for public consumption
30
+ _internal/
@@ -0,0 +1,105 @@
1
+ # ARCHITECTURE
2
+
3
+ This document explains the design choices in clustertrace. It's intentionally short — the codebase is small enough that you can read it end-to-end in an hour. Read this first.
4
+
5
+ ## Storage
6
+
7
+ One SQLite file, WAL mode, schema versioned by a `schema_meta.version` row and a list of migration scripts in `storage._MIGRATIONS`. Adding a column or table = append a new script and bump `_SCHEMA_VERSION`. Migrations are applied in order, exactly once, by `_ensure_initialized` on first connect.
8
+
9
+ There are five tables:
10
+
11
+ | Table | Purpose |
12
+ |---|---|
13
+ | `traces` | one row per top-level trace (status, duration, signature, cost) |
14
+ | `spans` | every span in every trace, parent-linked |
15
+ | `trace_tags` | key/value tags filterable in the dashboard |
16
+ | `trace_metrics` | numeric metrics from `clustertrace.metric()` for time-series charts |
17
+ | `spans_fts` | FTS5 virtual table mirroring span name + I/O + error_message |
18
+
19
+ The FTS5 table is kept in sync via three triggers (insert/update/delete on `spans`). When migration v3 runs on an existing DB, the table is backfilled inline.
20
+
21
+ ## Tracing
22
+
23
+ `@clustertrace.trace` opens a span when the function is called and closes it on return or exception. Parent linkage uses `contextvars` — a `ContextVar` for `trace_id` and one for `span_id`. This is what makes async work without manual plumbing: `contextvars` propagate across `await` boundaries and `asyncio.gather`, so a span opened in a parent task is visible to spans opened in child tasks.
24
+
25
+ The decorator distinguishes sync vs. async functions via `inspect.iscoroutinefunction(fn)` and dispatches to different code paths (`_sync_call_span` and `_AsyncCallSpan`) so that we never accidentally `await` a non-coroutine or block on a coroutine.
26
+
27
+ ## Signatures and clustering
28
+
29
+ The differentiator. A trace's *signature* is the ordered sequence of `(normalized_name, status)` pairs from its non-root spans, with consecutive duplicates collapsed (RLE). Two traces with the same signature took the same execution path. Clustering by signature reveals the distinct ways an agent runs.
30
+
31
+ Three design choices worth justifying:
32
+
33
+ **Why exact-string equality, not fuzzy?** Trade-offs in v0.3:
34
+
35
+ | Approach | Pros | Cons |
36
+ |---|---|---|
37
+ | Exact ordered + RLE | Cheap, deterministic, easy to explain | Reorderings split clusters |
38
+ | Set-of-edges | Reorder-insensitive | Loses order information that's often diagnostic |
39
+ | Tree-edit distance | Captures intuitive similarity | O(n²) per pair, expensive at scale |
40
+ | Learned embeddings | Captures semantic similarity | Adds ML dep, opaque, harder to debug |
41
+
42
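+ For v0.3 we went with the simple approach. Reorder-insensitive clustering was a real gap and has since been added as `mode='set'` (sorted unique pairs); a tunable middle ground between the two modes is still open. Fuzzy clustering on top of the exact signatures is straightforward to layer in later.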
43
+
44
+ **Why normalize LLM-call names by provider, not by model?** Because changing models is the most common thing an agent dev does, and we don't want every prompt-tuning iteration to fragment your clusters into N×(number of models) groups. The model is preserved in span attributes; clustering uses the provider prefix only.
45
+
46
+ **Why collapse consecutive duplicates (RLE)?** Agents loop. "Fetched 3 papers" and "fetched 5 papers" are the same execution shape. RLE collapse keeps the cluster count manageable.
47
+
48
+ The cost: RLE means "loop ran 3 times" and "loop ran 30 times" look identical. There's no flag to disable it yet; if you need to distinguish, the per-span data is still in the spans table.
49
+
50
+ ## OpenTelemetry ingestion
51
+
52
+ `clustertrace.otel.ClustertraceSpanExporter` implements the OTel exporter protocol (`export`, `shutdown`, `force_flush`) and writes received spans directly into the SQLite store. The mapping:
53
+
54
+ - OTel `trace_id` (16 bytes → 32 hex chars) → `traces.id`
55
+ - OTel `span_id` (8 bytes → 16 hex chars) → `spans.id`
56
+ - OTel `attributes` → `spans.attrs_json`
57
+ - OTel `gen_ai.*` / `llm.*` attributes → mapped onto clustertrace's conventions so the cost module works
58
+ - OTel `events` with `name=exception` → `spans.error_type` / `error_message`
59
+ - OTel parent context → `spans.parent_id`
60
+
61
+ Why ingestion, not export? The clustering page only works if all your traces live in one store. We let users keep their existing OTel instrumentation and route it to us.
62
+
63
+ ## Cost
64
+
65
+ `clustertrace.cost.PRICING` is a dict keyed by model id mapping to `(input_per_million, output_per_million)` in USD. `estimate_span_cost(attrs)` looks up the model (exact match → date-suffix strip → longest-prefix match) and multiplies by the captured input/output token counts.
66
+
67
+ `cost.backfill()` walks every `kind='llm_call'` span, computes its cost, writes it to `spans.cost_usd`, then rolls up per trace into `traces.cost_usd`. The dashboard surfaces these in the recent-traces list, the trace detail page, the snapshot HTML, and the header.
68
+
69
+ Users can override or extend the pricing table at runtime via `$CLUSTERTRACE_PRICING_JSON='{"some-model": [2.0, 10.0]}'`.
70
+
71
+ ## Replay
72
+
73
+ `clustertrace replay <trace_id> --entry module:function` imports the named entrypoint, reads the captured `args`/`kwargs` from the trace's root span input, and re-invokes the function. The new trace is tagged `replay_of=<original_id>` so the dashboard can pair them.
74
+
75
+ Limitations are intentional:
76
+
77
+ - The entrypoint module must be importable in the current Python env (you can't replay a trace from a different codebase without putting that code on the path).
78
+ - Non-JSON-serializable args (file handles, sockets) don't round-trip.
79
+ - Truncated root inputs (the `__truncated` marker) refuse to replay because the result wouldn't match the original.
80
+
81
+ Replay-with-modified-prompt is the natural extension — same machinery, but with a stage that lets you edit the captured kwargs before the re-invocation. v0.4 target.
82
+
83
+ ## Wrappers
84
+
85
+ `wrap_anthropic` and `wrap_openai` are explicit: you pass an instance, you get a wrapped instance back. No global monkey-patching, no import-time side effects. The wrapper detects sync vs. async by class name (`AsyncAnthropic`, `AsyncOpenAI`) and exposes `.messages.create` (Anthropic) or `.chat.completions.create` (OpenAI). Anthropic's Bedrock and Vertex clients (`AnthropicBedrock`, `AnthropicVertex`) work through `wrap_anthropic` for free because they expose the same `.messages.create` interface.
86
+
87
+ ## Dashboard
88
+
89
+ FastAPI + vanilla HTML + small inline JS. No build step, no framework. Static files served from `clustertrace/dashboard/static/`. Templates are Jinja2. The frontend uses `fetch()` for JSON endpoints and renders dynamically.
90
+
91
+ | Page | Purpose |
92
+ |---|---|
93
+ | `/` | recent traces with status/tag/name-search filters and live polling |
94
+ | `/clusters` | execution patterns, failure rates, common failure prefix |
95
+ | `/search` | FTS5 search over span I/O and errors |
96
+ | `/metrics` | per-metric aggregates and rolling-mean sparklines |
97
+ | `/failures` | per-span error-rate bars + force-directed call graph |
98
+ | `/trace/<id>` | Gantt timeline + expandable I/O + tags + metrics |
99
+
100
+ ## What's intentionally absent
101
+
102
+ - **No auth.** Local-first means one user.
103
+ - **No retention policy.** Delete `~/.clustertrace/traces.db` when you want to start fresh; export with `clustertrace export --all` first if you want a backup.
104
+ - **No alerting.** Alerts belong with a production observability stack, not a debug tool.
105
+ - **No streaming chunk capture.** We log on completion. Streaming sequences add complexity that doesn't justify itself for debugging.
@@ -0,0 +1,122 @@
1
+ # Changelog
2
+
3
+ All notable changes to clustertrace. Format roughly follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); semver applies.
4
+
5
+ > **Renamed from `agentlog` to `clustertrace` in v0.5.0** — PyPI's name-similarity check rejected `agentlog` as too close to the existing `agentlogger` package. The new name lands the differentiator (clustering of traces) more directly anyway.
6
+
7
+ ## [0.5.0] — 2026-05-20
8
+
9
+ ### Breaking
10
+ - **Package renamed `agentlog` → `clustertrace`.** Update your imports: `from agentlog import ...` → `from clustertrace import ...`. The decorator (`@clustertrace.trace`), wrappers (`clustertrace.wrap_anthropic`, `clustertrace.wrap_openai`), exporter (`ClustertraceSpanExporter`), and CLI (`clustertrace dashboard`, `clustertrace demo`) all use the new name.
11
+ - **Environment variables renamed**: `AGENTLOG_DB` → `CLUSTERTRACE_DB`, `AGENTLOG_MAX_PAYLOAD_BYTES` → `CLUSTERTRACE_MAX_PAYLOAD_BYTES`, `AGENTLOG_PRICING_JSON` → `CLUSTERTRACE_PRICING_JSON`, `AGENTLOG_SAMPLE_RATE` → `CLUSTERTRACE_SAMPLE_RATE`.
12
+ - **Default DB path** moved from `~/.agentlog/traces.db` to `~/.clustertrace/traces.db`. If you have existing traces, set `CLUSTERTRACE_DB=~/.agentlog/traces.db` or `mv ~/.agentlog ~/.clustertrace`.
13
+ - **Export format header key** changed from `_agentlog_header` to `_clustertrace_header`. The bundled demo dataset was re-exported under the new key. Old export files will fail to import — re-export from the old version if you need them.
14
+
15
+ ### Why the rename
16
+ PyPI's ultranormalization rejected `agentlog` as too similar to the existing `agentlogger` (0.1.2) package. Rather than fight the similarity check, we picked `clustertrace` — a name that lands the actual differentiator (structural clustering of traces) more clearly. PyPI confirmed `clustertrace` is free.
17
+
18
+ ### Same
19
+ Everything functional from v0.4.2 is unchanged — same `@trace` decorator API, same dashboard pages, same clustering algorithm, same OpenTelemetry exporter, same wrappers, same 95-test suite (still 95/95 passing under the new name).
20
+
21
+ ## [0.4.2] — 2026-05-20
22
+
23
+ ### Fixed
24
+ - **Windows cp1252 crash on `clustertrace demo`** — the CLI used Unicode arrows (`→`) and bullets (`·`) in its output, which crashed on a fresh Windows install where Python's stdout defaults to cp1252. Found in a first-5-minutes UX test. CLI output is now ASCII-only; regression test enforces this.
25
+
26
+ ### Added
27
+ - **`publish.yml` workflow** — on a tag push, builds the wheel + sdist and publishes to PyPI via an OIDC trusted publisher (no API token stored). After one-time setup at https://pypi.org/manage/account/publishing/, `git push --tags` cuts a release.
28
+ - **`release.yml` workflow** — same trigger creates a GitHub Release with notes auto-extracted from the matching `CHANGELOG.md` section.
29
+ - **`OUTREACH.md`** — researched, named-target list: 6 awesome-lists with categories, 9 AI infra bloggers/outlets with specific pitch angles, 9 Slack/Discord communities with channel names, named individuals worth engaging.
30
+ - **README hero restructured** to match the patterns of higher-star competitors (Langfuse 27k, Phoenix 9.8k, Helicone 5.7k): centered banner image, tagline, badges row including PyPI; install command before features.
31
+
32
+ ### Improved
33
+ - `LAUNCH.md` updated with researched data: 605-post Show HN survival study (1% survive 7 days on front page), 23,000-post timing analysis confirming weekday US morning slots, plus the contrarian Sunday-late-evening option. The recommended posting times are now grounded in that data rather than generic advice.
34
+
35
+ ## [0.4.1] — 2026-05-20
36
+
37
+ Launch-readiness release. Code is stable from 0.4.0; new content + integration examples.
38
+
39
+ ### Added
40
+ - `examples/langchain_example.py` — five-line OpenTelemetry path showing LangChain → clustertrace.
41
+ - `examples/llamaindex_example.py` — same for LlamaIndex.
42
+ - `examples/benchmark.py` — overhead-per-trace measurements; prints a Markdown table you can paste.
43
+ - `docs/hero.svg` — inline-renderable hero image for the README (no external host).
44
+ - `LAUNCH.md` — internal launch playbook with Show HN draft, Twitter thread, outreach templates.
45
+ - Pinned `good first issue` templates for `wrap_bedrock`, `wrap_gemini`, streaming-chunk capture, and tree-edit-distance clustering.
46
+ - "Overhead" section in the README with honest numbers and the sampling/skip escape hatches.
47
+
48
+ ## [0.4.0] — 2026-05-20
49
+
50
+ ### Performance
51
+ - **Thread-local connection pool** — 200 concurrent async traces now finish in ~9s instead of ~55s (6× speedup). Previously each storage helper opened a fresh SQLite connection with PRAGMA setup; now the connection is reused within a thread.
52
+
53
+ ### Added
54
+ - **`signature_for_spans(mode='set')`** — reorder-insensitive cluster signature. `A→B` and `B→A` collapse to one cluster. Dashboard `/clusters` page now has a mode toggle.
55
+ - **Pagination on `/api/clusters`** — `limit` + `offset` parameters; "Load more" button on the page.
56
+ - **Auto-cost** — `finish_span` now estimates and stores `cost_usd` automatically for any span with a known LLM model + token counts. `finish_trace` rolls up the per-span costs into the trace's `cost_usd`. No more manual `clustertrace backfill-cost`.
57
+ - **`@trace(sample=0.1)` + `@trace(skip=True)`** — production-grade sampling. `skip=True` returns the original function unwrapped (zero overhead). `sample` accepts a float in (0, 1] or reads `$CLUSTERTRACE_SAMPLE_RATE`. Sampling is bypassed inside an active trace so child spans are always recorded.
58
+ - **`clustertrace.flush()` + `atexit` hook** — orphan `running` traces from crashes/kills are cleaned up automatically on process exit.
59
+ - **`clustertrace cleanup --stale-after 5m`** — explicit CLI for the same.
60
+ - **`clustertrace vacuum --older-than 30d [--dry-run]`** — retention policy. `ON DELETE CASCADE` removes spans, tags, metrics; `VACUUM` reclaims disk space.
61
+ - **Per-tag failure-prefix mining** — `failure_summary(group_by_tag='agent')` returns a separate longest-common-prefix per tag value. When you have multiple agents in the same DB, the global prefix is empty but the per-tag ones are diagnostic. The `/clusters` page shows them as labelled chip rows.
62
+ - **Streaming-aware attrs** — when you call `messages.create(stream=True)` or `chat.completions.create(stream=True)`, the span's attrs include `streaming: true` so you can filter or cluster on it. (Full chunk-by-chunk capture is v0.5.)
63
+ - **Versioned JSON exports** — `clustertrace export` emits a header line with `clustertrace_version` and `export_format_version`. `clustertrace import` refuses streams from a newer format than it supports.
64
+
65
+ ### Fixed
66
+ - **OTel ingestion**: child-span errors now propagate to trace-level status. Previously OTel-ingested traces where a child errored but the root ended OK were silently classified as successful and excluded from the failure clusters.
67
+
68
+ ### Tests
69
+ - 94 tests (was 75). New suite `tests/test_v04_features.py` covers connection pooling, auto-cost, set-mode clustering, sampling, per-tag prefix, streaming attrs, flush + cleanup, vacuum + duration parsing, versioned export.
70
+ - 88% line coverage maintained. Pyright clean. Ruff clean.
71
+
72
+ ## [0.3.1] — 2026-05-20
73
+
74
+ ### Added
75
+ - `clustertrace demo` command — zero-config trial: imports 60 bundled traces and launches the dashboard. No API key needed.
76
+ - 60-trace `demo-traces.jsonl` bundled in the wheel (3 agent topologies, real failure clustering).
77
+ - Empty-state guidance on the dashboard when there are no traces: `clustertrace demo`, instrument-your-code snippet, and OpenTelemetry one-liner.
78
+ - `py.typed` marker so type checkers respect clustertrace's type hints.
79
+ - GitHub Actions CI on push and PR — Python 3.11/3.12/3.13 × ubuntu/macOS/windows.
80
+ - Issue and PR templates.
81
+ - `CHANGELOG.md`, `SECURITY.md`.
82
+
83
+ ### Documentation
84
+ - README hero ASCII cluster view + badges + comparison table + FAQ.
85
+ - Install-from-git instructions for the pre-PyPI window.
86
+
87
+ ## [0.3.0] — 2026-05-20
88
+
89
+ ### Added
90
+ - **OpenTelemetry exporter** (`clustertrace.otel.ClustertraceSpanExporter`) — any OTel-instrumented app pipes spans into clustertrace with one line. Maps `gen_ai.*` and `llm.*` attribute conventions.
91
+ - **Cost tracking** — pricing table for Anthropic, OpenAI, Gemini with date-suffix stripping. `$CLUSTERTRACE_PRICING_JSON` override. Cached per span + rolled up per trace.
92
+ - **Replay** — `clustertrace replay <id> --entry mod:fn` re-runs a trace with captured args. New trace tagged `replay_of=<original>`.
93
+ - **FTS5 search** across span name + input + output + error_message. Supports phrases, OR, NEAR. `/search` page.
94
+ - **`clustertrace.metric(name, value)`** — numeric metrics per trace, time-series sparkline charts on `/metrics`.
95
+ - **JSONL export/import** — `clustertrace export [--all]` and `clustertrace import`.
96
+ - **Self-contained HTML snapshots** — `clustertrace snapshot <id>` produces a single shareable file with no external assets.
97
+ - Schema migration v3: `trace_metrics` table, `cost_usd` columns, `spans_fts` virtual table with sync triggers.
98
+ - `ARCHITECTURE.md` (design choices and trade-offs), `CONTRIBUTING.md` (how to add an SDK wrapper).
99
+
100
+ ### Changed
101
+ - README leads with the differentiator + cost + OTel + replay.
102
+
103
+ ## [0.2.0] — 2026-05-20
104
+
105
+ ### Added
106
+ - **Trace clustering by structural signature** — RLE-collapsed sequence of `(name, status)` pairs. `/clusters` page.
107
+ - **Longest common failure prefix** mining across failed traces.
108
+ - `clustertrace.tag(key, value)` and `@trace(tags={...})` for filterable tags.
109
+ - `wrap_openai` for sync + async OpenAI clients.
110
+ - Filter UI on the recent-traces page (status, tag, name search) with live polling.
111
+ - Per-field payload truncation (`CLUSTERTRACE_MAX_PAYLOAD_BYTES`, default 32 KB).
112
+ - Real schema migration runner (`schema_meta.version` + ordered DDL list). v1 → v2 adds `trace_tags`, `traces.signature`.
113
+
114
+ ## [0.1.0] — 2026-05-20
115
+
116
+ ### Added
117
+ - Initial release.
118
+ - `@clustertrace.trace` decorator (sync + async), `clustertrace.span(...)`, `clustertrace.tool_call(...)`.
119
+ - `wrap_anthropic` for sync + async Anthropic clients (works with `AnthropicBedrock` and `AnthropicVertex` via the shared `.messages.create` interface).
120
+ - SQLite storage at `~/.clustertrace/traces.db`, `CLUSTERTRACE_DB` override, WAL mode.
121
+ - FastAPI dashboard on port 7777: recent traces, trace timeline, failure-cluster visualization (per-span error rate + step-of-failure histogram + force-directed call graph).
122
+ - 18 tests.