agentdelta 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentdelta-0.1.0/.aider.conf.yml +24 -0
- agentdelta-0.1.0/.claude/commands/add-adapter.md +32 -0
- agentdelta-0.1.0/.claude/commands/diff.md +21 -0
- agentdelta-0.1.0/.claude/commands/inspect.md +22 -0
- agentdelta-0.1.0/.claude/commands/record.md +41 -0
- agentdelta-0.1.0/.coderabbit.yaml +32 -0
- agentdelta-0.1.0/.continue/config.json +44 -0
- agentdelta-0.1.0/.cursor/rules/redline.mdc +43 -0
- agentdelta-0.1.0/.editorconfig +24 -0
- agentdelta-0.1.0/.github/CODEOWNERS +6 -0
- agentdelta-0.1.0/.github/FUNDING.yml +1 -0
- agentdelta-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +56 -0
- agentdelta-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +36 -0
- agentdelta-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +34 -0
- agentdelta-0.1.0/.github/copilot-instructions.md +47 -0
- agentdelta-0.1.0/.github/dependabot.yml +18 -0
- agentdelta-0.1.0/.github/labels.yml +98 -0
- agentdelta-0.1.0/.github/release-drafter.yml +58 -0
- agentdelta-0.1.0/.github/workflows/ci.yml +114 -0
- agentdelta-0.1.0/.github/workflows/codeql.yml +32 -0
- agentdelta-0.1.0/.github/workflows/docs.yml +53 -0
- agentdelta-0.1.0/.github/workflows/release-drafter.yml +24 -0
- agentdelta-0.1.0/.github/workflows/release.yml +111 -0
- agentdelta-0.1.0/.github/workflows/stale.yml +25 -0
- agentdelta-0.1.0/.gitignore +36 -0
- agentdelta-0.1.0/.pre-commit-config.yaml +24 -0
- agentdelta-0.1.0/.windsurfrules +25 -0
- agentdelta-0.1.0/AGENTS.md +47 -0
- agentdelta-0.1.0/ARCHITECTURE.md +109 -0
- agentdelta-0.1.0/CHANGELOG.md +29 -0
- agentdelta-0.1.0/CITATION.cff +20 -0
- agentdelta-0.1.0/CLAUDE.md +84 -0
- agentdelta-0.1.0/CODEX.md +75 -0
- agentdelta-0.1.0/CODE_OF_CONDUCT.md +40 -0
- agentdelta-0.1.0/CONTRIBUTING.md +89 -0
- agentdelta-0.1.0/LICENSE +21 -0
- agentdelta-0.1.0/Makefile +39 -0
- agentdelta-0.1.0/PKG-INFO +463 -0
- agentdelta-0.1.0/README.md +391 -0
- agentdelta-0.1.0/SECURITY.md +27 -0
- agentdelta-0.1.0/action.yml +81 -0
- agentdelta-0.1.0/assets/demo.tape +32 -0
- agentdelta-0.1.0/assets/hero.png +0 -0
- agentdelta-0.1.0/assets/logo.png +0 -0
- agentdelta-0.1.0/codecov.yml +19 -0
- agentdelta-0.1.0/docs/adding-adapter.md +163 -0
- agentdelta-0.1.0/docs/api-reference.md +59 -0
- agentdelta-0.1.0/docs/architecture.md +84 -0
- agentdelta-0.1.0/docs/cli-reference.md +81 -0
- agentdelta-0.1.0/docs/contributing.md +32 -0
- agentdelta-0.1.0/docs/github-action.md +89 -0
- agentdelta-0.1.0/docs/how-it-works.md +66 -0
- agentdelta-0.1.0/docs/index.md +78 -0
- agentdelta-0.1.0/docs/mcp.md +75 -0
- agentdelta-0.1.0/docs/openai.md +53 -0
- agentdelta-0.1.0/docs/quickstart.md +124 -0
- agentdelta-0.1.0/docs/trace-format.md +109 -0
- agentdelta-0.1.0/examples/demo.py +81 -0
- agentdelta-0.1.0/mkdocs.yml +98 -0
- agentdelta-0.1.0/openapi.yaml +211 -0
- agentdelta-0.1.0/pyproject.toml +124 -0
- agentdelta-0.1.0/smoke_test.py +515 -0
- agentdelta-0.1.0/src/agentdelta/__init__.py +22 -0
- agentdelta-0.1.0/src/agentdelta/api.py +131 -0
- agentdelta-0.1.0/src/agentdelta/cli.py +138 -0
- agentdelta-0.1.0/src/agentdelta/diff.py +191 -0
- agentdelta-0.1.0/src/agentdelta/embed.py +112 -0
- agentdelta-0.1.0/src/agentdelta/instrument.py +140 -0
- agentdelta-0.1.0/src/agentdelta/mcp_server.py +200 -0
- agentdelta-0.1.0/src/agentdelta/py.typed +0 -0
- agentdelta-0.1.0/src/agentdelta/report.py +216 -0
- agentdelta-0.1.0/src/agentdelta/trace.py +175 -0
- agentdelta-0.1.0/tests/__init__.py +0 -0
- agentdelta-0.1.0/tests/conftest.py +63 -0
- agentdelta-0.1.0/tests/test_cli.py +88 -0
- agentdelta-0.1.0/tests/test_diff.py +67 -0
- agentdelta-0.1.0/tests/test_embed.py +60 -0
- agentdelta-0.1.0/tests/test_instrument.py +84 -0
- agentdelta-0.1.0/tests/test_report.py +46 -0
- agentdelta-0.1.0/tests/test_trace.py +78 -0
- agentdelta-0.1.0/tools/openai-tools.json +72 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# aider configuration for agentdelta
|
|
2
|
+
# https://aider.chat/docs/config/aider_conf.html
|
|
3
|
+
|
|
4
|
+
# Always include these files for architectural context
|
|
5
|
+
read:
|
|
6
|
+
- ARCHITECTURE.md
|
|
7
|
+
- CLAUDE.md
|
|
8
|
+
- src/agentdelta/trace.py
|
|
9
|
+
- src/agentdelta/diff.py
|
|
10
|
+
|
|
11
|
+
# Auto-lint after edits
|
|
12
|
+
lint: true
|
|
13
|
+
lint-cmd: "ruff check {files} && ruff format --check {files}"
|
|
14
|
+
|
|
15
|
+
# Auto-test after edits (disabled by default; set auto-test: true to run test-cmd automatically)
|
|
16
|
+
auto-test: false
|
|
17
|
+
test-cmd: "pytest tests/ -x -q"
|
|
18
|
+
|
|
19
|
+
# Model guidance
|
|
20
|
+
model: claude-sonnet-4-5
|
|
21
|
+
|
|
22
|
+
# Conventions
|
|
23
|
+
attribute-author: false
|
|
24
|
+
attribute-co-author: true
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Scaffold a new instrumentation adapter for a framework not yet supported by agentdelta.
|
|
2
|
+
|
|
3
|
+
Usage: /project:add-adapter <framework_name>
|
|
4
|
+
|
|
5
|
+
Framework name from $ARGUMENTS (e.g. "autogen", "crewai", "smolagents", "pydantic-ai").
|
|
6
|
+
|
|
7
|
+
Generate the following:
|
|
8
|
+
|
|
9
|
+
1. `src/agentdelta/instrument_<framework>.py` — adapter module:
|
|
10
|
+
- A class `<Framework>AgentdeltaAdapter` that wraps the framework's event/callback API
|
|
11
|
+
- Must emit `TraceNode` objects to an `AgentTrace` for each: agent start, LLM call, tool call, tool return, agent end
|
|
12
|
+
- Use `NodeType.START`, `NodeType.LLM`, `NodeType.TOOL_CALL`, `NodeType.TOOL_RETURN`, `NodeType.END`
|
|
13
|
+
- Include a `record_<framework>()` context manager mirroring the LangChain `record()` API
|
|
14
|
+
|
|
15
|
+
2. Export from `src/agentdelta/__init__.py`:
|
|
16
|
+
- Add to `__all__` alphabetically
|
|
17
|
+
|
|
18
|
+
3. `tests/test_instrument_<framework>.py` — at least 3 tests:
|
|
19
|
+
- Callback captures LLM output as a NodeType.LLM node
|
|
20
|
+
- Callback captures tool calls as NodeType.TOOL_CALL nodes
|
|
21
|
+
- record() context manager saves a valid JSONL file on exit
|
|
22
|
+
|
|
23
|
+
4. Update `README.md` Quick Start section with a snippet for the new framework
|
|
24
|
+
|
|
25
|
+
5. Update `pyproject.toml` optional dependencies:
|
|
26
|
+
- Add `[<framework>]` extra with the framework package pinned to `>=` minimum supported version
|
|
27
|
+
|
|
28
|
+
Context:
|
|
29
|
+
- See `src/agentdelta/instrument.py` for the LangChain reference implementation
|
|
30
|
+
- AgentTrace.add_node() and add_edge() are the only write methods needed
|
|
31
|
+
- Keep content fields under 2000 chars for LLM nodes, 500 chars for tool nodes
|
|
32
|
+
- The adapter does NOT need to import agentdelta internals other than trace.py
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Diff two agentdelta trace files and report any behavioral regression.
|
|
2
|
+
|
|
3
|
+
Usage: /project:diff <baseline.jsonl> <candidate.jsonl> [--format rich|json|markdown] [--exit-code]
|
|
4
|
+
|
|
5
|
+
Steps:
|
|
6
|
+
1. Run: `agentdelta diff $ARGUMENTS`
|
|
7
|
+
2. Parse the output:
|
|
8
|
+
- "REGRESSION DETECTED" → a ForkPoint was found; show the fork step, tool change, and similarity score
|
|
9
|
+
- "No regression" → traces are equivalent; show match percentage
|
|
10
|
+
3. If a fork is detected, explain in plain English what changed:
|
|
11
|
+
- Tool change (e.g. get_weather → web_search): mention latency/reliability implications
|
|
12
|
+
- Reasoning divergence: mention that the LLM reasoning path changed even if the final answer is the same
|
|
13
|
+
- Step count difference: mention added/removed tool calls
|
|
14
|
+
4. Suggest a remediation if asked: tighten the system prompt, pin the model version, or add a unit test fixture for this trace pair.
|
|
15
|
+
|
|
16
|
+
Context:
|
|
17
|
+
- Traces are JSONL files — one JSON object per line (trace_meta, node, edge records)
|
|
18
|
+
- Fork threshold default: 0.70 cosine similarity
|
|
19
|
+
- Match threshold default: 0.85 cosine similarity
|
|
20
|
+
- The embedding model is all-MiniLM-L6-v2, runs locally, no API key needed
|
|
21
|
+
- `has_regression` is True iff fork_point is not None
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Inspect an agentdelta trace file and summarize the agent's execution path.
|
|
2
|
+
|
|
3
|
+
Usage: /project:inspect <trace.jsonl>
|
|
4
|
+
|
|
5
|
+
Steps:
|
|
6
|
+
1. Run: `agentdelta inspect $ARGUMENTS`
|
|
7
|
+
2. Present a clean summary:
|
|
8
|
+
- Total steps and run_id
|
|
9
|
+
- Sequence of node types (start → llm → tool_call → tool_return → ... → end)
|
|
10
|
+
- Each tool call: name and truncated arguments
|
|
11
|
+
- Each LLM step: first 100 chars of reasoning
|
|
12
|
+
- Any metadata fields present
|
|
13
|
+
3. Highlight anything unusual:
|
|
14
|
+
- More than 10 steps (complex agent run)
|
|
15
|
+
- Repeated tool calls (possible loop)
|
|
16
|
+
- Missing start or end node
|
|
17
|
+
- Very short LLM reasoning (< 20 chars — may be truncated or empty)
|
|
18
|
+
|
|
19
|
+
Context:
|
|
20
|
+
- Trace format: each line is JSON with "type": "node"|"edge"|"trace_meta"
|
|
21
|
+
- Node types: start, llm, tool_call, tool_return, end
|
|
22
|
+
- Node IDs are content-addressed (SHA-256[:16]) — same content = same ID across runs
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
Generate boilerplate code to record an agent run with agentdelta.
|
|
2
|
+
|
|
3
|
+
Usage: /project:record [framework] [agent_variable_name]
|
|
4
|
+
|
|
5
|
+
Arguments: $ARGUMENTS (optional: framework name like "langchain", "langgraph", "custom")
|
|
6
|
+
|
|
7
|
+
Generate a self-contained Python snippet for the requested framework:
|
|
8
|
+
|
|
9
|
+
For LangChain/LangGraph (default):
|
|
10
|
+
```python
|
|
11
|
+
from agentdelta import record
|
|
12
|
+
|
|
13
|
+
# Baseline (before your change)
|
|
14
|
+
with record("baseline.jsonl", run_id="v1.0") as cb:
|
|
15
|
+
agent.invoke({"input": "..."}, config={"callbacks": [cb]})
|
|
16
|
+
|
|
17
|
+
# Candidate (after your change)
|
|
18
|
+
with record("candidate.jsonl", run_id="v1.1") as cb:
|
|
19
|
+
agent.invoke({"input": "..."}, config={"callbacks": [cb]})
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
For custom/framework-agnostic:
|
|
23
|
+
```python
|
|
24
|
+
from agentdelta import AgentTrace
|
|
25
|
+
from agentdelta.trace import TraceNode, TraceEdge, NodeType, EdgeType
|
|
26
|
+
|
|
27
|
+
trace = AgentTrace(run_id="my_run")
|
|
28
|
+
trace.add_node(TraceNode(step=1, node_type=NodeType.START, content="user input here"))
|
|
29
|
+
trace.add_node(TraceNode(step=2, node_type=NodeType.LLM, content="reasoning text here"))
|
|
30
|
+
trace.add_node(TraceNode(step=3, node_type=NodeType.TOOL_CALL, content="tool_name(args)"))
|
|
31
|
+
trace.add_node(TraceNode(step=4, node_type=NodeType.TOOL_RETURN, content="tool result"))
|
|
32
|
+
trace.add_node(TraceNode(step=5, node_type=NodeType.END, content="final output"))
|
|
33
|
+
trace.save("my_run.jsonl")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Then show the diff command: `agentdelta diff baseline.jsonl candidate.jsonl`
|
|
37
|
+
|
|
38
|
+
Context:
|
|
39
|
+
- AgentdeltaCallback is LangChain BaseCallbackHandler-compatible (no import of BaseCallbackHandler needed)
|
|
40
|
+
- record() saves the trace on context exit, even if the agent raises an exception
|
|
41
|
+
- run_id appears in the diff report header — use something meaningful (version, git hash, etc.)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
language: en
|
|
2
|
+
early_access: false
|
|
3
|
+
reviews:
|
|
4
|
+
profile: chill
|
|
5
|
+
request_changes_workflow: false
|
|
6
|
+
high_level_summary: true
|
|
7
|
+
poem: false
|
|
8
|
+
review_status: true
|
|
9
|
+
collapse_walkthrough: false
|
|
10
|
+
auto_review:
|
|
11
|
+
enabled: true
|
|
12
|
+
drafts: false
|
|
13
|
+
path_filters:
|
|
14
|
+
- "!assets/**"
|
|
15
|
+
- "!*.md"
|
|
16
|
+
path_instructions:
|
|
17
|
+
- path: "src/agentdelta/diff.py"
|
|
18
|
+
instructions: |
|
|
19
|
+
This is the core fork-detection algorithm. Be strict about:
|
|
20
|
+
- Threshold defaults must not change without a migration note
|
|
21
|
+
- New status values must be added to the literal type in StepDiff
|
|
22
|
+
- Fork detection must remain O(n) per alignment pair
|
|
23
|
+
- path: "src/agentdelta/embed.py"
|
|
24
|
+
instructions: |
|
|
25
|
+
Thread safety: _get_model() uses double-checked locking. Any change
|
|
26
|
+
to the singleton pattern must preserve thread safety.
|
|
27
|
+
- path: "tests/**"
|
|
28
|
+
instructions: |
|
|
29
|
+
Ensure new tests use fixtures from conftest.py rather than
|
|
30
|
+
building traces inline. CLI tests must use click.testing.CliRunner.
|
|
31
|
+
chat:
|
|
32
|
+
auto_reply: true
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "agentdelta",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"schema": "v1",
|
|
5
|
+
"context": [
|
|
6
|
+
{
|
|
7
|
+
"provider": "file",
|
|
8
|
+
"params": {
|
|
9
|
+
"nRetrieve": 20,
|
|
10
|
+
"nFinal": 5
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"provider": "code"
|
|
15
|
+
}
|
|
16
|
+
],
|
|
17
|
+
"docs": [
|
|
18
|
+
{
|
|
19
|
+
"title": "agentdelta Architecture",
|
|
20
|
+
"startUrl": "file://./ARCHITECTURE.md",
|
|
21
|
+
"faviconUrl": ""
|
|
22
|
+
}
|
|
23
|
+
],
|
|
24
|
+
"customCommands": [
|
|
25
|
+
{
|
|
26
|
+
"name": "diff",
|
|
27
|
+
"prompt": "Run agentdelta diff on the two trace files I specify. Explain the fork point in plain English, whether it represents a behavioral regression, and what likely caused it (prompt change, model upgrade, tool order change). Files: {input}",
|
|
28
|
+
"description": "Diff two agentdelta trace files and explain any fork"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"name": "add-adapter",
|
|
32
|
+
"prompt": "Scaffold a new agentdelta instrumentation adapter for the framework: {input}. Follow the pattern in src/agentdelta/instrument.py. Create the adapter file, export it from __init__.py, and write at least 3 tests.",
|
|
33
|
+
"description": "Scaffold a new framework instrumentation adapter"
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"rules": [
|
|
37
|
+
"This is agentdelta — a semantic diff engine for AI agent traces.",
|
|
38
|
+
"All embeddings must go through _get_model() in embed.py. Never import SentenceTransformer directly.",
|
|
39
|
+
"TraceNode.id is content-addressed (SHA-256). Never assign it manually.",
|
|
40
|
+
"Library code must not use print() — use rich.console.Console.",
|
|
41
|
+
"All public API must have type annotations and docstrings.",
|
|
42
|
+
"fork_threshold=0.70 and match_threshold=0.85 are the algorithmic defaults — don't change without tests."
|
|
43
|
+
]
|
|
44
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: agentdelta coding rules and architecture context
|
|
3
|
+
globs: ["src/**/*.py", "tests/**/*.py"]
|
|
4
|
+
alwaysApply: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# agentdelta — Cursor Rules
|
|
8
|
+
|
|
9
|
+
## What this project does
|
|
10
|
+
Semantic diff engine for AI agent behavior. Records agent traces as JSONL, embeds steps with `all-MiniLM-L6-v2`, aligns two traces by cosine similarity, detects the first semantic fork point.
|
|
11
|
+
|
|
12
|
+
## Module map
|
|
13
|
+
- `trace.py` — data model (TraceNode, TraceEdge, AgentTrace, NodeType, EdgeType)
|
|
14
|
+
- `embed.py` — thread-safe sentence-transformer singleton + sliding-window alignment
|
|
15
|
+
- `diff.py` — diff_traces() → DiffResult, ForkPoint, StepDiff
|
|
16
|
+
- `instrument.py` — LangChain callback + record() context manager
|
|
17
|
+
- `report.py` — Rich/JSON/Markdown formatters
|
|
18
|
+
- `cli.py` — Click CLI (diff, inspect commands)
|
|
19
|
+
|
|
20
|
+
## Invariants — never break these
|
|
21
|
+
- TraceNode.id is SHA-256[:16] of "{node_type}:{content}" — must stay content-addressed
|
|
22
|
+
- embed_trace() is idempotent — safe to call twice on the same trace
|
|
23
|
+
- fork_threshold=0.70 and match_threshold=0.85 are the defaults; don't change without tests
|
|
24
|
+
- has_regression is True iff fork_point is not None — no other state
|
|
25
|
+
- All embeddings come from the thread-safe _get_model() singleton in embed.py
|
|
26
|
+
|
|
27
|
+
## Code style
|
|
28
|
+
- Python 3.10+, fully type-annotated, mypy strict mode
|
|
29
|
+
- Ruff lint rules: E W F I UP B S N SIM RUF PT
|
|
30
|
+
- No print() in library code — use rich.console.Console
|
|
31
|
+
- All public functions and classes must have Google-style docstrings
|
|
32
|
+
- Tests: pytest, CliRunner for CLI tests, no mocking of embed model (use short test traces)
|
|
33
|
+
|
|
34
|
+
## When adding a new output format
|
|
35
|
+
1. Add `to_<format>(result: DiffResult) -> str` in report.py
|
|
36
|
+
2. Add format name to --format Choice in cli.py
|
|
37
|
+
3. Add at least 2 tests in tests/test_report.py
|
|
38
|
+
|
|
39
|
+
## When adding a new instrumentation adapter
|
|
40
|
+
1. Create src/agentdelta/instrument_<framework>.py
|
|
41
|
+
2. Implement a context manager mirroring record() in instrument.py
|
|
42
|
+
3. Export from __init__.py, add to __all__ alphabetically
|
|
43
|
+
4. Add tests in tests/test_instrument_<framework>.py
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
root = true
|
|
2
|
+
|
|
3
|
+
[*]
|
|
4
|
+
indent_style = space
|
|
5
|
+
indent_size = 4
|
|
6
|
+
end_of_line = lf
|
|
7
|
+
charset = utf-8
|
|
8
|
+
trim_trailing_whitespace = true
|
|
9
|
+
insert_final_newline = true
|
|
10
|
+
|
|
11
|
+
[*.yml]
|
|
12
|
+
indent_size = 2
|
|
13
|
+
|
|
14
|
+
[*.yaml]
|
|
15
|
+
indent_size = 2
|
|
16
|
+
|
|
17
|
+
[*.toml]
|
|
18
|
+
indent_size = 4
|
|
19
|
+
|
|
20
|
+
[*.md]
|
|
21
|
+
trim_trailing_whitespace = false
|
|
22
|
+
|
|
23
|
+
[Makefile]
|
|
24
|
+
indent_style = tab
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
github: sandeep-alluru
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: Bug Report
|
|
2
|
+
description: Something is broken
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: markdown
|
|
6
|
+
attributes:
|
|
7
|
+
value: |
|
|
8
|
+
Thanks for taking the time to report a bug. Please fill in as much detail as you can.
|
|
9
|
+
|
|
10
|
+
- type: input
|
|
11
|
+
id: version
|
|
12
|
+
attributes:
|
|
13
|
+
label: agentdelta version
|
|
14
|
+
placeholder: "0.1.0"
|
|
15
|
+
validations:
|
|
16
|
+
required: true
|
|
17
|
+
|
|
18
|
+
- type: input
|
|
19
|
+
id: python
|
|
20
|
+
attributes:
|
|
21
|
+
label: Python version
|
|
22
|
+
placeholder: "3.11.5"
|
|
23
|
+
validations:
|
|
24
|
+
required: true
|
|
25
|
+
|
|
26
|
+
- type: textarea
|
|
27
|
+
id: command
|
|
28
|
+
attributes:
|
|
29
|
+
label: Command run
|
|
30
|
+
description: The exact command or Python code that triggered the bug
|
|
31
|
+
render: bash
|
|
32
|
+
validations:
|
|
33
|
+
required: true
|
|
34
|
+
|
|
35
|
+
- type: textarea
|
|
36
|
+
id: expected
|
|
37
|
+
attributes:
|
|
38
|
+
label: Expected behavior
|
|
39
|
+
validations:
|
|
40
|
+
required: true
|
|
41
|
+
|
|
42
|
+
- type: textarea
|
|
43
|
+
id: actual
|
|
44
|
+
attributes:
|
|
45
|
+
label: Actual behavior
|
|
46
|
+
description: Include the full error message and traceback if applicable
|
|
47
|
+
render: text
|
|
48
|
+
validations:
|
|
49
|
+
required: true
|
|
50
|
+
|
|
51
|
+
- type: textarea
|
|
52
|
+
id: trace_format
|
|
53
|
+
attributes:
|
|
54
|
+
label: Trace file snippet (optional)
|
|
55
|
+
description: A minimal .jsonl trace that reproduces the issue (redact any sensitive content)
|
|
56
|
+
render: json
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Feature Request
|
|
2
|
+
description: Propose a new feature or improvement
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: problem
|
|
7
|
+
attributes:
|
|
8
|
+
label: Problem
|
|
9
|
+
description: What problem does this solve? Who experiences it?
|
|
10
|
+
validations:
|
|
11
|
+
required: true
|
|
12
|
+
|
|
13
|
+
- type: textarea
|
|
14
|
+
id: solution
|
|
15
|
+
attributes:
|
|
16
|
+
label: Proposed solution
|
|
17
|
+
description: What would you like to happen?
|
|
18
|
+
validations:
|
|
19
|
+
required: true
|
|
20
|
+
|
|
21
|
+
- type: textarea
|
|
22
|
+
id: alternatives
|
|
23
|
+
attributes:
|
|
24
|
+
label: Alternatives considered
|
|
25
|
+
description: Other approaches you've thought about
|
|
26
|
+
|
|
27
|
+
- type: dropdown
|
|
28
|
+
id: contribution
|
|
29
|
+
attributes:
|
|
30
|
+
label: Are you willing to implement this?
|
|
31
|
+
options:
|
|
32
|
+
- "Yes — I'll open a PR"
|
|
33
|
+
- "Maybe — if someone helps"
|
|
34
|
+
- "No — just requesting"
|
|
35
|
+
validations:
|
|
36
|
+
required: true
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
## What changed
|
|
2
|
+
|
|
3
|
+
<!-- One paragraph summary of what this PR does -->
|
|
4
|
+
|
|
5
|
+
## Why
|
|
6
|
+
|
|
7
|
+
<!-- Motivation: bug it fixes, feature it adds, or issue it closes -->
|
|
8
|
+
Closes #
|
|
9
|
+
|
|
10
|
+
## Type of change
|
|
11
|
+
|
|
12
|
+
- [ ] `fix:` Bug fix
|
|
13
|
+
- [ ] `feat:` New feature
|
|
14
|
+
- [ ] `refactor:` Refactor (no behavior change)
|
|
15
|
+
- [ ] `docs:` Documentation only
|
|
16
|
+
- [ ] `test:` Tests only
|
|
17
|
+
- [ ] `chore:` Dependency bump, CI config, etc.
|
|
18
|
+
- [ ] `perf:` Performance improvement
|
|
19
|
+
|
|
20
|
+
## Testing done
|
|
21
|
+
|
|
22
|
+
<!-- How did you verify this works? Which test files cover it? -->
|
|
23
|
+
|
|
24
|
+
- [ ] `make test` passes locally
|
|
25
|
+
- [ ] `make lint` passes locally
|
|
26
|
+
- [ ] `make typecheck` passes locally
|
|
27
|
+
- [ ] New tests added for changed behavior
|
|
28
|
+
|
|
29
|
+
## Checklist
|
|
30
|
+
|
|
31
|
+
- [ ] `CHANGELOG.md` updated under `[Unreleased]`
|
|
32
|
+
- [ ] Public API changes have docstrings
|
|
33
|
+
- [ ] No secrets, API keys, or trace data committed
|
|
34
|
+
- [ ] README updated if CLI flags or behavior changed
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# GitHub Copilot Instructions — agentdelta
|
|
2
|
+
|
|
3
|
+
agentdelta is a semantic diff engine for AI agent behavior. It records agent execution traces as JSONL, embeds each step with `all-MiniLM-L6-v2`, and detects the first semantic fork point between two runs.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
| Module | Purpose |
|
|
8
|
+
|---|---|
|
|
9
|
+
| `trace.py` | Data model: `NodeType`, `EdgeType`, `TraceNode`, `TraceEdge`, `AgentTrace` |
|
|
10
|
+
| `embed.py` | Sentence-transformer singleton + `align_traces()` sliding-window alignment |
|
|
11
|
+
| `diff.py` | `diff_traces()` → `DiffResult`, `ForkPoint`, `StepDiff` |
|
|
12
|
+
| `instrument.py` | `AgentdeltaCallback` (LangChain) + `record()` context manager |
|
|
13
|
+
| `report.py` | `print_diff()`, `to_json()`, `to_markdown()` formatters |
|
|
14
|
+
| `cli.py` | Click CLI: `agentdelta diff` and `agentdelta inspect` |
|
|
15
|
+
|
|
16
|
+
## Key invariants
|
|
17
|
+
|
|
18
|
+
- `TraceNode.id` is SHA-256[:16] of `"{node_type}:{content}"` — content-addressed, deterministic
|
|
19
|
+
- `embed_trace()` mutates nodes in-place; idempotent
|
|
20
|
+
- `fork_threshold=0.70` — cosine similarity below this → ForkPoint
|
|
21
|
+
- `match_threshold=0.85` — cosine similarity above this → "match" (no change)
|
|
22
|
+
- `has_regression` is `True` iff `fork_point is not None`
|
|
23
|
+
- Node content is truncated: LLM ≤2000 chars, tool ≤500 chars
|
|
24
|
+
|
|
25
|
+
## Code style
|
|
26
|
+
|
|
27
|
+
- Python 3.10+, type-annotated, mypy strict
|
|
28
|
+
- Ruff rules: E, W, F, I, UP, B, S, N, SIM, RUF, PT; ignore S101 (assert in tests), N806
|
|
29
|
+
- No `print()` in library code — use `rich.console.Console`
|
|
30
|
+
- All public classes and functions must have docstrings
|
|
31
|
+
- Tests use `pytest`; CLI tests use `click.testing.CliRunner`
|
|
32
|
+
|
|
33
|
+
## Trace JSONL format
|
|
34
|
+
|
|
35
|
+
```jsonl
|
|
36
|
+
{"type": "trace_meta", "run_id": "v1.0"}
|
|
37
|
+
{"type": "node", "id": "...", "step": 1, "node_type": "start", "content": "...", "metadata": {}}
|
|
38
|
+
{"type": "edge", "source_step": 1, "target_step": 2, "edge_type": "sequence", "label": ""}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Adding a new output format
|
|
42
|
+
|
|
43
|
+
Add `to_<format>(result: DiffResult) -> str` to `report.py`, add to `--format` choices in `cli.py`, add tests.
|
|
44
|
+
|
|
45
|
+
## Adding a new instrumentation adapter
|
|
46
|
+
|
|
47
|
+
Create `src/agentdelta/instrument_<framework>.py`. See `instrument.py` (LangChain) as reference. Export from `__init__.py`. Add tests.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: pip
|
|
4
|
+
directory: /
|
|
5
|
+
schedule:
|
|
6
|
+
interval: weekly
|
|
7
|
+
open-pull-requests-limit: 5
|
|
8
|
+
labels:
|
|
9
|
+
- dependencies
|
|
10
|
+
|
|
11
|
+
- package-ecosystem: github-actions
|
|
12
|
+
directory: /
|
|
13
|
+
schedule:
|
|
14
|
+
interval: weekly
|
|
15
|
+
open-pull-requests-limit: 5
|
|
16
|
+
labels:
|
|
17
|
+
- dependencies
|
|
18
|
+
- ci
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Bug reports
|
|
2
|
+
- name: bug
|
|
3
|
+
color: "d73a4a"
|
|
4
|
+
description: "Something isn't working"
|
|
5
|
+
|
|
6
|
+
- name: fix
|
|
7
|
+
color: "d73a4a"
|
|
8
|
+
description: "A bug fix"
|
|
9
|
+
|
|
10
|
+
# Features
|
|
11
|
+
- name: enhancement
|
|
12
|
+
color: "a2eeef"
|
|
13
|
+
description: "New feature or request"
|
|
14
|
+
|
|
15
|
+
- name: feature
|
|
16
|
+
color: "a2eeef"
|
|
17
|
+
description: "New feature"
|
|
18
|
+
|
|
19
|
+
- name: feat
|
|
20
|
+
color: "a2eeef"
|
|
21
|
+
description: "New feature (conventional commit)"
|
|
22
|
+
|
|
23
|
+
# Documentation
|
|
24
|
+
- name: documentation
|
|
25
|
+
color: "0075ca"
|
|
26
|
+
description: "Improvements or additions to documentation"
|
|
27
|
+
|
|
28
|
+
- name: docs
|
|
29
|
+
color: "0075ca"
|
|
30
|
+
description: "Documentation update"
|
|
31
|
+
|
|
32
|
+
# Maintenance
|
|
33
|
+
- name: chore
|
|
34
|
+
color: "e4e669"
|
|
35
|
+
description: "Build process, dependencies, or tooling"
|
|
36
|
+
|
|
37
|
+
- name: dependencies
|
|
38
|
+
color: "0366d6"
|
|
39
|
+
description: "Pull requests that update a dependency"
|
|
40
|
+
|
|
41
|
+
- name: ci
|
|
42
|
+
color: "e4e669"
|
|
43
|
+
description: "CI/CD changes"
|
|
44
|
+
|
|
45
|
+
- name: refactor
|
|
46
|
+
color: "fef2c0"
|
|
47
|
+
description: "Code refactoring without behavior change"
|
|
48
|
+
|
|
49
|
+
# Tests
|
|
50
|
+
- name: test
|
|
51
|
+
color: "bfd4f2"
|
|
52
|
+
description: "Adding missing tests or correcting existing tests"
|
|
53
|
+
|
|
54
|
+
# Version bumps
|
|
55
|
+
- name: major
|
|
56
|
+
color: "ee0701"
|
|
57
|
+
description: "Breaking change — bumps major version"
|
|
58
|
+
|
|
59
|
+
- name: minor
|
|
60
|
+
color: "fbca04"
|
|
61
|
+
description: "Non-breaking new feature — bumps minor version"
|
|
62
|
+
|
|
63
|
+
- name: patch
|
|
64
|
+
color: "0075ca"
|
|
65
|
+
description: "Backwards compatible bug fix — bumps patch version"
|
|
66
|
+
|
|
67
|
+
# Meta
|
|
68
|
+
- name: good first issue
|
|
69
|
+
color: "7057ff"
|
|
70
|
+
description: "Good for newcomers"
|
|
71
|
+
|
|
72
|
+
- name: help wanted
|
|
73
|
+
color: "008672"
|
|
74
|
+
description: "Extra attention is needed"
|
|
75
|
+
|
|
76
|
+
- name: security
|
|
77
|
+
color: "ee0701"
|
|
78
|
+
description: "Security vulnerability"
|
|
79
|
+
|
|
80
|
+
- name: pinned
|
|
81
|
+
color: "e11d48"
|
|
82
|
+
description: "Pinned issue — do not close"
|
|
83
|
+
|
|
84
|
+
- name: duplicate
|
|
85
|
+
color: "cfd3d7"
|
|
86
|
+
description: "This issue or pull request already exists"
|
|
87
|
+
|
|
88
|
+
- name: invalid
|
|
89
|
+
color: "e4e4e7"
|
|
90
|
+
description: "This doesn't seem right"
|
|
91
|
+
|
|
92
|
+
- name: wontfix
|
|
93
|
+
color: "e4e4e7"
|
|
94
|
+
description: "This will not be worked on"
|
|
95
|
+
|
|
96
|
+
- name: question
|
|
97
|
+
color: "d876e3"
|
|
98
|
+
description: "Further information is requested"
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name-template: "v$RESOLVED_VERSION"
|
|
2
|
+
tag-template: "v$RESOLVED_VERSION"
|
|
3
|
+
categories:
|
|
4
|
+
- title: "🚀 New Features"
|
|
5
|
+
labels:
|
|
6
|
+
- "feature"
|
|
7
|
+
- "enhancement"
|
|
8
|
+
- "feat"
|
|
9
|
+
- title: "🐛 Bug Fixes"
|
|
10
|
+
labels:
|
|
11
|
+
- "fix"
|
|
12
|
+
- "bugfix"
|
|
13
|
+
- "bug"
|
|
14
|
+
- title: "🧰 Maintenance"
|
|
15
|
+
labels:
|
|
16
|
+
- "chore"
|
|
17
|
+
- "dependencies"
|
|
18
|
+
- "ci"
|
|
19
|
+
- "refactor"
|
|
20
|
+
- title: "📚 Documentation"
|
|
21
|
+
labels:
|
|
22
|
+
- "documentation"
|
|
23
|
+
- "docs"
|
|
24
|
+
- title: "🧪 Tests"
|
|
25
|
+
labels:
|
|
26
|
+
- "test"
|
|
27
|
+
- "tests"
|
|
28
|
+
change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
|
|
29
|
+
change-prefix: ""
|
|
30
|
+
no-changes-template: "No changes."
|
|
31
|
+
version-resolver:
|
|
32
|
+
major:
|
|
33
|
+
labels:
|
|
34
|
+
- "major"
|
|
35
|
+
- "breaking"
|
|
36
|
+
minor:
|
|
37
|
+
labels:
|
|
38
|
+
- "minor"
|
|
39
|
+
- "feature"
|
|
40
|
+
- "enhancement"
|
|
41
|
+
- "feat"
|
|
42
|
+
patch:
|
|
43
|
+
labels:
|
|
44
|
+
- "patch"
|
|
45
|
+
- "fix"
|
|
46
|
+
- "bugfix"
|
|
47
|
+
- "documentation"
|
|
48
|
+
- "chore"
|
|
49
|
+
- "dependencies"
|
|
50
|
+
- "ci"
|
|
51
|
+
- "refactor"
|
|
52
|
+
default: patch
|
|
53
|
+
template: |
|
|
54
|
+
## What's Changed
|
|
55
|
+
|
|
56
|
+
$CHANGES
|
|
57
|
+
|
|
58
|
+
**Full Changelog**: https://github.com/sandeep-alluru/agentdelta/compare/$PREVIOUS_TAG...v$RESOLVED_VERSION
|