agenticlens 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenticlens-0.1.0/.github/workflows/ci.yml +38 -0
- agenticlens-0.1.0/.gitignore +14 -0
- agenticlens-0.1.0/AgenticLens_Spec.md +463 -0
- agenticlens-0.1.0/LICENSE +21 -0
- agenticlens-0.1.0/PKG-INFO +86 -0
- agenticlens-0.1.0/README.md +33 -0
- agenticlens-0.1.0/ROADMAP.md +30 -0
- agenticlens-0.1.0/docs/index.md +14 -0
- agenticlens-0.1.0/examples/basic_usage.py +34 -0
- agenticlens-0.1.0/examples/recommendations_demo.py +89 -0
- agenticlens-0.1.0/mkdocs.yml +9 -0
- agenticlens-0.1.0/pyproject.toml +82 -0
- agenticlens-0.1.0/src/agenticlens/__init__.py +5 -0
- agenticlens-0.1.0/src/agenticlens/analyzers/__init__.py +3 -0
- agenticlens-0.1.0/src/agenticlens/analyzers/base.py +17 -0
- agenticlens-0.1.0/src/agenticlens/cli/__init__.py +0 -0
- agenticlens-0.1.0/src/agenticlens/cli/main.py +92 -0
- agenticlens-0.1.0/src/agenticlens/cli/render.py +72 -0
- agenticlens-0.1.0/src/agenticlens/config/__init__.py +17 -0
- agenticlens-0.1.0/src/agenticlens/config/pricing.py +59 -0
- agenticlens-0.1.0/src/agenticlens/config/pricing.yaml +24 -0
- agenticlens-0.1.0/src/agenticlens/config/settings.py +42 -0
- agenticlens-0.1.0/src/agenticlens/exporters/__init__.py +5 -0
- agenticlens-0.1.0/src/agenticlens/exporters/base.py +12 -0
- agenticlens-0.1.0/src/agenticlens/exporters/csv_exporter.py +44 -0
- agenticlens-0.1.0/src/agenticlens/exporters/json_exporter.py +9 -0
- agenticlens-0.1.0/src/agenticlens/metrics/__init__.py +3 -0
- agenticlens-0.1.0/src/agenticlens/metrics/calculator.py +20 -0
- agenticlens-0.1.0/src/agenticlens/models/__init__.py +14 -0
- agenticlens-0.1.0/src/agenticlens/models/enums.py +16 -0
- agenticlens-0.1.0/src/agenticlens/models/metrics.py +25 -0
- agenticlens-0.1.0/src/agenticlens/models/recommendation.py +13 -0
- agenticlens-0.1.0/src/agenticlens/models/step.py +25 -0
- agenticlens-0.1.0/src/agenticlens/models/workflow.py +33 -0
- agenticlens-0.1.0/src/agenticlens/profiler/__init__.py +4 -0
- agenticlens-0.1.0/src/agenticlens/profiler/context.py +23 -0
- agenticlens-0.1.0/src/agenticlens/profiler/profile.py +46 -0
- agenticlens-0.1.0/src/agenticlens/profiler/step.py +76 -0
- agenticlens-0.1.0/src/agenticlens/providers/__init__.py +13 -0
- agenticlens-0.1.0/src/agenticlens/providers/anthropic.py +19 -0
- agenticlens-0.1.0/src/agenticlens/providers/base.py +31 -0
- agenticlens-0.1.0/src/agenticlens/providers/openai.py +19 -0
- agenticlens-0.1.0/src/agenticlens/providers/registry.py +24 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/__init__.py +15 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/base.py +13 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/duplicate_tool_calls.py +48 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/engine.py +41 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/excessive_chunks.py +42 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/long_history.py +37 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/repeated_prompt.py +50 -0
- agenticlens-0.1.0/src/agenticlens/recommenders/utils.py +11 -0
- agenticlens-0.1.0/src/agenticlens/utils/__init__.py +0 -0
- agenticlens-0.1.0/tests/test_cli.py +137 -0
- agenticlens-0.1.0/tests/test_exporters.py +42 -0
- agenticlens-0.1.0/tests/test_models.py +48 -0
- agenticlens-0.1.0/tests/test_pricing.py +24 -0
- agenticlens-0.1.0/tests/test_profiler.py +86 -0
- agenticlens-0.1.0/tests/test_providers.py +46 -0
- agenticlens-0.1.0/tests/test_recommender_rules.py +155 -0
- agenticlens-0.1.0/tests/test_recommenders.py +36 -0
- agenticlens-0.1.0/uv.lock +1292 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Install uv
|
|
18
|
+
uses: astral-sh/setup-uv@v3
|
|
19
|
+
with:
|
|
20
|
+
enable-cache: true
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
run: uv python install ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: uv sync --extra dev --python ${{ matrix.python-version }}
|
|
27
|
+
|
|
28
|
+
- name: Lint (ruff check)
|
|
29
|
+
run: uv run ruff check .
|
|
30
|
+
|
|
31
|
+
- name: Format check (ruff format)
|
|
32
|
+
run: uv run ruff format --check .
|
|
33
|
+
|
|
34
|
+
- name: Type check (mypy)
|
|
35
|
+
run: uv run mypy
|
|
36
|
+
|
|
37
|
+
- name: Test (pytest)
|
|
38
|
+
run: uv run pytest
|
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
# AgenticLens — Project Master Specification (MVP)
|
|
2
|
+
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
**AgenticLens** is an open-source Python library that helps developers **profile, analyze, and optimize token consumption** in LLM-powered applications and agentic workflows.
|
|
8
|
+
|
|
9
|
+
Unlike traditional observability tools that only display token usage and cost, AgenticLens explains:
|
|
10
|
+
|
|
11
|
+
- **Where** tokens were consumed
|
|
12
|
+
- **Why** they were consumed
|
|
13
|
+
- **Which** workflow step was responsible
|
|
14
|
+
- **Which** tool calls were expensive
|
|
15
|
+
- **How** developers can reduce token usage and cost
|
|
16
|
+
|
|
17
|
+
> **Design principle:** Framework-agnostic and provider-agnostic.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Vision
|
|
22
|
+
|
|
23
|
+
Modern AI applications consist of multiple LLM calls, planning agents, memory systems, retrieval pipelines, tool calls, MCP servers, and multi-agent workflows.
|
|
24
|
+
|
|
25
|
+
Current observability tools show traces and token counts but provide **limited guidance on optimization**.
|
|
26
|
+
|
|
27
|
+
AgenticLens should become the **"performance profiler" for AI applications**, similar to:
|
|
28
|
+
|
|
29
|
+
| Analogy | Domain |
|
|
30
|
+
|---|---|
|
|
31
|
+
| `cProfile` | Python runtime |
|
|
32
|
+
| Chrome DevTools | Browser performance |
|
|
33
|
+
| TensorBoard | ML training |
|
|
34
|
+
|
|
35
|
+
### The library should ultimately answer three questions
|
|
36
|
+
|
|
37
|
+
1. Where did the tokens go?
|
|
38
|
+
2. Why were they spent?
|
|
39
|
+
3. How can developers reduce them?
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Initial Scope (MVP)
|
|
44
|
+
|
|
45
|
+
The first version focuses on **profiling and reporting**.
|
|
46
|
+
|
|
47
|
+
> **Out of scope for MVP:** dashboards, databases, web applications, enterprise features.
|
|
48
|
+
|
|
49
|
+
Keep the MVP lightweight and suitable for publishing as a PyPI package.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Primary Features
|
|
54
|
+
|
|
55
|
+
### 1. Workflow Profiler
|
|
56
|
+
|
|
57
|
+
Profile an entire workflow with a single context manager:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from agenticlens import profile
|
|
61
|
+
|
|
62
|
+
with profile("Customer Support"):
|
|
63
|
+
agent.run(question)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The profiler establishes the workflow context (start/end time, id) but does **not** auto-capture LLM calls — see [Instrumentation Model](#instrumentation-model) below.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
### 2. Step Profiling
|
|
71
|
+
|
|
72
|
+
Every workflow consists of multiple steps. Each step should have independent metrics, and is wrapped **explicitly** by the developer:
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from agenticlens import profile, step
|
|
76
|
+
|
|
77
|
+
with profile("Customer Support"):
|
|
78
|
+
with step("Planner", type="planner") as s:
|
|
79
|
+
plan = planner_llm.invoke(prompt)
|
|
80
|
+
s.record(plan) # attaches token usage from the provider response
|
|
81
|
+
|
|
82
|
+
with step("Retriever", type="retriever") as s:
|
|
83
|
+
chunks = retriever.search(query)
|
|
84
|
+
|
|
85
|
+
with step("Final Response", type="llm_call") as s:
|
|
86
|
+
answer = response_llm.invoke(context)
|
|
87
|
+
s.record(answer)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
`step()` yields a handle whose `.record(response)` extracts token usage from a provider response (auto-detected via the provider registry) and attaches it to the step. Steps are recorded against the currently active workflow via a `contextvars`-based stack, so calling `step()` outside of a `profile()` block raises a clear error.
|
|
91
|
+
|
|
92
|
+
**Example step types:**
|
|
93
|
+
|
|
94
|
+
- Planner
|
|
95
|
+
- Retriever
|
|
96
|
+
- Tool Call
|
|
97
|
+
- LLM Call
|
|
98
|
+
- Memory
|
|
99
|
+
- Final Response
|
|
100
|
+
|
|
101
|
+
#### Instrumentation Model
|
|
102
|
+
|
|
103
|
+
> **Decision:** AgenticLens uses an **explicit step API**, not automatic SDK monkey-patching.
|
|
104
|
+
|
|
105
|
+
Rationale:
|
|
106
|
+
|
|
107
|
+
- Reliable across provider SDK versions — no breakage when OpenAI/Anthropic change their client internals.
|
|
108
|
+
- Testable without mocking global state.
|
|
109
|
+
- Step boundaries (Planner vs. Retriever vs. Tool Call) are a workflow concept the library cannot infer reliably; the developer already knows them.
|
|
110
|
+
|
|
111
|
+
Trade-off accepted: integrating AgenticLens requires adding `with step(...):` around LLM/tool calls. This cost is acceptable for an MVP and can be revisited post-MVP with optional auto-instrumentation adapters (e.g. for LangChain callbacks) once the core model is stable.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
### 3. Token Metrics
|
|
116
|
+
|
|
117
|
+
Collected per step:
|
|
118
|
+
|
|
119
|
+
| Metric | Description |
|
|
120
|
+
|---|---|
|
|
121
|
+
| `prompt_tokens` | Tokens in the input prompt |
|
|
122
|
+
| `completion_tokens` | Tokens in the model output |
|
|
123
|
+
| `total_tokens` | Sum of prompt + completion |
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
### 4. Cost Metrics
|
|
128
|
+
|
|
129
|
+
| Metric | Description |
|
|
130
|
+
|---|---|
|
|
131
|
+
| `input_cost` | Cost of prompt tokens |
|
|
132
|
+
| `output_cost` | Cost of completion tokens |
|
|
133
|
+
| `total_cost` | Total spend for the step |
|
|
134
|
+
|
|
135
|
+
**Pricing source of truth:** a static pricing table bundled at `src/agenticlens/config/pricing.yaml`, keyed by `provider:model`, kept current via periodic manual updates (community PRs welcome). Resolution order:
|
|
136
|
+
|
|
137
|
+
1. User-supplied pricing override (via `pyproject.toml` / YAML config / env var) — always wins.
|
|
138
|
+
2. Bundled `pricing.yaml` entry for the exact `provider:model`.
|
|
139
|
+
3. Unknown model → cost fields are `None` and an `UnknownModelPricingWarning` is emitted. **Never silently report `$0.00`** for an unpriced model — that's misleading, not a graceful fallback.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
### 5. Performance Metrics
|
|
144
|
+
|
|
145
|
+
| Metric | Description |
|
|
146
|
+
|---|---|
|
|
147
|
+
| `latency` | Total step duration, in seconds |
|
|
148
|
+
| `ttft` | Time To First Token. `float \| None` — only populated when the wrapped call is a streaming call; `None` for non-streaming calls (the common case). Not an error or missing-data condition. |
|
|
149
|
+
| `tps` | Tokens Per Second (`completion_tokens / latency`) |
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### 6. Workflow Summary Output
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
╔══════════════════════════════╗
|
|
157
|
+
║ Customer Support Agent ║
|
|
158
|
+
╠══════════════════════════════╣
|
|
159
|
+
║ Total Tokens │ 24,581 ║
|
|
160
|
+
║ Total Cost │ $0.24 ║
|
|
161
|
+
║ Latency │ 18.2 sec ║
|
|
162
|
+
╚══════════════════════════════╝
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
### 7. Step Breakdown Output
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
Planner
|
|
171
|
+
───────────────────────────────
|
|
172
|
+
Prompt Tokens 850
|
|
173
|
+
Completion Tokens 210
|
|
174
|
+
Cost $0.02
|
|
175
|
+
Latency 1.1 sec
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Repeated for every workflow step.
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Recommendation Engine (Rule-Based)
|
|
183
|
+
|
|
184
|
+
AgenticLens does not stop at reporting — it **analyzes** the workflow and generates actionable optimization suggestions.
|
|
185
|
+
|
|
186
|
+
> Initial recommendations use **simple heuristics**, not AI-generated analysis.
|
|
187
|
+
|
|
188
|
+
### MVP Heuristic Rules
|
|
189
|
+
|
|
190
|
+
Each rule below detects a pattern and estimates the tokens it would save (`tokens_saved`), which feeds the savings calculation.
|
|
191
|
+
|
|
192
|
+
| Rule | Detection logic | `tokens_saved` estimate |
|
|
193
|
+
|---|---|---|
|
|
194
|
+
| **Repeated system prompt** | Hash the first N tokens (configurable, default 50) of each step's prompt. If the same hash appears in ≥2 steps, flag all occurrences after the first. | Sum of token counts of the repeated prefix across the duplicate occurrences. |
|
|
195
|
+
| **Excessive retrieved chunks** | Retriever step returns more than `max_chunks` (configurable, default 8) chunks. | `(chunk_count - max_chunks) × avg_tokens_per_chunk`. |
|
|
196
|
+
| **Long conversation history** | A step's `prompt_tokens` attributable to history/memory content exceeds `history_token_limit` (configurable, default 4000). | `prompt_tokens_from_history - history_token_limit`. |
|
|
197
|
+
| **Duplicate tool calls** | Two or more Tool Call steps in the same workflow share an identical `(tool_name, arguments)` signature. | Sum of `prompt_tokens + completion_tokens` for every duplicate occurrence after the first. |
|
|
198
|
+
|
|
199
|
+
All thresholds live in `RecommenderConfig` and are user-overridable; defaults above are starting points, not hard-coded constants.
|
|
200
|
+
|
|
201
|
+
### Estimated Savings Formula
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
estimated_savings_pct = min(100, (sum(tokens_saved for all triggered rules) / workflow.total_tokens) * 100)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Each `Recommendation` carries its own `estimated_savings` (per-rule), and the workflow summary reports the aggregate using the formula above.
|
|
208
|
+
|
|
209
|
+
**Example output:**
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
Optimization Suggestions
|
|
213
|
+
────────────────────────
|
|
214
|
+
✓ Planner repeated system prompt
|
|
215
|
+
✓ Retrieved 12 chunks
|
|
216
|
+
└─ Only 4 appear to be useful
|
|
217
|
+
|
|
218
|
+
Estimated Savings: 31%
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Architecture
|
|
224
|
+
|
|
225
|
+
The project is modular, with each package having a single responsibility.
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
src/agenticlens/
|
|
229
|
+
├── profiler/ # Workflow and step profiling logic
|
|
230
|
+
├── metrics/ # Token, cost, and performance collection
|
|
231
|
+
├── providers/ # LLM provider integrations
|
|
232
|
+
├── analyzers/ # Workflow analysis and pattern detection
|
|
233
|
+
├── recommenders/ # Rule-based optimization recommendations
|
|
234
|
+
├── exporters/ # JSON and CSV export
|
|
235
|
+
├── cli/ # Typer-based CLI
|
|
236
|
+
├── config/ # Configuration loading
|
|
237
|
+
├── models/ # Pydantic data models
|
|
238
|
+
└── utils/ # Shared utilities
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Core Data Models
|
|
244
|
+
|
|
245
|
+
All models are implemented with **Pydantic v2**.
|
|
246
|
+
|
|
247
|
+
### `Workflow`
|
|
248
|
+
|
|
249
|
+
| Field | Type | Description |
|
|
250
|
+
|---|---|---|
|
|
251
|
+
| `id` | `str` | Unique identifier |
|
|
252
|
+
| `name` | `str` | Workflow name |
|
|
253
|
+
| `start_time` | `datetime` | Execution start |
|
|
254
|
+
| `end_time` | `datetime` | Execution end |
|
|
255
|
+
| `total_tokens` | `int` | Aggregate token count |
|
|
256
|
+
| `total_cost` | `float` | Aggregate cost |
|
|
257
|
+
| `latency` | `float` | Wall-clock duration |
|
|
258
|
+
| `steps` | `list[Step]` | All profiled steps |
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
### `Step`
|
|
263
|
+
|
|
264
|
+
| Field | Type | Description |
|
|
265
|
+
|---|---|---|
|
|
266
|
+
| `id` | `str` | Unique identifier |
|
|
267
|
+
| `name` | `str` | Step label |
|
|
268
|
+
| `type` | `StepType` | Enum (planner, retriever, tool, etc.) |
|
|
269
|
+
| `provider` | `str` | LLM provider name |
|
|
270
|
+
| `model` | `str` | Model identifier |
|
|
271
|
+
| `metrics` | `Metrics` | Step-level metrics |
|
|
272
|
+
|
|
273
|
+
---
|
|
274
|
+
|
|
275
|
+
### `Metrics`
|
|
276
|
+
|
|
277
|
+
| Field | Type | Description |
|
|
278
|
+
|---|---|---|
|
|
279
|
+
| `prompt_tokens` | `int` | Input token count |
|
|
280
|
+
| `completion_tokens` | `int` | Output token count |
|
|
281
|
+
| `total_tokens` | `int` | Combined count |
|
|
282
|
+
| `latency` | `float` | Duration in seconds |
|
|
283
|
+
| `ttft` | `float \| None` | Time To First Token |
|
|
284
|
+
| `cost` | `float \| None` | Calculated cost; `None` when the model has no pricing entry |
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
### `Recommendation`
|
|
289
|
+
|
|
290
|
+
| Field | Type | Description |
|
|
291
|
+
|---|---|---|
|
|
292
|
+
| `title` | `str` | Short recommendation title |
|
|
293
|
+
| `description` | `str` | Detailed explanation |
|
|
294
|
+
| `severity` | `Severity` | Enum: `info`, `warning`, `critical` |
|
|
295
|
+
| `estimated_savings` | `float \| None` | Projected % token reduction |
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## Provider Architecture
|
|
300
|
+
|
|
301
|
+
Providers are independent modules behind an **abstract interface**, so additional providers can be added without modifying the profiler.
|
|
302
|
+
|
|
303
|
+
### Initial providers
|
|
304
|
+
|
|
305
|
+
| Provider | Status |
|
|
306
|
+
|---|---|
|
|
307
|
+
| OpenAI | ✅ MVP |
|
|
308
|
+
| Anthropic | ✅ MVP |
|
|
309
|
+
|
|
310
|
+
### Future providers
|
|
311
|
+
|
|
312
|
+
| Provider | Status |
|
|
313
|
+
|---|---|
|
|
314
|
+
| Gemini | 🔜 Roadmap |
|
|
315
|
+
| Ollama | 🔜 Roadmap |
|
|
316
|
+
| vLLM | 🔜 Roadmap |
|
|
317
|
+
| LiteLLM | 🔜 Roadmap |
|
|
318
|
+
| Azure OpenAI | 🔜 Roadmap |
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
## CLI
|
|
323
|
+
|
|
324
|
+
Built with **Typer** and **Rich** for terminal output.
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
# Profile a script
|
|
328
|
+
agenticlens profile app.py
|
|
329
|
+
|
|
330
|
+
# Display a saved report
|
|
331
|
+
agenticlens report report.json
|
|
332
|
+
|
|
333
|
+
# Analyze a saved workflow
|
|
334
|
+
agenticlens analyze workflow.json
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## Exporters
|
|
340
|
+
|
|
341
|
+
| Format | Status |
|
|
342
|
+
|---|---|
|
|
343
|
+
| JSON | ✅ MVP |
|
|
344
|
+
| CSV | ✅ MVP |
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
## Configuration
|
|
349
|
+
|
|
350
|
+
Configuration is supported through any of:
|
|
351
|
+
|
|
352
|
+
- `pyproject.toml`
|
|
353
|
+
- YAML file
|
|
354
|
+
- Environment variables
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## Technology Stack
|
|
359
|
+
|
|
360
|
+
| Concern | Choice |
|
|
361
|
+
|---|---|
|
|
362
|
+
| Language | Python 3.10+ |
|
|
363
|
+
| Package Manager | `uv` |
|
|
364
|
+
| Packaging | `pyproject.toml` |
|
|
365
|
+
| Testing | `pytest` |
|
|
366
|
+
| Linting | `ruff` |
|
|
367
|
+
| Formatting | `ruff format` |
|
|
368
|
+
| Type Checking | `mypy` |
|
|
369
|
+
| Documentation | MkDocs Material |
|
|
370
|
+
| CLI | Typer |
|
|
371
|
+
| Terminal Output | Rich |
|
|
372
|
+
| Data Models | Pydantic v2 |
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## Coding Standards
|
|
377
|
+
|
|
378
|
+
- Full **type hints** throughout
|
|
379
|
+
- **Async-friendly** architecture
|
|
380
|
+
- **Modular design** — single responsibility per module
|
|
381
|
+
- No global mutable state
|
|
382
|
+
- Unit tests for every module
|
|
383
|
+
- Comprehensive docstrings
|
|
384
|
+
- Clean separation between providers, metrics, and analyzers
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
## Repository Structure
|
|
389
|
+
|
|
390
|
+
```
|
|
391
|
+
agenticlens/
|
|
392
|
+
├── README.md
|
|
393
|
+
├── LICENSE
|
|
394
|
+
├── ROADMAP.md
|
|
395
|
+
├── pyproject.toml
|
|
396
|
+
├── src/
|
|
397
|
+
│ └── agenticlens/
|
|
398
|
+
├── tests/
|
|
399
|
+
├── examples/
|
|
400
|
+
├── docs/
|
|
401
|
+
└── .github/
|
|
402
|
+
└── workflows/
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
---
|
|
406
|
+
|
|
407
|
+
## MVP Deliverables
|
|
408
|
+
|
|
409
|
+
The initial implementation includes:
|
|
410
|
+
|
|
411
|
+
- [ ] Repository scaffold
|
|
412
|
+
- [ ] Project configuration (`pyproject.toml`, `ruff`, `mypy`)
|
|
413
|
+
- [ ] Complete package structure
|
|
414
|
+
- [ ] Data models (Pydantic v2)
|
|
415
|
+
- [ ] Provider abstraction (abstract base class)
|
|
416
|
+
- [ ] Profiler skeleton
|
|
417
|
+
- [ ] Metrics engine skeleton
|
|
418
|
+
- [ ] CLI skeleton (Typer)
|
|
419
|
+
- [ ] Unit test setup (pytest)
|
|
420
|
+
- [ ] GitHub Actions CI pipeline
|
|
421
|
+
- [ ] Documentation structure (MkDocs)
|
|
422
|
+
|
|
423
|
+
> Business logic is implemented incrementally after the scaffold is complete.
|
|
424
|
+
|
|
425
|
+
---
|
|
426
|
+
|
|
427
|
+
## Scaffold-First Instructions
|
|
428
|
+
|
|
429
|
+
> **The first task is NOT to implement the complete library.**
|
|
430
|
+
|
|
431
|
+
In order:
|
|
432
|
+
|
|
433
|
+
1. Scaffold the complete repository
|
|
434
|
+
2. Create all directories
|
|
435
|
+
3. Configure tooling (ruff, mypy, pytest)
|
|
436
|
+
4. Configure CI (GitHub Actions)
|
|
437
|
+
5. Configure packaging (pyproject.toml, uv)
|
|
438
|
+
6. Create all base classes and interfaces
|
|
439
|
+
7. Create placeholder implementations where appropriate
|
|
440
|
+
8. Ensure the project installs successfully
|
|
441
|
+
9. Ensure linting, formatting, typing, and tests pass
|
|
442
|
+
10. **Do not implement optimization algorithms until project structure is complete**
|
|
443
|
+
|
|
444
|
+
> **Objective:** Create a production-quality open-source project foundation that can be incrementally expanded.
|
|
445
|
+
|
|
446
|
+
---
|
|
447
|
+
|
|
448
|
+
## Future Roadmap
|
|
449
|
+
|
|
450
|
+
| Feature | Notes |
|
|
451
|
+
|---|---|
|
|
452
|
+
| LangGraph integration | Native graph-step profiling |
|
|
453
|
+
| CrewAI integration | Multi-agent workflow support |
|
|
454
|
+
| OpenAI Agents SDK integration | Tool call + handoff tracing |
|
|
455
|
+
| MCP profiling | Server-level token attribution |
|
|
456
|
+
| RAG analysis | Chunk utility scoring |
|
|
457
|
+
| Prompt optimization | Automated prompt compression |
|
|
458
|
+
| Context utilization metrics | Effective context window usage |
|
|
459
|
+
| Evaluation framework | Quality vs. cost tradeoffs |
|
|
460
|
+
| Dashboard | Visual workflow explorer |
|
|
461
|
+
| Enterprise reporting | Team-level aggregation and export |
|
|
462
|
+
|
|
463
|
+
> All items above are **out of scope for MVP**.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DeepAgentLabs
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agenticlens
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Profile, analyze, and optimize token consumption in LLM-powered applications and agentic workflows.
|
|
5
|
+
Project-URL: Homepage, https://github.com/agenticlens/agenticlens
|
|
6
|
+
Project-URL: Issues, https://github.com/agenticlens/agenticlens/issues
|
|
7
|
+
Author: AgenticLens Contributors
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 DeepAgentLabs
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: agents,cost,llm,observability,profiling,tokens
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
39
|
+
Requires-Python: >=3.10
|
|
40
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
41
|
+
Requires-Dist: pyyaml>=6.0
|
|
42
|
+
Requires-Dist: rich>=13.0
|
|
43
|
+
Requires-Dist: typer>=0.12
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
47
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
49
|
+
Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
|
|
50
|
+
Provides-Extra: docs
|
|
51
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# AgenticLens
|
|
55
|
+
|
|
56
|
+
An open-source profiler for AI agents that analyzes token usage, cost, latency, and optimization opportunities across LLM workflows.
|
|
57
|
+
|
|
58
|
+
> **Status:** early MVP. Core data models, provider abstraction, the explicit `profile()`/`step()` instrumentation API, and the rule-based recommendation engine (repeated system prompt, excessive chunks, long history, duplicate tool calls) are in place — see [AgenticLens_Spec.md](AgenticLens_Spec.md).
|
|
59
|
+
|
|
60
|
+
## Install (development)
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
uv sync --extra dev
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from agenticlens import profile, step
|
|
70
|
+
|
|
71
|
+
with profile("Customer Support"):
|
|
72
|
+
with step("Planner", type="planner") as s:
|
|
73
|
+
response = planner_llm.invoke(prompt)
|
|
74
|
+
s.record(response)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Development
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
uv run pytest # tests
|
|
81
|
+
uv run ruff check . # lint
|
|
82
|
+
uv run ruff format . # format
|
|
83
|
+
uv run mypy # type check
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
See [AgenticLens_Spec.md](AgenticLens_Spec.md) for the full project specification and [ROADMAP.md](ROADMAP.md) for what's planned beyond the MVP.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# AgenticLens
|
|
2
|
+
|
|
3
|
+
An open-source profiler for AI agents that analyzes token usage, cost, latency, and optimization opportunities across LLM workflows.
|
|
4
|
+
|
|
5
|
+
> **Status:** early MVP. Core data models, provider abstraction, the explicit `profile()`/`step()` instrumentation API, and the rule-based recommendation engine (repeated system prompt, excessive chunks, long history, duplicate tool calls) are in place — see [AgenticLens_Spec.md](AgenticLens_Spec.md).
|
|
6
|
+
|
|
7
|
+
## Install (development)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv sync --extra dev
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from agenticlens import profile, step
|
|
17
|
+
|
|
18
|
+
with profile("Customer Support"):
|
|
19
|
+
with step("Planner", type="planner") as s:
|
|
20
|
+
response = planner_llm.invoke(prompt)
|
|
21
|
+
s.record(response)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Development
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
uv run pytest # tests
|
|
28
|
+
uv run ruff check . # lint
|
|
29
|
+
uv run ruff format . # format
|
|
30
|
+
uv run mypy # type check
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
See [AgenticLens_Spec.md](AgenticLens_Spec.md) for the full project specification and [ROADMAP.md](ROADMAP.md) for what's planned beyond the MVP.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Roadmap
|
|
2
|
+
|
|
3
|
+
## MVP (current)
|
|
4
|
+
|
|
5
|
+
- [x] Repository scaffold
|
|
6
|
+
- [x] Project configuration (`pyproject.toml`, `ruff`, `mypy`)
|
|
7
|
+
- [x] Complete package structure
|
|
8
|
+
- [x] Data models (Pydantic v2)
|
|
9
|
+
- [x] Provider abstraction (abstract base class) + OpenAI/Anthropic
|
|
10
|
+
- [x] Profiler skeleton (`profile()` / `step()`)
|
|
11
|
+
- [x] Metrics engine skeleton (cost calculation, pricing resolution)
|
|
12
|
+
- [x] CLI skeleton (Typer)
|
|
13
|
+
- [x] Unit test setup (pytest)
|
|
14
|
+
- [x] GitHub Actions CI pipeline
|
|
15
|
+
- [x] Recommendation engine heuristic rules (repeated system prompt, excessive chunks, long history, duplicate tool calls)
|
|
16
|
+
- [x] CLI `profile`/`report`/`analyze` business logic
|
|
17
|
+
- [ ] Documentation structure (MkDocs)
|
|
18
|
+
|
|
19
|
+
## Post-MVP
|
|
20
|
+
|
|
21
|
+
See "Future Roadmap" in [AgenticLens_Spec.md](AgenticLens_Spec.md):
|
|
22
|
+
|
|
23
|
+
- LangGraph / CrewAI / OpenAI Agents SDK integrations
|
|
24
|
+
- MCP server-level token attribution
|
|
25
|
+
- RAG chunk-utility scoring
|
|
26
|
+
- Automated prompt compression
|
|
27
|
+
- Context utilization metrics
|
|
28
|
+
- Evaluation framework (quality vs. cost)
|
|
29
|
+
- Dashboard / visual workflow explorer
|
|
30
|
+
- Enterprise reporting
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# AgenticLens
|
|
2
|
+
|
|
3
|
+
Profile, analyze, and optimize token consumption in LLM-powered applications and agentic workflows.
|
|
4
|
+
|
|
5
|
+
See the [project specification](https://github.com/agenticlens/agenticlens/blob/main/AgenticLens_Spec.md) for the full design.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from agenticlens import profile, step
|
|
9
|
+
|
|
10
|
+
with profile("Customer Support"):
|
|
11
|
+
with step("Planner", type="planner") as s:
|
|
12
|
+
response = planner_llm.invoke(prompt)
|
|
13
|
+
s.record(response)
|
|
14
|
+
```
|