agent-roi-tracker 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_roi/__init__.py +3 -0
- agent_roi/api/__init__.py +1 -0
- agent_roi/api/app.py +179 -0
- agent_roi/classify/__init__.py +26 -0
- agent_roi/classify/base.py +44 -0
- agent_roi/classify/semantic.py +197 -0
- agent_roi/cli/__init__.py +1 -0
- agent_roi/cli/main.py +200 -0
- agent_roi/collectors/__init__.py +31 -0
- agent_roi/collectors/base.py +49 -0
- agent_roi/collectors/claude_code.py +165 -0
- agent_roi/collectors/codex.py +157 -0
- agent_roi/collectors/copilot.py +210 -0
- agent_roi/collectors/gemini.py +220 -0
- agent_roi/core/__init__.py +1 -0
- agent_roi/core/config.py +58 -0
- agent_roi/core/models.py +241 -0
- agent_roi/core/platform.py +113 -0
- agent_roi/core/pricing.py +79 -0
- agent_roi/core/project.py +52 -0
- agent_roi/core/service.py +172 -0
- agent_roi/core/timeframe.py +76 -0
- agent_roi/core/tokens.py +30 -0
- agent_roi/storage/__init__.py +5 -0
- agent_roi/storage/db.py +542 -0
- agent_roi_tracker-0.1.0.dist-info/METADATA +163 -0
- agent_roi_tracker-0.1.0.dist-info/RECORD +30 -0
- agent_roi_tracker-0.1.0.dist-info/WHEEL +4 -0
- agent_roi_tracker-0.1.0.dist-info/entry_points.txt +2 -0
- agent_roi_tracker-0.1.0.dist-info/licenses/LICENSE +21 -0
agent_roi/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""REST API package."""
|
agent_roi/api/app.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""REST API for the web dashboard.
|
|
2
|
+
|
|
3
|
+
Thin layer over :class:`Service`. If a built web UI exists at ``web/dist`` it is
|
|
4
|
+
served as static files so ``agent-roi serve`` gives a single-URL experience.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from fastapi import FastAPI, HTTPException
|
|
13
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
14
|
+
from fastapi.staticfiles import StaticFiles
|
|
15
|
+
|
|
16
|
+
from agent_roi import __version__
|
|
17
|
+
from agent_roi.core.platform import platform_label
|
|
18
|
+
from agent_roi.core.service import Service
|
|
19
|
+
from agent_roi.core.timeframe import parse_since, parse_until
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_app(service: Service | None = None) -> FastAPI:
    """Build the FastAPI application behind the web dashboard.

    Args:
        service: The :class:`Service` the routes delegate to. A default
            ``Service()`` is created when none (or a falsy value) is given.

    Returns:
        The app with CORS middleware and the ``/api/*`` routes registered and,
        if a built web UI directory is found, that directory mounted at ``/``.
    """
    svc: Service = service or Service()
    app = FastAPI(title="Agent-ROI", version=__version__)

    # Allow the React dev server (vite default port) during development.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:5173"],
        allow_methods=["GET", "POST"],
        allow_headers=["*"],
    )

    def _with_total(row) -> dict[str, object]:
        # Serialize a model and add its ``total_tokens`` attribute explicitly.
        # NOTE(review): presumably ``total_tokens`` is a computed property that
        # ``model_dump()`` omits -- confirm against core.models.
        return row.model_dump() | {"total_tokens": row.total_tokens}

    @app.get("/api/health")
    def health() -> dict[str, str]:
        return {"status": "ok", "version": __version__}

    @app.get("/api/report")
    def report(
        group_by: str = "topic",
        since: str = "",
        until: str = "",
    ) -> list[dict[str, object]]:
        """Usage/cost grouped by 'topic' | 'tool' | 'model' | 'project', optionally windowed."""
        if group_by not in ("topic", "tool", "model", "project"):
            raise HTTPException(400, f"Invalid group_by: {group_by}")
        start, end = _window(since, until)
        return [
            _with_total(r)
            for r in svc.report(dimension=group_by, start=start, end=end)
        ]

    @app.get("/api/report/topic/{topic}")
    def topic_breakdown(topic: str, since: str = "", until: str = "") -> dict[str, object]:
        """Drill into one topic: split by tool and by model."""
        start, end = _window(since, until)
        bd = svc.topic_breakdown(topic, start=start, end=end)
        return {
            "topic": bd.topic,
            "total": _with_total(bd.total),
            "by_tool": [_with_total(r) for r in bd.by_tool],
            "by_model": [_with_total(r) for r in bd.by_model],
        }

    @app.get("/api/sessions")
    def sessions(topic: str = "", since: str = "", until: str = "") -> list[dict[str, object]]:
        """Per-session rows, optionally scoped to one topic and time window."""
        start, end = _window(since, until)
        # An empty ``topic`` query value means "no topic filter".
        rows = svc.sessions(topic=topic or None, start=start, end=end)
        return [_with_total(s) for s in rows]

    @app.get("/api/sessions/{session_id}")
    def session_detail(session_id: str) -> dict[str, object]:
        """One session's aggregate plus the interactions (conversation turns)."""
        detail = svc.session_detail(session_id)
        if detail is None:
            raise HTTPException(404, f"No session {session_id!r}")
        return {
            "session": _with_total(detail.session),
            "interactions": [_with_total(i) for i in detail.interactions],
        }

    @app.get("/api/timeseries")
    def timeseries(
        since: str = "",
        until: str = "",
        granularity: str = "day",
    ) -> dict[str, object]:
        """Token/cost trends plus splits by tool and model."""
        if granularity not in ("day", "week", "month"):
            raise HTTPException(400, f"Invalid granularity: {granularity}")
        start, end = _window(since, until)
        bundle = svc.timeseries(start=start, end=end, granularity=granularity)
        return {
            "granularity": granularity,
            "totals": [_with_total(p) for p in bundle.totals],
            # The per-tool / per-model rows are dumped as-is (no total added).
            "by_tool": [r.model_dump() for r in bundle.by_tool],
            "by_model": [r.model_dump() for r in bundle.by_model],
            "tool_keys": bundle.tool_keys,
            "model_keys": bundle.model_keys,
        }

    @app.get("/api/pricing")
    def pricing() -> list[dict[str, object]]:
        """The pricing table behind every cost figure."""
        return [p.model_dump() for p in svc.pricing()]

    @app.get("/api/sources")
    def sources() -> dict[str, object]:
        """Collector diagnostics: which tools were detected and where."""
        return {
            "platform": platform_label(),
            "collectors": [s.model_dump() for s in svc.sources()],
        }

    @app.post("/api/ingest")
    def ingest() -> dict[str, int]:
        return {"ingested": svc.ingest()}

    @app.post("/api/classify")
    def classify() -> dict[str, int]:
        return {"classified": svc.classify()}

    @app.post("/api/refresh")
    def refresh() -> dict[str, int]:
        """Ingest new logs and re-discover topics in one step."""
        return svc.refresh()

    # Registered after the API routes: the static mount sits at "/".
    _mount_web_ui(app)
    return app
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _since(value: str) -> datetime | None:
    """Parse the ``since`` query value via ``parse_since``.

    A ``ValueError`` from the parser is re-raised as an ``HTTPException`` with
    status 400 whose detail is the parser's message.
    """
    try:
        parsed = parse_since(value)
    except ValueError as err:
        raise HTTPException(400, str(err)) from err
    return parsed
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _until(value: str) -> datetime | None:
    """Parse the ``until`` query value via ``parse_until``.

    A ``ValueError`` from the parser is re-raised as an ``HTTPException`` with
    status 400 whose detail is the parser's message.
    """
    try:
        parsed = parse_until(value)
    except ValueError as err:
        raise HTTPException(400, str(err)) from err
    return parsed
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _window(since: str, until: str) -> tuple[datetime | None, datetime | None]:
    """Resolve the ``since``/``until`` query values into a ``(start, end)`` pair.

    Either bound may come back as ``None``. When both are present the start
    must be strictly earlier than the end.

    Raises:
        HTTPException: status 400 if either value fails to parse, or if both
            bounds are set and ``start >= end``.
    """
    start, end = _since(since), _until(until)
    both_bounded = start is not None and end is not None
    if both_bounded and start >= end:
        raise HTTPException(400, "since must be before until")
    return start, end
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _mount_web_ui(app: FastAPI) -> None:
    """Mount the built web UI at ``/`` when ``_web_dist`` finds one; else do nothing."""
    ui_dir = _web_dist()
    if ui_dir is None:
        return
    static_site = StaticFiles(directory=str(ui_dir), html=True)
    app.mount("/", static_site, name="web")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _web_dist() -> Path | None:
    """Locate the built web UI.

    Prefers the copy bundled inside the installed package (so a pip/uv install
    can serve the dashboard), then falls back to the dev build at ``web/dist``.

    Returns:
        The first candidate that is an existing directory, or ``None`` when no
        candidate exists.
    """
    here = Path(__file__).resolve()
    candidates = [
        here.parent.parent / "webui",  # packaged: agent_roi/webui
    ]
    # The dev checkout lives three levels above this module's directory. Only
    # add it when the path is deep enough: indexing ``parents[3]`` on a shallow
    # install path would raise IndexError instead of reporting "no UI".
    ancestors = here.parents
    if len(ancestors) > 3:
        candidates.append(ancestors[3] / "web" / "dist")  # dev checkout
    return next((c for c in candidates if c.is_dir()), None)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Classifier factory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agent_roi.classify.base import Classifier, SessionDoc
|
|
6
|
+
from agent_roi.core.config import ClassifierConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_classifier(config: ClassifierConfig) -> Classifier:
    """Build the classifier selected by ``config.provider``.

    ``semantic`` is the only provider handled here; it is constructed from
    ``config.similarity_threshold`` and ``config.label_terms``.

    Raises:
        ValueError: if ``config.provider`` is anything other than ``"semantic"``.
    """
    if config.provider != "semantic":
        raise ValueError(f"Unknown classifier provider: {config.provider!r}")

    # Imported only on the path that actually needs the implementation.
    from agent_roi.classify.semantic import SemanticClassifier

    options = {
        "similarity_threshold": config.similarity_threshold,
        "label_terms": config.label_terms,
    }
    return SemanticClassifier(**options)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
__all__ = ["Classifier", "SessionDoc", "get_classifier"]
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Topic classifier interface.
|
|
2
|
+
|
|
3
|
+
A classifier looks at whole *sessions* (one continuous piece of agent work) and
|
|
4
|
+
groups the ones that are about the same thing, assigning each group a short topic
|
|
5
|
+
label such as "auth refactor" or "ci pipeline". Topics are how Agent-ROI
|
|
6
|
+
aggregates token cost per *subject* rather than per request, which is the whole
|
|
7
|
+
point of measuring agent ROI.
|
|
8
|
+
|
|
9
|
+
Classification is deliberately model-free: it discovers topics from the text of
|
|
10
|
+
the sessions themselves (semantic similarity), so it runs fully offline, costs
|
|
11
|
+
nothing, and never sends anything to an external service.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
UNCATEGORIZED = "uncategorized"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SessionDoc:
    """A single session, in the form the classifier consumes for topic discovery."""

    # Identifier used as the key of the classifier's result mapping.
    session_id: str
    # Coarse repo/folder grouping derived from the cwd.
    project: str
    # Compact, combined snippet of the session's interactions.
    summary: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Classifier(ABC):
    """Abstract interface every topic classifier implements.

    A classifier receives the whole batch of sessions at once, which lets it
    group related sessions instead of judging each one on its own.
    """

    @abstractmethod
    def label_sessions(self, sessions: list[SessionDoc]) -> dict[str, str]:
        """Map each given session's ``session_id`` to its topic label."""
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Model-free semantic topic discovery.
|
|
2
|
+
|
|
3
|
+
Groups sessions that are about the same kind of work, without any LLM or external
|
|
4
|
+
service. The pipeline mirrors classic "local embedding + community detection"
|
|
5
|
+
setups, but uses lightweight, dependency-free building blocks so it runs anywhere
|
|
6
|
+
and costs nothing:
|
|
7
|
+
|
|
8
|
+
1. Each session's combined summary is tokenized and turned into a TF-IDF vector
|
|
9
|
+
(a cheap, deterministic stand-in for an embedding).
|
|
10
|
+
2. Sessions are greedily grouped: each one joins the existing cluster whose
|
|
11
|
+
centroid it is most similar to (cosine similarity at or above a threshold),
|
|
12
|
+
or starts a new cluster — centroid linkage avoids single-linkage chaining.
|
|
13
|
+
3. Each cluster is labeled from its most distinctive shared terms.
|
|
14
|
+
|
|
15
|
+
Sessions with no meaningful text fall back to ``uncategorized`` rather than being
|
|
16
|
+
force-fit into a topic.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import math
|
|
22
|
+
import re
|
|
23
|
+
from collections import defaultdict
|
|
24
|
+
|
|
25
|
+
from agent_roi.classify.base import UNCATEGORIZED, Classifier, SessionDoc
|
|
26
|
+
|
|
27
|
+
# Generic words that carry no topic signal in coding-assistant chatter. Kept
|
|
28
|
+
# short and intentionally conservative — real topic terms should survive.
|
|
29
|
+
_STOPWORDS = frozenset(
|
|
30
|
+
{
|
|
31
|
+
"the", "a", "an", "and", "or", "but", "if", "then", "else", "for", "of",
|
|
32
|
+
"to", "in", "on", "at", "by", "is", "are", "was", "were", "be", "been",
|
|
33
|
+
"this", "that", "these", "those", "it", "its", "as", "with", "from",
|
|
34
|
+
"into", "out", "up", "down", "so", "not", "no", "yes", "can", "will",
|
|
35
|
+
"would", "should", "could", "do", "does", "did", "have", "has", "had",
|
|
36
|
+
"i", "you", "we", "they", "he", "she", "me", "my", "your", "our",
|
|
37
|
+
"please", "help", "want", "need", "make", "let", "lets", "use", "using",
|
|
38
|
+
"add", "added", "adding", "fix", "fixed", "fixing", "update", "updated",
|
|
39
|
+
"change", "changed", "create", "created", "new", "code", "file", "files",
|
|
40
|
+
"function", "error", "errors", "issue", "issues", "problem", "try",
|
|
41
|
+
"trying", "now", "also", "like", "just", "get", "got", "set", "run",
|
|
42
|
+
"running", "here", "there", "what", "how", "why", "when", "which",
|
|
43
|
+
"user", "assistant", "message", "okay", "ok", "thanks", "thank",
|
|
44
|
+
}
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
_TOKEN_RE = re.compile(r"[a-z][a-z0-9_]{2,}")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class SemanticClassifier(Classifier):
    """Topic discovery by clustering sessions whose summaries look alike."""

    def __init__(
        self,
        similarity_threshold: float = 0.18,
        label_terms: int = 3,
    ) -> None:
        """Store the clustering knobs.

        Args:
            similarity_threshold: Cosine similarity at or above which a session
                joins an existing cluster. Raising it makes matching stricter,
                giving more and smaller topics.
            label_terms: Number of distinctive terms used to build a label.
        """
        self.similarity_threshold = similarity_threshold
        self.label_terms = label_terms

    def label_sessions(self, sessions: list[SessionDoc]) -> dict[str, str]:
        """Return ``{session_id: topic}`` for ``sessions`` (empty in, empty out)."""
        if not sessions:
            return {}

        vectors = _tfidf_vectors([_tokenize(doc.summary) for doc in sessions])

        assigned: dict[str, str] = {}
        # How many clusters have produced each base label so far.
        seen: dict[str, int] = {}
        for cluster in _cluster(vectors, self.similarity_threshold):
            name = _label_for(cluster, vectors, self.label_terms)
            if name == UNCATEGORIZED or name not in seen:
                seen.setdefault(name, 1)
            else:
                # A different cluster already took this label: add a counter.
                seen[name] += 1
                name = f"{name} {seen[name]}"
            for position in cluster:
                assigned[sessions[position].session_id] = name
        return assigned
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _tokenize(text: str) -> list[str]:
    """Lower-case ``text``, extract ``_TOKEN_RE`` matches, and drop stopwords."""
    candidates = _TOKEN_RE.findall(text.lower())
    return [token for token in candidates if token not in _STOPWORDS]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _tfidf_vectors(token_lists: list[list[str]]) -> list[dict[str, float]]:
    """Turn token lists into unit-length TF-IDF vectors (dot product == cosine).

    Returns one ``{term: weight}`` dict per input list, in input order. A list
    with no tokens yields an empty dict.
    """
    total_docs = len(token_lists)

    # Document frequency: how many token lists contain each term at least once.
    doc_freq: dict[str, int] = {}
    for tokens in token_lists:
        for term in set(tokens):
            doc_freq[term] = doc_freq.get(term, 0) + 1

    # Smoothed inverse document frequency; the +1.0 keeps every weight positive.
    idf: dict[str, float] = {}
    for term, df in doc_freq.items():
        idf[term] = math.log((total_docs + 1) / (df + 1)) + 1.0

    def _weigh(tokens: list[str]) -> dict[str, float]:
        # Raw term counts, in first-occurrence order.
        counts: dict[str, int] = {}
        for term in tokens:
            counts[term] = counts.get(term, 0) + 1
        weights = {term: tf * idf[term] for term, tf in counts.items()}
        length = math.sqrt(sum(w * w for w in weights.values()))
        if length > 0:
            return {term: w / length for term, w in weights.items()}
        return weights

    return [_weigh(tokens) for tokens in token_lists]
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _cosine(a: dict[str, float], b: dict[str, float]) -> float:
    """Dot product of two sparse vectors; equals cosine when both are unit-length."""
    # Walk the shorter vector and probe the longer one: fewer lookups.
    shorter, longer = (b, a) if len(a) > len(b) else (a, b)
    lookup = longer.get
    return sum(value * lookup(key, 0.0) for key, value in shorter.items())
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _cluster(vectors: list[dict[str, float]], threshold: float) -> list[list[int]]:
    """Greedy centroid clustering over ``vectors``, processed in order.

    Each non-empty vector joins the cluster whose centroid it is most similar
    to, provided the similarity is at or above ``threshold``; otherwise it
    seeds a new cluster. On equal similarity the later cluster wins.

    Centroid linkage resists the "chaining" failure of single-linkage, where
    one borderline pair can merge unrelated work into a single giant topic.

    Returns:
        Lists of indices into ``vectors``: the real clusters first, followed by
        one singleton per empty vector so those can be labeled separately.
    """
    centroids: list[dict[str, float]] = []
    counts: list[int] = []
    groups: list[list[int]] = []
    blanks: list[list[int]] = []

    for index, vector in enumerate(vectors):
        if not vector:
            blanks.append([index])
            continue

        chosen = -1
        best_score = threshold
        for slot, centroid in enumerate(centroids):
            score = _cosine(vector, centroid)
            if score >= best_score:
                chosen, best_score = slot, score

        if chosen != -1:
            groups[chosen].append(index)
            counts[chosen] += 1
            centroids[chosen] = _merge_centroid(centroids[chosen], counts[chosen], vector)
        else:
            # Nothing was close enough: this vector seeds its own cluster.
            centroids.append(dict(vector))
            counts.append(1)
            groups.append([index])

    return groups + blanks
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _merge_centroid(
    centroid: dict[str, float], size: int, vec: dict[str, float]
) -> dict[str, float]:
    """Blend ``vec`` into ``centroid`` and rescale the result to unit length.

    ``size`` is the cluster's member count including ``vec``: the existing
    centroid is weighted ``(size - 1) / size`` and ``vec`` is weighted
    ``1 / size``. A new dict is returned; the inputs are not modified.
    """
    share = 1.0 / size
    blended = {term: w * (size - 1) * share for term, w in centroid.items()}
    for term, w in vec.items():
        blended[term] = blended.get(term, 0.0) + w * share

    length = math.sqrt(sum(x * x for x in blended.values()))
    if length > 0:
        return {term: x / length for term, x in blended.items()}
    return blended
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _label_for(
    members: list[int],
    vectors: list[dict[str, float]],
    max_terms: int,
) -> str:
    """Build a cluster label from its highest-scoring terms.

    A term's score is the sum of its weights over the member vectors. Terms
    are ranked by descending score, ties broken alphabetically, and the top
    ones are joined with spaces. Clusters with no terms get ``UNCATEGORIZED``.
    """
    totals: dict[str, float] = {}
    for idx in members:
        for term, weight in vectors[idx].items():
            totals[term] = totals.get(term, 0.0) + weight

    if not totals:
        return UNCATEGORIZED

    # Singletons get one term fewer than multi-session topics, never below one.
    if len(members) > 1:
        wanted = max_terms
    else:
        wanted = max_terms - 1
    wanted = max(1, wanted)

    ordered = sorted(totals, key=lambda term: (-totals[term], term))
    chosen = ordered[:wanted]
    return " ".join(chosen) if chosen else UNCATEGORIZED
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI package."""
|
agent_roi/cli/main.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Agent-ROI command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
|
|
11
|
+
from agent_roi.core.platform import platform_label
|
|
12
|
+
from agent_roi.core.service import Service
|
|
13
|
+
from agent_roi.core.timeframe import parse_since
|
|
14
|
+
|
|
15
|
+
app = typer.Typer(
|
|
16
|
+
name="agent-roi",
|
|
17
|
+
help="Track the cost, usage, and ROI of your AI coding agents across every tool.",
|
|
18
|
+
no_args_is_help=True,
|
|
19
|
+
add_completion=False,
|
|
20
|
+
)
|
|
21
|
+
console = Console()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _parse_since(value: str) -> datetime | None:
    """Parse a ``--since`` value with ``parse_since``.

    On a ``ValueError`` the message is printed in red and the command exits
    with status 1.
    """
    try:
        parsed = parse_since(value)
    except ValueError as err:
        console.print(f"[red]{err}[/red]")
        raise typer.Exit(1) from err
    return parsed
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@app.command()
def ingest(
    classify: bool = typer.Option(
        True, help="Also discover topics after ingesting (recommended)."
    ),
) -> None:
    """Collect interactions from all enabled tools, then discover topics."""
    svc = Service()

    with console.status("Ingesting logs..."):
        ingested = svc.ingest()
    console.print(f"[green]Ingested {ingested} interactions.[/green]")

    # Topic discovery is optional; stop here when it was switched off.
    if not classify:
        return

    with console.status("Discovering topics..."):
        classified = svc.classify()
    console.print(f"[green]Classified {classified} interactions into topics.[/green]")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@app.command()
def classify(
    limit: int = typer.Option(0, help="Max sessions to classify (0 = all)."),
) -> None:
    """Re-discover topics across all sessions."""
    svc = Service()
    # A limit of 0 is passed on as ``None`` ("no cap").
    max_sessions = limit or None
    with console.status("Discovering topics..."):
        classified = svc.classify(limit=max_sessions)
    console.print(f"[green]Classified {classified} interactions.[/green]")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@app.command()
def report(
    by: str = typer.Option("topic", help="Grouping dimension: topic | tool | model | project."),
    since: str = typer.Option(
        "", help="Time window start: a date (YYYY-MM-DD) or shorthand like 7d, 24h, today."
    ),
) -> None:
    """Show a token/cost breakdown, grouped and optionally time-windowed."""
    if by not in ("topic", "tool", "model", "project"):
        console.print(f"[red]Unsupported grouping: {by} (use topic|tool|model|project)[/red]")
        raise typer.Exit(1)

    start = _parse_since(since)
    svc = Service()
    rollups = svc.report(dimension=by, start=start)
    if not rollups:
        console.print("[yellow]No data in range. Run 'agent-roi ingest' first.[/yellow]")
        return

    heading = by.capitalize()
    title = f"Token Cost by {heading}"
    if start is not None:
        title = f"{title} (since {start.date()})"

    table = Table(title=title)
    table.add_column(heading, style="cyan", no_wrap=True)
    for numeric_column in ("Interactions", "Input", "Output", "Total Tokens"):
        table.add_column(numeric_column, justify="right")
    table.add_column("Cost (USD)", justify="right", style="green")
    table.add_column("Src", justify="center")

    for row in rollups:
        source_flag = "~est" if row.estimated else "exact"
        table.add_row(
            row.key,
            str(row.interactions),
            f"{row.input_tokens:,}",
            f"{row.output_tokens:,}",
            f"{row.total_tokens:,}",
            f"${row.cost_usd:,.4f}",
            source_flag,
        )
    console.print(table)

    # Explain the "~est" marker only when at least one row carries it.
    if any(row.estimated for row in rollups):
        console.print("[dim]~est = token counts estimated (tool doesn't report usage).[/dim]")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@app.command()
def topic(
    name: str = typer.Argument(..., help="Topic to drill into."),
    since: str = typer.Option("", help="Time window start (date or 7d/24h/today)."),
) -> None:
    """Drill into one topic: how its tokens split across tools and models."""
    start = _parse_since(since)
    svc = Service()
    bd = svc.topic_breakdown(name, start=start)
    if bd.total.interactions == 0:
        console.print(f"[yellow]No interactions for topic '{name}' in range.[/yellow]")
        return

    console.print(
        f"[bold]{name}[/bold] — {bd.total.interactions} interactions, "
        f"{bd.total.total_tokens:,} tokens, [green]${bd.total.cost_usd:,.4f}[/green]"
    )

    total_cost = bd.total.cost_usd
    sections = (("By Tool", bd.by_tool), ("By Model", bd.by_model))
    for label, rows in sections:
        table = Table(title=label)
        # First column is named after the dimension: "Tool" or "Model".
        table.add_column(label.split()[-1], style="cyan")
        table.add_column("Interactions", justify="right")
        table.add_column("Total Tokens", justify="right")
        table.add_column("Cost (USD)", justify="right", style="green")
        table.add_column("Share", justify="right")
        for row in rows:
            # Share of the topic's cost; 0 when the topic has no cost at all.
            if total_cost:
                share = row.cost_usd / total_cost * 100
            else:
                share = 0.0
            table.add_row(
                row.key,
                str(row.interactions),
                f"{row.total_tokens:,}",
                f"${row.cost_usd:,.4f}",
                f"{share:.0f}%",
            )
        console.print(table)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@app.command()
def pricing() -> None:
    """Show the pricing table behind every cost figure (USD per 1M tokens)."""
    svc = Service()

    table = Table(title="Model Pricing (USD per 1M tokens)")
    table.add_column("Model", style="cyan")
    for rate_column in ("Input", "Output", "Cache Read", "Cache Write"):
        table.add_column(rate_column, justify="right")

    for entry in svc.pricing():
        rates = (entry.input, entry.output, entry.cache_read, entry.cache_write)
        table.add_row(entry.model, *(f"${rate}" for rate in rates))

    console.print(table)
    console.print("[dim]cost = (input x in + output x out + cache_read x cr + ...) / 1e6[/dim]")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@app.command()
def doctor() -> None:
    """Show which tools were detected, where Agent-ROI looked, and what it found."""
    service = Service()
    console.print(f"[bold]Platform:[/bold] {platform_label()}")

    # Query the collectors once and reuse the result for both the table and the
    # search-path listing, instead of repeating the diagnostics for each pass.
    sources = list(service.sources())

    table = Table(title="Data Sources")
    table.add_column("Tool", style="cyan")
    table.add_column("Detected", justify="center")
    table.add_column("Log Files", justify="right")
    table.add_column("Interactions", justify="right")
    table.add_column("Cost (USD)", justify="right", style="green")
    table.add_column("Notes")
    for s in sources:
        table.add_row(
            s.name,
            "[green]yes[/green]" if s.available else "[red]no[/red]",
            str(s.log_files),
            f"{s.interactions:,}",
            f"${s.cost_usd:,.2f}",
            s.note,
        )
    console.print(table)

    # Below the table, list where each collector looked (skipping empty lists).
    for s in sources:
        if s.search_paths:
            console.print(f"[dim]{s.name} searched:[/dim] {', '.join(s.search_paths)}")
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@app.command()
def serve(
    host: str = typer.Option("127.0.0.1", help="Bind host."),
    port: int = typer.Option(8000, help="Bind port."),
) -> None:
    """Run the REST API (and serve the built web UI if present)."""
    # Imported here so the other commands do not load the server.
    import uvicorn

    # ``factory=True``: the import string names a callable that builds the app.
    server_options = {"host": host, "port": port, "factory": True}
    uvicorn.run("agent_roi.api.app:create_app", **server_options)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
if __name__ == "__main__":
|
|
200
|
+
app()
|