agentmetrics-langchain 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # Generated — dashboard SPA copied here during server build
2
+ api/app/static/
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.pyo
8
+ .venv/
9
+ .env
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+ .mypy_cache/
14
+ .ruff_cache/
15
+ .pytest_cache/
16
+ htmlcov/
17
+ .coverage
18
+ coverage.xml
19
+
20
+ # Node / JS
21
+ node_modules/
22
+ .next/
23
+ .turbo/
24
+ dist/
25
+ build/
26
+ *.tsbuildinfo
27
+ .pnpm-store/
28
+
29
+ # Env files
30
+ .env
31
+ .env.local
32
+ .env.production
33
+ .env.*.local
34
+ api/.env.local
35
+ dashboard/.env.local
36
+
37
+ # Build artifacts inside packages
38
+ packages/python/dist/
39
+ packages/python/*.egg-info/
40
+ packages/js/dist/
41
+ packages/js/node_modules/
42
+
43
+ # Internal docs — never public
44
+ .internal/
45
+ PLAN.md
46
+ CODE.md
47
+
48
+ # OS
49
+ .DS_Store
50
+ Thumbs.db
51
+
52
+ # IDE
53
+ .vscode/
54
+ .idea/
55
+ *.swp
56
+
57
+ # Docker
58
+ *.log
59
+ .internal
60
+
61
+ # Local data (SQLite DB when running without Docker)
62
+ data/
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentmetrics-langchain
3
+ Version: 0.2.0
4
+ Summary: AgentMetrics observability integration for LangChain agents
5
+ Project-URL: Homepage, https://github.com/andalabx/agentmetrics
6
+ Project-URL: Repository, https://github.com/andalabx/agentmetrics
7
+ License: MIT
8
+ Keywords: agentmetrics,ai-agents,langchain,monitoring,observability
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: agentmetrics-shared>=0.2.0
11
+ Requires-Dist: agentmetrics>=0.2.0
12
+ Requires-Dist: langchain-core>=0.2.0
13
+ Description-Content-Type: text/markdown
14
+
15
+ # agentmetrics-langchain
16
+
17
+ [![PyPI](https://img.shields.io/pypi/v/agentmetrics-langchain?color=6366f1&label=pypi&logo=python&logoColor=white)](https://pypi.org/project/agentmetrics-langchain)
18
+ [![License: MIT](https://img.shields.io/badge/license-MIT-6366f1)](../../LICENSE)
19
+
20
+ AgentMetrics integration for [LangChain](https://python.langchain.com). Pass one callback to any chain or agent `.invoke()` call and every run reports back to your dashboard showing latency, cost, token counts, tool calls, and errors, with no changes to your agent logic.
21
+
22
+ ---
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install agentmetrics-langchain
28
+ ```
29
+
30
+ ---
31
+
32
+ ## Quickstart
33
+
34
+ ```python
35
+ from agentmetrics_langchain import AgentMetricsCallback
36
+
37
+ cb = AgentMetricsCallback(
38
+ api_key="am_...",
+ agent_id="my-langchain-agent",
39
+ base_url="http://localhost:8099",
40
+ )
41
+
42
+ result = agent.invoke(
43
+ {"input": "What is the weather in Paris?"},
44
+ config={"callbacks": [cb]},
45
+ )
46
+
47
+ cb.flush()
48
+ ```
49
+
50
+ ---
51
+
52
+ ## API
53
+
54
+ ### `AgentMetricsCallback(api_key, agent_id, base_url)`
55
+
56
+ | Parameter | Default | Description |
57
+ |---|---|---|
58
+ | `api_key` | required | AgentMetrics API key (for example `"am_..."`) |
+ | `agent_id` | `"langchain-agent"` | Label shown in the dashboard |
59
+ | `base_url` | `"http://localhost:8099"` | AgentMetrics server address |
60
+
61
+ The callback is a `BaseCallbackHandler`. Pass it via `config={"callbacks": [cb]}` on any chain or agent `.invoke()` call. It tracks the top-level chain only, with nested sub-chains aggregated into the same run.
62
+
63
+ Supports both OpenAI-style and Anthropic-style token counting from `usage_metadata` and `llm_output`.
64
+
65
+ ### `.flush(timeout=10.0)`
66
+
67
+ Blocks until all in-flight HTTP requests complete. Call before process exit in scripts.
68
+
69
+ ---
70
+
71
+ ## What gets tracked
72
+
73
+ Each top-level chain invocation emits one event to `/v1/events` on completion or error:
74
+
75
+ | Field | Description |
76
+ |---|---|
77
+ | `status` | `success` or `failed` |
78
+ | `duration_ms` | Wall-clock chain duration |
79
+ | `input_tokens` / `output_tokens` | Aggregated across all LLM calls in the chain |
80
+ | `cache_read_tokens` / `cache_write_tokens` | Cache token counts (Anthropic) |
81
+ | `llm_calls` | Number of LLM requests in the chain |
82
+ | `tool_calls` / `tool_errors` | Tool usage counts |
83
+ | `tool_names` | Set of tools invoked |
84
+ | `model` | Model name from the first LLM call |
85
+ | `estimated_cost_usd` | Computed from token counts and model pricing |
86
+ | `error` | First 500 chars of the error message on failure |
87
+
88
+ ---
89
+
90
+ ## LangGraph
91
+
92
+ The callback works with LangGraph graphs the same way:
93
+
94
+ ```python
95
+ from langgraph.graph import StateGraph
96
+ from agentmetrics_langchain import AgentMetricsCallback
97
+
98
+ cb = AgentMetricsCallback(api_key="am_...", base_url="http://localhost:8099")
99
+ app = build_graph().compile()
100
+
101
+ result = app.invoke(state, config={"callbacks": [cb]})
102
+ ```
103
+
104
+ ---
105
+
106
+ ## License
107
+
108
+ [MIT](../../LICENSE)
@@ -0,0 +1,94 @@
1
+ # agentmetrics-langchain
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/agentmetrics-langchain?color=6366f1&label=pypi&logo=python&logoColor=white)](https://pypi.org/project/agentmetrics-langchain)
4
+ [![License: MIT](https://img.shields.io/badge/license-MIT-6366f1)](../../LICENSE)
5
+
6
+ AgentMetrics integration for [LangChain](https://python.langchain.com). Pass one callback to any chain or agent `.invoke()` call and every run reports back to your dashboard showing latency, cost, token counts, tool calls, and errors, with no changes to your agent logic.
7
+
8
+ ---
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install agentmetrics-langchain
14
+ ```
15
+
16
+ ---
17
+
18
+ ## Quickstart
19
+
20
+ ```python
21
+ from agentmetrics_langchain import AgentMetricsCallback
22
+
23
+ cb = AgentMetricsCallback(
24
+ api_key="am_...",
+ agent_id="my-langchain-agent",
25
+ base_url="http://localhost:8099",
26
+ )
27
+
28
+ result = agent.invoke(
29
+ {"input": "What is the weather in Paris?"},
30
+ config={"callbacks": [cb]},
31
+ )
32
+
33
+ cb.flush()
34
+ ```
35
+
36
+ ---
37
+
38
+ ## API
39
+
40
+ ### `AgentMetricsCallback(api_key, agent_id, base_url)`
41
+
42
+ | Parameter | Default | Description |
43
+ |---|---|---|
44
+ | `api_key` | required | AgentMetrics API key (for example `"am_..."`) |
+ | `agent_id` | `"langchain-agent"` | Label shown in the dashboard |
45
+ | `base_url` | `"http://localhost:8099"` | AgentMetrics server address |
46
+
47
+ The callback is a `BaseCallbackHandler`. Pass it via `config={"callbacks": [cb]}` on any chain or agent `.invoke()` call. It tracks the top-level chain only, with nested sub-chains aggregated into the same run.
48
+
49
+ Supports both OpenAI-style and Anthropic-style token counting from `usage_metadata` and `llm_output`.
50
+
51
+ ### `.flush(timeout=10.0)`
52
+
53
+ Blocks until all in-flight HTTP requests complete. Call before process exit in scripts.
54
+
55
+ ---
56
+
57
+ ## What gets tracked
58
+
59
+ Each top-level chain invocation emits one event to `/v1/events` on completion or error:
60
+
61
+ | Field | Description |
62
+ |---|---|
63
+ | `status` | `success` or `failed` |
64
+ | `duration_ms` | Wall-clock chain duration |
65
+ | `input_tokens` / `output_tokens` | Aggregated across all LLM calls in the chain |
66
+ | `cache_read_tokens` / `cache_write_tokens` | Cache token counts (Anthropic) |
67
+ | `llm_calls` | Number of LLM requests in the chain |
68
+ | `tool_calls` / `tool_errors` | Tool usage counts |
69
+ | `tool_names` | Set of tools invoked |
70
+ | `model` | Model name from the first LLM call |
71
+ | `estimated_cost_usd` | Computed from token counts and model pricing |
72
+ | `error` | First 500 chars of the error message on failure |
73
+
74
+ ---
75
+
76
+ ## LangGraph
77
+
78
+ The callback works with LangGraph graphs the same way:
79
+
80
+ ```python
81
+ from langgraph.graph import StateGraph
82
+ from agentmetrics_langchain import AgentMetricsCallback
83
+
84
+ cb = AgentMetricsCallback(api_key="am_...", base_url="http://localhost:8099")
85
+ app = build_graph().compile()
86
+
87
+ result = app.invoke(state, config={"callbacks": [cb]})
88
+ ```
89
+
90
+ ---
91
+
92
+ ## License
93
+
94
+ [MIT](../../LICENSE)
@@ -0,0 +1,4 @@
1
"""Public interface of the AgentMetrics integration for LangChain."""

from agentmetrics_langchain.callback import AgentMetricsCallback

# Keep in sync with `version` in pyproject.toml (the distribution is 0.2.0).
__version__ = "0.2.0"
__all__ = ["AgentMetricsCallback"]
@@ -0,0 +1,338 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any
6
+ from uuid import UUID
7
+
8
+ from agentmetrics.http_client import HttpClient
9
+ from agentmetrics_shared import AgentEndEvent, estimate_cost
10
+ from langchain_core.callbacks.base import BaseCallbackHandler
11
+ from langchain_core.outputs import LLMResult
12
+
13
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# INT-18: upper bound on the number of hops taken when walking a run's
# parent chain; acts as a guard against cyclic parent references.
_MAX_CHAIN_DEPTH = 50

# INT-17: upper bound on the number of top-level runs tracked at the same
# time; acts as a memory cap.
_MAX_TRACKED_RUNS = 5000
20
+
21
+
22
class _RunState:
    """Mutable accumulator for everything observed during one top-level run."""

    __slots__ = (
        "_counted_errors",
        "agent_id",
        "cache_read_tokens",
        "cache_write_tokens",
        "error",
        "input_tokens",
        "llm_calls",
        "model",
        "output_tokens",
        "start_ms",
        "status",
        "tool_calls",
        "tool_errors",
        "tool_names",
    )

    def __init__(self, agent_id: str) -> None:
        """Start a fresh record for ``agent_id`` with all counters at zero."""
        self.agent_id = agent_id
        # Despite the name, this holds the raw ``time.monotonic()`` reading,
        # which is expressed in seconds.
        self.start_ms = time.monotonic()

        # Every numeric counter begins at zero.
        for counter in (
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
            "llm_calls",
            "tool_calls",
            "tool_errors",
        ):
            setattr(self, counter, 0)

        self.tool_names: set[str] = set()
        # INT-19: run IDs whose error has already been counted (dedup).
        self._counted_errors: set[str] = set()

        # Outcome fields: a run is presumed successful until an error is seen.
        self.status = "success"
        self.error: str | None = None
        self.model: str | None = None
55
+
56
+
57
class AgentMetricsCallback(BaseCallbackHandler):
    """Report one AgentMetrics summary event per top-level LangChain run.

    Token usage, LLM-call counts and tool activity from nested runs are folded
    into the state of the top-level chain they descend from. When that chain
    ends or errors, a single ``AgentEndEvent`` payload is handed to the HTTP
    client.

    Usage::

        from agentmetrics_langchain import AgentMetricsCallback

        cb = AgentMetricsCallback(api_key="am_...", agent_id="my-agent")
        result = agent.invoke({"input": "..."}, config={"callbacks": [cb]})
        cb.flush()
    """

    def __init__(
        self,
        api_key: str,
        agent_id: str = "langchain-agent",
        base_url: str = "http://localhost:8099",
    ) -> None:
        """Create the handler and its HTTP client.

        Args:
            api_key: Forwarded to ``HttpClient``.
            agent_id: Identifier attached to every event this handler emits.
            base_url: Forwarded to ``HttpClient``.
        """
        super().__init__()
        self._client = HttpClient(api_key=api_key, base_url=base_url)
        self._agent_id = agent_id
        # run_id (str) -> RunState for top-level chains
        self._runs: dict[str, _RunState] = {}
        # run_id -> parent run_id, used to walk the ancestry chain
        self._parent_map: dict[str, str] = {}
        # tool run_id -> tool name (resolved in on_tool_end / on_tool_error)
        self._tool_names_pending: dict[str, str] = {}

    # ------------------------------------------------------------------
    # Bookkeeping helpers
    # ------------------------------------------------------------------

    def _link_parent(self, run_id: str, parent_run_id: UUID | None) -> None:
        """Remember ``run_id``'s parent so it can be resolved to a top-level run."""
        if parent_run_id is not None:
            self._parent_map[run_id] = str(parent_run_id)

    def _forget_descendants(self, root_id: str) -> None:
        """Drop bookkeeping for every run still recorded beneath ``root_id``.

        INT-17: runs normally remove their own entry when they finish; this
        sweep catches descendants that never reported an end, at any depth.
        """
        doomed = {root_id}
        grew = True
        while grew:
            grew = False
            for child, parent in self._parent_map.items():
                if parent in doomed and child not in doomed:
                    doomed.add(child)
                    grew = True
        for rid in doomed:
            self._parent_map.pop(rid, None)
            self._tool_names_pending.pop(rid, None)

    def _track_run(self, run_id: str, data: _RunState) -> None:
        """INT-17: store a run state, evicting the oldest entry at the cap."""
        if len(self._runs) >= _MAX_TRACKED_RUNS:
            logger.warning(
                "agentmetrics: _runs cap (%d) reached, dropping oldest entry",
                _MAX_TRACKED_RUNS,
            )
            # dicts preserve insertion order, so the first key is the oldest.
            oldest = next(iter(self._runs))
            del self._runs[oldest]
            # The evicted run will never be emitted, so release its subtree too.
            self._forget_descendants(oldest)
        self._runs[run_id] = data

    # ------------------------------------------------------------------
    # Chain callbacks
    # ------------------------------------------------------------------

    def on_chain_start(
        self,
        serialized: dict[str, Any],
        inputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Begin tracking a top-level chain, or link a nested chain to its parent."""
        rid = str(run_id)
        if parent_run_id is None:
            self._track_run(rid, _RunState(self._agent_id))
        else:
            self._parent_map[rid] = str(parent_run_id)

    def on_chain_end(
        self,
        outputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Emit the summary when a top-level chain finishes."""
        rid = str(run_id)
        if parent_run_id is None:
            self._emit(rid)
        else:
            # A finished nested chain no longer needs its ancestry link.
            # NOTE(review): assumes child runs report their end before the
            # enclosing chain does -- confirm for async/streaming callers.
            self._parent_map.pop(rid, None)

    def on_chain_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Mark a top-level chain as failed and emit its summary."""
        rid = str(run_id)
        if parent_run_id is not None:
            self._parent_map.pop(rid, None)
            return
        run = self._runs.get(rid)
        if run:
            run.status = "failed"
            run.error = str(error)[:500]
        self._emit(rid)  # _emit pops the run from _runs (INT-17)

    # ------------------------------------------------------------------
    # LLM callbacks
    # ------------------------------------------------------------------

    def on_llm_start(
        self,
        serialized: dict[str, Any],
        prompts: list[str],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Link a completion-model run to its parent."""
        self._link_parent(str(run_id), parent_run_id)

    def on_chat_model_start(
        self,
        serialized: dict[str, Any],
        messages: list[list[Any]],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Link a chat-model run to its parent."""
        self._link_parent(str(run_id), parent_run_id)

    def on_llm_end(
        self,
        response: LLMResult,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Add one LLM call and its token usage to the owning top-level run."""
        rid = str(run_id)
        run = self._find_top_run(rid)
        # The LLM run is over; resolve first, then release its ancestry link.
        self._parent_map.pop(rid, None)
        if run is None:
            return
        run.llm_calls += 1

        # Prefer per-message usage metadata; fall back to llm_output only when
        # no generation carried any.
        if not self._record_usage_metadata(run, response):
            self._record_llm_output(run, response)

    @staticmethod
    def _record_usage_metadata(run: _RunState, response: LLMResult) -> bool:
        """Accumulate ``message.usage_metadata`` from each generation.

        Returns:
            True if at least one generation carried usage metadata.
        """
        found = False
        for gen_list in response.generations:
            for gen in gen_list:
                msg = getattr(gen, "message", None)
                umeta = getattr(msg, "usage_metadata", None) if msg else None
                if not umeta:
                    continue
                run.input_tokens += umeta.get("input_tokens", 0) or 0
                run.output_tokens += umeta.get("output_tokens", 0) or 0
                details = umeta.get("input_token_details") or {}
                run.cache_read_tokens += details.get("cache_read", 0) or 0
                run.cache_write_tokens += details.get("cache_creation", 0) or 0
                if not run.model:
                    rmeta = getattr(msg, "response_metadata", {}) or {}
                    run.model = rmeta.get("model_name") or rmeta.get("model")
                found = True
        return found

    @staticmethod
    def _record_llm_output(run: _RunState, response: LLMResult) -> None:
        """Accumulate usage from the ``llm_output`` dict (older / non-chat models).

        Accepts both ``token_usage`` with ``prompt_tokens``/``completion_tokens``
        and ``usage`` with ``input_tokens``/``output_tokens``. Cache counts
        (INT-16) are read from whichever usage dict was found.
        """
        lo = response.llm_output or {}
        usage = lo.get("token_usage") or lo.get("usage") or {}
        run.input_tokens += (
            usage.get("prompt_tokens") or usage.get("input_tokens") or 0
        )
        run.output_tokens += (
            usage.get("completion_tokens") or usage.get("output_tokens") or 0
        )
        run.cache_read_tokens += (
            usage.get("cache_read_input_tokens")
            or usage.get("cache_read_tokens")
            or 0
        )
        run.cache_write_tokens += (
            usage.get("cache_creation_input_tokens")
            or usage.get("cache_write_tokens")
            or 0
        )
        if not run.model:
            run.model = lo.get("model_name") or lo.get("model")

    def on_llm_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Release bookkeeping for a failed LLM run; no metrics are recorded."""
        self._parent_map.pop(str(run_id), None)

    # ------------------------------------------------------------------
    # Tool callbacks
    # ------------------------------------------------------------------

    def on_tool_start(
        self,
        serialized: dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Remember the tool's name and parent until the tool run finishes."""
        rid = str(run_id)
        # Use the serialized name, else the last element of the serialized
        # id path, else "unknown".
        name = serialized.get("name") or (serialized.get("id") or ["unknown"])[-1]
        self._tool_names_pending[rid] = str(name)
        self._link_parent(rid, parent_run_id)

    def on_tool_end(
        self,
        output: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Count a completed tool call against the owning top-level run."""
        rid = str(run_id)
        run = self._find_top_run(rid)
        # INT-17: always release per-tool entries, even when no run was found.
        name = self._tool_names_pending.pop(rid, None)
        self._parent_map.pop(rid, None)
        if run is None:
            return
        run.tool_calls += 1
        if name:
            run.tool_names.add(name)

    def on_tool_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> None:
        """Count a failed tool call (and its error, once) against the owning run."""
        rid = str(run_id)
        run = self._find_top_run(rid)
        # INT-17: release per-tool entries to prevent unbounded growth.
        name = self._tool_names_pending.pop(rid, None)
        self._parent_map.pop(rid, None)
        if run is None:
            return
        run.tool_calls += 1
        # INT-19: count each failing tool run at most once.
        if rid not in run._counted_errors:
            run._counted_errors.add(rid)
            run.tool_errors += 1
        if name:
            run.tool_names.add(name)

    # ------------------------------------------------------------------
    # Resolution and emission
    # ------------------------------------------------------------------

    def _find_top_run(self, run_id: str, depth: int = 0) -> _RunState | None:
        """Walk the parent chain from ``run_id`` upward to a top-level RunState.

        INT-18: a visited set and a depth limit stop the walk on cyclic parent
        references.

        Args:
            run_id: Run to resolve.
            depth: Unused; retained so existing callers keep working.

        Returns:
            The tracked top-level state, or None if the chain ends without
            reaching one, loops, or exceeds ``_MAX_CHAIN_DEPTH``.
        """
        seen: set[str] = set()
        rid = run_id
        current_depth = 0
        while rid and rid not in seen:
            if current_depth > _MAX_CHAIN_DEPTH:
                logger.warning(
                    "agentmetrics: parent chain depth exceeded %d at run %s, stopping walk",
                    _MAX_CHAIN_DEPTH, rid,
                )
                return None
            seen.add(rid)
            if rid in self._runs:
                return self._runs[rid]
            # An empty string ends the walk when there is no recorded parent.
            rid = self._parent_map.get(rid, "")
            current_depth += 1
        return None

    def _emit(self, run_id: str) -> None:
        """Send the summary event for ``run_id`` and release its bookkeeping."""
        run = self._runs.pop(run_id, None)
        if run is None:
            return
        # Clean up any parent-map entries still hanging off this run.
        self._forget_descendants(run_id)

        # time.monotonic() is in seconds; the event carries milliseconds.
        duration_ms = (time.monotonic() - run.start_ms) * 1000
        ev = AgentEndEvent(agent_id=run.agent_id, platform="langchain")
        ev.trace_id = run_id
        ev.input_tokens = run.input_tokens
        ev.output_tokens = run.output_tokens
        ev.cache_read_tokens = run.cache_read_tokens
        ev.cache_write_tokens = run.cache_write_tokens
        ev.llm_calls = run.llm_calls
        ev.tool_calls = run.tool_calls
        ev.tool_errors = run.tool_errors
        ev.tool_names = list(run.tool_names)
        ev.status = run.status
        ev.duration_ms = round(duration_ms, 2)
        ev.error = run.error
        ev.model = run.model
        # A falsy estimate (e.g. 0) is reported as None rather than as a cost.
        ev.estimated_cost_usd = estimate_cost(
            run.model or "", run.input_tokens, run.output_tokens,
            run.cache_read_tokens, run.cache_write_tokens,
        ) or None
        self._client.fire_and_forget(ev.to_payload())

    def flush(self, timeout: float = 10.0) -> None:
        """Wait for all in-flight HTTP requests to complete."""
        self._client.flush(timeout=timeout)
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "agentmetrics-langchain"
7
+ version = "0.2.0"
8
+ description = "AgentMetrics observability integration for LangChain agents"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.10"
12
+ keywords = ["langchain", "agentmetrics", "observability", "ai-agents", "monitoring"]
13
+ dependencies = [
14
+ "agentmetrics>=0.2.0",
15
+ "agentmetrics-shared>=0.2.0",
16
+ "langchain-core>=0.2.0",
17
+ ]
18
+
19
+ [project.urls]
20
+ Homepage = "https://github.com/andalabx/agentmetrics"
21
+ Repository = "https://github.com/andalabx/agentmetrics"
22
+
23
+ [tool.hatch.build.targets.wheel]
24
+ packages = ["agentmetrics_langchain"]
25
+
26
+ [tool.uv.sources]
27
+ agentmetrics = { workspace = true }
28
+ agentmetrics-shared = { workspace = true }