@warpmetrics/warp 0.0.16 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -14
- package/package.json +7 -3
- package/src/core/warp.js +3 -1
- package/src/index.d.ts +37 -0
- package/src/index.js +2 -0
- package/src/providers/anthropic.js +10 -4
- package/src/trace/act.js +1 -1
- package/src/trace/group.js +1 -1
- package/src/trace/outcome.js +1 -1
- package/src/trace/trace.js +55 -0
package/README.md
CHANGED
|
@@ -14,12 +14,12 @@ npm install @warpmetrics/warp
|
|
|
14
14
|
|
|
15
15
|
```js
|
|
16
16
|
import OpenAI from 'openai';
|
|
17
|
-
import { warp, run, group, call, outcome } from '@warpmetrics/warp';
|
|
17
|
+
import { warp, run, group, call, trace, outcome } from '@warpmetrics/warp';
|
|
18
18
|
|
|
19
19
|
const openai = warp(new OpenAI(), { apiKey: 'wm_...' });
|
|
20
20
|
|
|
21
|
-
const r = run('
|
|
22
|
-
const planning = group(r, '
|
|
21
|
+
const r = run('Code Review', { name: 'Review PR #42' });
|
|
22
|
+
const planning = group(r, 'Planning');
|
|
23
23
|
|
|
24
24
|
const response = await openai.chat.completions.create({
|
|
25
25
|
model: 'gpt-4o',
|
|
@@ -27,7 +27,7 @@ const response = await openai.chat.completions.create({
|
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
call(planning, response);
|
|
30
|
-
outcome(r, '
|
|
30
|
+
outcome(r, 'Completed', { reason: 'Approved' });
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
Every LLM call is captured by `warp()` but only sent to the API when you explicitly `call()` it into a run or group. Unclaimed responses are never transmitted.
|
|
@@ -59,7 +59,7 @@ Options are only needed on the first call. After that, config is shared across a
|
|
|
59
59
|
Create a run — the top-level unit that tracks one agent execution.
|
|
60
60
|
|
|
61
61
|
```js
|
|
62
|
-
const r = run('
|
|
62
|
+
const r = run('Code Review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
|
|
63
63
|
```
|
|
64
64
|
|
|
65
65
|
### `run(act, label, opts?)`
|
|
@@ -67,7 +67,7 @@ const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/rep
|
|
|
67
67
|
Create a follow-up run from an act (the result of acting on an outcome).
|
|
68
68
|
|
|
69
69
|
```js
|
|
70
|
-
const r2 = run(a, '
|
|
70
|
+
const r2 = run(a, 'Code Review', { name: 'Retry' });
|
|
71
71
|
```
|
|
72
72
|
|
|
73
73
|
### `group(target, label, opts?)`
|
|
@@ -75,9 +75,9 @@ const r2 = run(a, 'code-review', { name: 'Retry' });
|
|
|
75
75
|
Create a group — a logical phase or step inside a run or group.
|
|
76
76
|
|
|
77
77
|
```js
|
|
78
|
-
const planning = group(r, '
|
|
79
|
-
const coding = group(r, '
|
|
80
|
-
const subStep = group(planning, '
|
|
78
|
+
const planning = group(r, 'Planning', { name: 'Planning Phase' });
|
|
79
|
+
const coding = group(r, 'Coding');
|
|
80
|
+
const subStep = group(planning, 'Sub Step'); // groups can nest
|
|
81
81
|
```
|
|
82
82
|
|
|
83
83
|
### `call(target, response, opts?)`
|
|
@@ -90,12 +90,44 @@ call(r, response);
|
|
|
90
90
|
call(g, response, { label: 'extract' }); // with opts
|
|
91
91
|
```
|
|
92
92
|
|
|
93
|
+
### `trace(target, data)`
|
|
94
|
+
|
|
95
|
+
Manually record an LLM call for providers not wrapped by `warp()`.
|
|
96
|
+
|
|
97
|
+
```js
|
|
98
|
+
trace(r, {
|
|
99
|
+
provider: 'google',
|
|
100
|
+
model: 'gemini-2.0-flash',
|
|
101
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
102
|
+
response: 'Hi there!',
|
|
103
|
+
tokens: { prompt: 10, completion: 5 },
|
|
104
|
+
latency: 230,
|
|
105
|
+
cost: 0.0001,
|
|
106
|
+
});
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
| Field | Type | Required | Description |
|
|
110
|
+
|---|---|---|---|
|
|
111
|
+
| `provider` | `string` | Yes | Provider name (e.g. `"google"`, `"cohere"`) |
|
|
112
|
+
| `model` | `string` | Yes | Model identifier |
|
|
113
|
+
| `messages` | `any` | No | Request messages/input |
|
|
114
|
+
| `response` | `string` | No | Response text |
|
|
115
|
+
| `tools` | `string[]` | No | Tool names available |
|
|
116
|
+
| `toolCalls` | `{ id?, name, arguments? }[]` | No | Tool calls made |
|
|
117
|
+
| `tokens` | `{ prompt?, completion?, total? }` | No | Token usage |
|
|
118
|
+
| `latency` | `number` | No | Duration in milliseconds |
|
|
119
|
+
| `timestamp` | `string` | No | ISO 8601 timestamp (auto-generated if omitted) |
|
|
120
|
+
| `status` | `string` | No | `"success"` (default) or `"error"` |
|
|
121
|
+
| `error` | `string` | No | Error message |
|
|
122
|
+
| `cost` | `number` | No | Cost in USD |
|
|
123
|
+
| `opts` | `Record<string, any>` | No | Custom metadata |
|
|
124
|
+
|
|
93
125
|
### `outcome(target, name, opts?)`
|
|
94
126
|
|
|
95
127
|
Record an outcome on any tracked target.
|
|
96
128
|
|
|
97
129
|
```js
|
|
98
|
-
outcome(r, '
|
|
130
|
+
outcome(r, 'Completed', { reason: 'All checks passed', source: 'ci' });
|
|
99
131
|
```
|
|
100
132
|
|
|
101
133
|
### `act(target, name, opts?)`
|
|
@@ -103,18 +135,19 @@ outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
|
|
|
103
135
|
Record an action taken on an outcome. Returns an act handle that can be passed to `run()` for follow-ups.
|
|
104
136
|
|
|
105
137
|
```js
|
|
106
|
-
const oc = outcome(r, '
|
|
107
|
-
const a = act(oc, '
|
|
108
|
-
const r2 = run(a, '
|
|
138
|
+
const oc = outcome(r, 'Failed', { reason: 'Tests failed' });
|
|
139
|
+
const a = act(oc, 'Retry', { strategy: 'fix-and-rerun' });
|
|
140
|
+
const r2 = run(a, 'Code Review');
|
|
109
141
|
```
|
|
110
142
|
|
|
111
143
|
### `ref(target)`
|
|
112
144
|
|
|
113
|
-
Resolve any target (run, group, or LLM response) to its string ID.
|
|
145
|
+
Resolve any target (run, group, outcome, act, or LLM response) to its string ID. Also accepts raw ID strings (e.g. `"wm_run_..."` loaded from a database) and registers them locally.
|
|
114
146
|
|
|
115
147
|
```js
|
|
116
148
|
ref(r) // 'wm_run_01jkx3ndek0gh4r5tmqp9a3bcv'
|
|
117
149
|
ref(response) // 'wm_call_01jkx3ndef8mn2q7kpvhc4e9ws'
|
|
150
|
+
ref('wm_run_01jkx3ndek0gh4r5tmqp9a3bcv') // adopts and returns the ID
|
|
118
151
|
```
|
|
119
152
|
|
|
120
153
|
### `flush()`
|
|
@@ -140,6 +173,31 @@ Need another provider? [Open an issue](https://github.com/warpmetrics/warp/issue
|
|
|
140
173
|
| `WARPMETRICS_API_URL` | Custom API endpoint |
|
|
141
174
|
| `WARPMETRICS_DEBUG` | Set to `"true"` to enable debug logging |
|
|
142
175
|
|
|
176
|
+
## Development
|
|
177
|
+
|
|
178
|
+
### Running tests
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
npm install
|
|
182
|
+
npm test # unit tests only (integration tests auto-skip)
|
|
183
|
+
npm run test:coverage # with coverage report
|
|
184
|
+
npm run test:watch # watch mode
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Integration tests
|
|
188
|
+
|
|
189
|
+
Integration tests make real API calls to OpenAI and Anthropic. They are **automatically skipped** unless the corresponding API keys are set.
|
|
190
|
+
|
|
191
|
+
To run them:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
cp .env.example .env
|
|
195
|
+
# Edit .env with your API keys
|
|
196
|
+
npm run test:integration
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
> **Note:** Integration tests make a small number of API calls with `max_tokens: 5`, so costs are minimal (fractions of a cent per run).
|
|
200
|
+
|
|
143
201
|
## License
|
|
144
202
|
|
|
145
203
|
MIT
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warpmetrics/warp",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.18",
|
|
4
4
|
"description": "Measure your agents, not your LLM calls.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -16,11 +16,12 @@
|
|
|
16
16
|
],
|
|
17
17
|
"scripts": {
|
|
18
18
|
"test": "vitest run",
|
|
19
|
+
"test:integration": "vitest run integration",
|
|
19
20
|
"test:watch": "vitest",
|
|
20
21
|
"test:coverage": "vitest run --coverage",
|
|
21
22
|
"preversion": "vitest run --coverage",
|
|
22
|
-
"release:patch": "npm version patch && git push origin main --tags",
|
|
23
|
-
"release:minor": "npm version minor && git push origin main --tags"
|
|
23
|
+
"release:patch": "npm install && npm version patch && git push origin main --tags",
|
|
24
|
+
"release:minor": "npm install && npm version minor && git push origin main --tags"
|
|
24
25
|
},
|
|
25
26
|
"keywords": [
|
|
26
27
|
"ai",
|
|
@@ -43,7 +44,10 @@
|
|
|
43
44
|
"ulid": "^3.0.2"
|
|
44
45
|
},
|
|
45
46
|
"devDependencies": {
|
|
47
|
+
"@anthropic-ai/sdk": "^0.74.0",
|
|
46
48
|
"@vitest/coverage-v8": "^1.6.1",
|
|
49
|
+
"dotenv": "^17.3.1",
|
|
50
|
+
"openai": "^6.22.0",
|
|
47
51
|
"vitest": "^1.2.0"
|
|
48
52
|
}
|
|
49
53
|
}
|
package/src/core/warp.js
CHANGED
|
@@ -85,7 +85,9 @@ function wrapStream(stream, ctx) {
|
|
|
85
85
|
for await (const chunk of stream) {
|
|
86
86
|
const delta = ctx.provider.extractStreamDelta(chunk);
|
|
87
87
|
if (delta.content) content += delta.content;
|
|
88
|
-
if (delta.usage)
|
|
88
|
+
if (delta.usage) {
|
|
89
|
+
usage = usage ? { ...usage, ...delta.usage } : delta.usage;
|
|
90
|
+
}
|
|
89
91
|
yield chunk;
|
|
90
92
|
}
|
|
91
93
|
|
package/src/index.d.ts
CHANGED
|
@@ -47,6 +47,43 @@ export function group(target: Run | Group | string, label: string, opts?: Record
|
|
|
47
47
|
/** Track an LLM call by linking a response to a run or group. */
|
|
48
48
|
export function call(target: Run | Group | string, response: object, opts?: Record<string, any>): void;
|
|
49
49
|
|
|
50
|
+
export interface TraceData {
|
|
51
|
+
/** Provider name (e.g. "google", "cohere"). */
|
|
52
|
+
provider: string;
|
|
53
|
+
/** Model identifier. */
|
|
54
|
+
model: string;
|
|
55
|
+
/** Request messages/input. */
|
|
56
|
+
messages?: any;
|
|
57
|
+
/** Response text. */
|
|
58
|
+
response?: string;
|
|
59
|
+
/** Tool names available. */
|
|
60
|
+
tools?: string[];
|
|
61
|
+
/** Tool calls made. */
|
|
62
|
+
toolCalls?: { id?: string; name: string; arguments?: string }[];
|
|
63
|
+
/** Token usage. */
|
|
64
|
+
tokens?: { prompt?: number; completion?: number; total?: number };
|
|
65
|
+
/** Duration in milliseconds. */
|
|
66
|
+
latency?: number;
|
|
67
|
+
/** ISO 8601 timestamp (auto-generated if omitted). */
|
|
68
|
+
timestamp?: string;
|
|
69
|
+
/** "success" (default) or "error". */
|
|
70
|
+
status?: string;
|
|
71
|
+
/** Error message. */
|
|
72
|
+
error?: string;
|
|
73
|
+
/** Cost in USD. */
|
|
74
|
+
cost?: number;
|
|
75
|
+
/** Custom metadata. */
|
|
76
|
+
opts?: Record<string, any>;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export interface Call {
|
|
80
|
+
readonly id: string;
|
|
81
|
+
readonly _type: 'call';
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/** Manually record an LLM call for providers not wrapped by warp(). */
|
|
85
|
+
export function trace(target: Run | Group | string, data: TraceData): Call | undefined;
|
|
86
|
+
|
|
50
87
|
/** Record an outcome on any tracked target. Returns an Outcome handle for use with act(). */
|
|
51
88
|
export function outcome(
|
|
52
89
|
target: Run | Group | object | string,
|
package/src/index.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
// run(act, label, opts?) — create a follow-up run from an act
|
|
7
7
|
// group(target, label, opts?) — create a group inside a run or group
|
|
8
8
|
// call(target, response, opts?) — track an LLM call
|
|
9
|
+
// trace(target, data) — manually trace a call (non-SDK tools)
|
|
9
10
|
// outcome(target, name, opts?) — record a result
|
|
10
11
|
// act(target, name, opts?) — record an action, returns act ref
|
|
11
12
|
// ref(target) — get tracking ID
|
|
@@ -13,6 +14,7 @@ export { warp } from './core/warp.js';
|
|
|
13
14
|
export { run } from './trace/run.js';
|
|
14
15
|
export { group } from './trace/group.js';
|
|
15
16
|
export { call } from './trace/call.js';
|
|
17
|
+
export { trace } from './trace/trace.js';
|
|
16
18
|
export { outcome } from './trace/outcome.js';
|
|
17
19
|
export { act } from './trace/act.js';
|
|
18
20
|
export { ref } from './trace/ref.js';
|
package/src/providers/anthropic.js
CHANGED
|
@@ -21,10 +21,16 @@ export function extract(result) {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
/**
 * Pull the text content and usage info carried by one streaming chunk.
 *
 * Only three chunk types contribute data:
 *   - `content_block_delta` — text from `chunk.delta.text`
 *   - `message_start`       — usage from `chunk.message.usage`
 *   - `message_delta`       — usage from `chunk.usage`
 * Every other chunk (including a missing one) yields an empty delta.
 *
 * @param {object | null | undefined} chunk — a single event from the stream
 * @returns {{ content: string | null, usage: object | null }}
 */
export function extractStreamDelta(chunk) {
  // `chunk?.type` rather than `chunk.type`: this runs inside the wrapped
  // stream iterator, so a throw here would surface in the caller's own loop.
  switch (chunk?.type) {
    case 'content_block_delta':
      // Deltas without text (or with an empty string) report no content.
      return { content: chunk.delta?.text || null, usage: null };
    case 'message_start':
      return { content: null, usage: chunk.message?.usage || null };
    case 'message_delta':
      return { content: null, usage: chunk.usage || null };
    default:
      return { content: null, usage: null };
  }
}
|
|
29
35
|
|
|
30
36
|
export function normalizeUsage(usage) {
|
package/src/trace/act.js
CHANGED
|
@@ -9,7 +9,7 @@ import { logAct, getConfig } from '../core/transport.js';
|
|
|
9
9
|
* Record an action taken on an outcome (e.g. acting on feedback).
|
|
10
10
|
*
|
|
11
11
|
* @param {{ id: string, _type: 'outcome' } | string} target — Outcome handle from outcome(), or outcome ref string (wm_oc_*)
|
|
12
|
-
* @param {string} name — action name ("
|
|
12
|
+
* @param {string} name — action name ("Improve Section", "Refine Prompt")
|
|
13
13
|
* @param {Record<string, any>} [opts]
|
|
14
14
|
* @returns {{ readonly id: string, readonly _type: 'act' } | undefined}
|
|
15
15
|
*/
|
package/src/trace/group.js
CHANGED
|
@@ -9,7 +9,7 @@ import { logGroup, logLink, getConfig } from '../core/transport.js';
|
|
|
9
9
|
* Create a group — a logical phase or step inside a run or another group.
|
|
10
10
|
*
|
|
11
11
|
* @param {object | string} target — Run, Group, or ref string
|
|
12
|
-
* @param {string} label — group type used for aggregation ("
|
|
12
|
+
* @param {string} label — group type used for aggregation ("Planner", "Coder")
|
|
13
13
|
* @param {Record<string, any>} [opts]
|
|
14
14
|
* @returns {{ readonly id: string, readonly _type: 'group' }}
|
|
15
15
|
*/
|
package/src/trace/outcome.js
CHANGED
|
@@ -11,7 +11,7 @@ import { logOutcome, getConfig } from '../core/transport.js';
|
|
|
11
11
|
* Returns a frozen Outcome handle that can be passed to act().
|
|
12
12
|
*
|
|
13
13
|
* @param {object | string} target — Run, Group, LLM response, or ref string
|
|
14
|
-
* @param {string} name — outcome name ("
|
|
14
|
+
* @param {string} name — outcome name ("Completed", "Failed", "Helpful")
|
|
15
15
|
* @param {Record<string, any>} [opts]
|
|
16
16
|
* @returns {{ id: string, _type: 'outcome' } | undefined}
|
|
17
17
|
*/
|
package/src/trace/trace.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// Warpmetrics SDK — trace()
|
|
2
|
+
|
|
3
|
+
import { ref as getRef } from './ref.js';
|
|
4
|
+
import { generateId } from '../core/utils.js';
|
|
5
|
+
import { runRegistry, groupRegistry } from '../core/registry.js';
|
|
6
|
+
import { logCall, logLink, getConfig } from '../core/transport.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Manually record an LLM call for providers not wrapped by warp().
 *
 * Builds a call event from `data`, hands it to logCall(), links it to the
 * resolved target with logLink(), and appends the new call ID to the parent's
 * `calls` list when the parent entry has one.
 *
 * Returns `undefined` (with a warning when debug is enabled) if `data` lacks
 * `provider` or `model`, if `target` cannot be resolved to an ID, or if that
 * ID is in neither the run registry nor the group registry.
 *
 * @param {object | string} target — Run, Group, or ref string
 * @param {object} data — call details
 * @param {string} data.provider — provider name (required)
 * @param {string} data.model — model identifier (required)
 * @param {any} [data.messages] — request messages/input
 * @param {string} [data.response] — response text
 * @param {string[]} [data.tools] — tool names available
 * @param {Array<object>} [data.toolCalls] — tool calls made
 * @param {object} [data.tokens] — token usage
 * @param {number} [data.latency] — duration in milliseconds
 * @param {string} [data.timestamp] — defaults to the current time as an ISO string
 * @param {string} [data.status] — defaults to 'success'
 * @param {string} [data.error] — error message
 * @param {number} [data.cost] — cost; ignored unless it converts to a finite number
 * @param {Record<string, any>} [data.opts] — custom metadata
 * @returns {{ readonly id: string, readonly _type: 'call' } | undefined}
 */
export function trace(target, data) {
  if (!data || !data.provider || !data.model) {
    if (getConfig().debug) console.warn('[warpmetrics] trace() — data must include provider and model.');
    return;
  }

  const targetId = getRef(target);
  if (!targetId) {
    if (getConfig().debug) console.warn('[warpmetrics] trace() — target not recognised.');
    return;
  }

  // Run registry takes precedence over group registry when targetId exists in both
  const parentData = runRegistry.get(targetId) || groupRegistry.get(targetId);
  if (!parentData) {
    if (getConfig().debug) console.warn('[warpmetrics] trace() — parent not found in registry.');
    return;
  }

  const id = generateId('call');

  const event = {
    id,
    provider: data.provider,
    model: data.model,
    messages: data.messages || null,
    // `??` so an empty response string is recorded as '' rather than null.
    response: data.response ?? null,
    tools: data.tools || null,
    toolCalls: data.toolCalls || null,
    tokens: data.tokens || null,
    latency: data.latency ?? null,
    timestamp: data.timestamp || new Date().toISOString(),
    status: data.status || 'success',
  };

  if (data.error) event.error = data.error;
  if (data.opts) event.opts = data.opts;
  if (data.cost != null) {
    const costNum = Number(data.cost);
    // Number.isFinite rejects NaN and ±Infinity; a non-finite override would
    // serialise to null in JSON. The value is scaled by 1,000,000 and rounded
    // to an integer.
    if (Number.isFinite(costNum)) event.costOverride = Math.round(costNum * 1_000_000);
  }

  logCall(event);
  logLink({ parentId: targetId, childId: id, type: 'call' });
  // parentData is known to be non-null here; only the `calls` list is optional.
  if (parentData.calls) parentData.calls.push(id);

  return Object.freeze({ id, _type: 'call' });
}
|