@warpmetrics/warp 0.0.16 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -12
- package/package.json +7 -3
- package/src/core/warp.js +3 -1
- package/src/index.js +2 -0
- package/src/providers/anthropic.js +10 -4
- package/src/trace/act.js +1 -1
- package/src/trace/group.js +1 -1
- package/src/trace/outcome.js +1 -1
- package/src/trace/trace.js +55 -0
package/README.md
CHANGED
|
@@ -18,8 +18,8 @@ import { warp, run, group, call, outcome } from '@warpmetrics/warp';
|
|
|
18
18
|
|
|
19
19
|
const openai = warp(new OpenAI(), { apiKey: 'wm_...' });
|
|
20
20
|
|
|
21
|
-
const r = run('
|
|
22
|
-
const planning = group(r, '
|
|
21
|
+
const r = run('Code Review', { name: 'Review PR #42' });
|
|
22
|
+
const planning = group(r, 'Planning');
|
|
23
23
|
|
|
24
24
|
const response = await openai.chat.completions.create({
|
|
25
25
|
model: 'gpt-4o',
|
|
@@ -27,7 +27,7 @@ const response = await openai.chat.completions.create({
|
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
call(planning, response);
|
|
30
|
-
outcome(r, '
|
|
30
|
+
outcome(r, 'Completed', { reason: 'Approved' });
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
Every LLM call is captured by `warp()` but only sent to the API when you explicitly `call()` it into a run or group. Unclaimed responses are never transmitted.
|
|
@@ -59,7 +59,7 @@ Options are only needed on the first call. After that, config is shared across a
|
|
|
59
59
|
Create a run — the top-level unit that tracks one agent execution.
|
|
60
60
|
|
|
61
61
|
```js
|
|
62
|
-
const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
|
|
62
|
+
const r = run('Code Review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
|
|
63
63
|
```
|
|
64
64
|
|
|
65
65
|
### `run(act, label, opts?)`
|
|
@@ -67,7 +67,7 @@ const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/rep
|
|
|
67
67
|
Create a follow-up run from an act (the result of acting on an outcome).
|
|
68
68
|
|
|
69
69
|
```js
|
|
70
|
-
const r2 = run(a, 'code-review', { name: 'Retry' });
|
|
70
|
+
const r2 = run(a, 'Code Review', { name: 'Retry' });
|
|
71
71
|
```
|
|
72
72
|
|
|
73
73
|
### `group(target, label, opts?)`
|
|
@@ -75,9 +75,9 @@ const r2 = run(a, 'code-review', { name: 'Retry' });
|
|
|
75
75
|
Create a group — a logical phase or step inside a run or group.
|
|
76
76
|
|
|
77
77
|
```js
|
|
78
|
-
const planning = group(r, '
|
|
79
|
-
const coding = group(r, '
|
|
80
|
-
const subStep = group(planning, '
|
|
78
|
+
const planning = group(r, 'Planning', { name: 'Planning Phase' });
|
|
79
|
+
const coding = group(r, 'Coding');
|
|
80
|
+
const subStep = group(planning, 'Sub Step'); // groups can nest
|
|
81
81
|
```
|
|
82
82
|
|
|
83
83
|
### `call(target, response, opts?)`
|
|
@@ -95,7 +95,7 @@ call(g, response, { label: 'extract' }); // with opts
|
|
|
95
95
|
Record an outcome on any tracked target.
|
|
96
96
|
|
|
97
97
|
```js
|
|
98
|
-
outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
|
|
98
|
+
outcome(r, 'Completed', { reason: 'All checks passed', source: 'ci' });
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
### `act(target, name, opts?)`
|
|
@@ -103,9 +103,9 @@ outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
|
|
|
103
103
|
Record an action taken on an outcome. Returns an act handle that can be passed to `run()` for follow-ups.
|
|
104
104
|
|
|
105
105
|
```js
|
|
106
|
-
const oc = outcome(r, '
|
|
107
|
-
const a = act(oc, '
|
|
108
|
-
const r2 = run(a, '
|
|
106
|
+
const oc = outcome(r, 'Failed', { reason: 'Tests failed' });
|
|
107
|
+
const a = act(oc, 'Retry', { strategy: 'fix-and-rerun' });
|
|
108
|
+
const r2 = run(a, 'Code Review');
|
|
109
109
|
```
|
|
110
110
|
|
|
111
111
|
### `ref(target)`
|
|
@@ -140,6 +140,31 @@ Need another provider? [Open an issue](https://github.com/warpmetrics/warp/issue
|
|
|
140
140
|
| `WARPMETRICS_API_URL` | Custom API endpoint |
|
|
141
141
|
| `WARPMETRICS_DEBUG` | Set to `"true"` to enable debug logging |
|
|
142
142
|
|
|
143
|
+
## Development
|
|
144
|
+
|
|
145
|
+
### Running tests
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
npm install
|
|
149
|
+
npm test # unit tests only (integration tests auto-skip)
|
|
150
|
+
npm run test:coverage # with coverage report
|
|
151
|
+
npm run test:watch # watch mode
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Integration tests
|
|
155
|
+
|
|
156
|
+
Integration tests make real API calls to OpenAI and Anthropic. They are **automatically skipped** unless the corresponding API keys are set.
|
|
157
|
+
|
|
158
|
+
To run them:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
cp .env.example .env
|
|
162
|
+
# Edit .env with your API keys
|
|
163
|
+
npm run test:integration
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
> **Note:** Integration tests make a small number of API calls with `max_tokens: 5`, so costs are minimal (fractions of a cent per run).
|
|
167
|
+
|
|
143
168
|
## License
|
|
144
169
|
|
|
145
170
|
MIT
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warpmetrics/warp",
|
|
3
|
-
"version": "0.0.16",
|
|
3
|
+
"version": "0.0.17",
|
|
4
4
|
"description": "Measure your agents, not your LLM calls.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -16,11 +16,12 @@
|
|
|
16
16
|
],
|
|
17
17
|
"scripts": {
|
|
18
18
|
"test": "vitest run",
|
|
19
|
+
"test:integration": "vitest run integration",
|
|
19
20
|
"test:watch": "vitest",
|
|
20
21
|
"test:coverage": "vitest run --coverage",
|
|
21
22
|
"preversion": "vitest run --coverage",
|
|
22
|
-
"release:patch": "npm version patch && git push origin main --tags",
|
|
23
|
-
"release:minor": "npm version minor && git push origin main --tags"
|
|
23
|
+
"release:patch": "npm install && npm version patch && git push origin main --tags",
|
|
24
|
+
"release:minor": "npm install && npm version minor && git push origin main --tags"
|
|
24
25
|
},
|
|
25
26
|
"keywords": [
|
|
26
27
|
"ai",
|
|
@@ -43,7 +44,10 @@
|
|
|
43
44
|
"ulid": "^3.0.2"
|
|
44
45
|
},
|
|
45
46
|
"devDependencies": {
|
|
47
|
+
"@anthropic-ai/sdk": "^0.74.0",
|
|
46
48
|
"@vitest/coverage-v8": "^1.6.1",
|
|
49
|
+
"dotenv": "^17.3.1",
|
|
50
|
+
"openai": "^6.22.0",
|
|
47
51
|
"vitest": "^1.2.0"
|
|
48
52
|
}
|
|
49
53
|
}
|
package/src/core/warp.js
CHANGED
|
@@ -85,7 +85,9 @@ function wrapStream(stream, ctx) {
|
|
|
85
85
|
for await (const chunk of stream) {
|
|
86
86
|
const delta = ctx.provider.extractStreamDelta(chunk);
|
|
87
87
|
if (delta.content) content += delta.content;
|
|
88
|
-
if (delta.usage)
|
|
88
|
+
if (delta.usage) {
|
|
89
|
+
usage = usage ? { ...usage, ...delta.usage } : delta.usage;
|
|
90
|
+
}
|
|
89
91
|
yield chunk;
|
|
90
92
|
}
|
|
91
93
|
|
package/src/index.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
// run(act, label, opts?) — create a follow-up run from an act
|
|
7
7
|
// group(target, label, opts?) — create a group inside a run or group
|
|
8
8
|
// call(target, response, opts?) — track an LLM call
|
|
9
|
+
// trace(target, data) — manually trace a call (non-SDK tools)
|
|
9
10
|
// outcome(target, name, opts?) — record a result
|
|
10
11
|
// act(target, name, opts?) — record an action, returns act ref
|
|
11
12
|
// ref(target) — get tracking ID
|
|
@@ -13,6 +14,7 @@ export { warp } from './core/warp.js';
|
|
|
13
14
|
export { run } from './trace/run.js';
|
|
14
15
|
export { group } from './trace/group.js';
|
|
15
16
|
export { call } from './trace/call.js';
|
|
17
|
+
export { trace } from './trace/trace.js';
|
|
16
18
|
export { outcome } from './trace/outcome.js';
|
|
17
19
|
export { act } from './trace/act.js';
|
|
18
20
|
export { ref } from './trace/ref.js';
|
package/src/providers/anthropic.js
CHANGED
|
@@ -21,10 +21,16 @@ export function extract(result) {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
export function extractStreamDelta(chunk) {
|
|
24
|
-
|
|
25
|
-
content: chunk.
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
if (chunk.type === 'content_block_delta') {
|
|
25
|
+
return { content: chunk.delta?.text || null, usage: null };
|
|
26
|
+
}
|
|
27
|
+
if (chunk.type === 'message_start') {
|
|
28
|
+
return { content: null, usage: chunk.message?.usage || null };
|
|
29
|
+
}
|
|
30
|
+
if (chunk.type === 'message_delta') {
|
|
31
|
+
return { content: null, usage: chunk.usage || null };
|
|
32
|
+
}
|
|
33
|
+
return { content: null, usage: null };
|
|
28
34
|
}
|
|
29
35
|
|
|
30
36
|
export function normalizeUsage(usage) {
|
package/src/trace/act.js
CHANGED
|
@@ -9,7 +9,7 @@ import { logAct, getConfig } from '../core/transport.js';
|
|
|
9
9
|
* Record an action taken on an outcome (e.g. acting on feedback).
|
|
10
10
|
*
|
|
11
11
|
* @param {{ id: string, _type: 'outcome' } | string} target — Outcome handle from outcome(), or outcome ref string (wm_oc_*)
|
|
12
|
-
* @param {string} name — action name ("
|
|
12
|
+
* @param {string} name — action name ("Improve Section", "Refine Prompt")
|
|
13
13
|
* @param {Record<string, any>} [opts]
|
|
14
14
|
* @returns {{ readonly id: string, readonly _type: 'act' } | undefined}
|
|
15
15
|
*/
|
package/src/trace/group.js
CHANGED
|
@@ -9,7 +9,7 @@ import { logGroup, logLink, getConfig } from '../core/transport.js';
|
|
|
9
9
|
* Create a group — a logical phase or step inside a run or another group.
|
|
10
10
|
*
|
|
11
11
|
* @param {object | string} target — Run, Group, or ref string
|
|
12
|
-
* @param {string} label — group type used for aggregation ("
|
|
12
|
+
* @param {string} label — group type used for aggregation ("Planner", "Coder")
|
|
13
13
|
* @param {Record<string, any>} [opts]
|
|
14
14
|
* @returns {{ readonly id: string, readonly _type: 'group' }}
|
|
15
15
|
*/
|
package/src/trace/outcome.js
CHANGED
|
@@ -11,7 +11,7 @@ import { logOutcome, getConfig } from '../core/transport.js';
|
|
|
11
11
|
* Returns a frozen Outcome handle that can be passed to act().
|
|
12
12
|
*
|
|
13
13
|
* @param {object | string} target — Run, Group, LLM response, or ref string
|
|
14
|
-
* @param {string} name — outcome name ("
|
|
14
|
+
* @param {string} name — outcome name ("Completed", "Failed", "Helpful")
|
|
15
15
|
* @param {Record<string, any>} [opts]
|
|
16
16
|
* @returns {{ id: string, _type: 'outcome' } | undefined}
|
|
17
17
|
*/
|
package/src/trace/trace.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// Warpmetrics SDK — trace()
|
|
2
|
+
|
|
3
|
+
import { ref as getRef } from './ref.js';
|
|
4
|
+
import { generateId } from '../core/utils.js';
|
|
5
|
+
import { runRegistry, groupRegistry } from '../core/registry.js';
|
|
6
|
+
import { logCall, logLink, getConfig } from '../core/transport.js';
|
|
7
|
+
|
|
8
|
+
export function trace(target, data) {
|
|
9
|
+
if (!data || !data.provider || !data.model) {
|
|
10
|
+
if (getConfig().debug) console.warn('[warpmetrics] trace() — data must include provider and model.');
|
|
11
|
+
return;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const targetId = getRef(target);
|
|
15
|
+
if (!targetId) {
|
|
16
|
+
if (getConfig().debug) console.warn('[warpmetrics] trace() — target not recognised.');
|
|
17
|
+
return;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Run registry takes precedence over group registry when targetId exists in both
|
|
21
|
+
const parentData = runRegistry.get(targetId) || groupRegistry.get(targetId);
|
|
22
|
+
if (!parentData) {
|
|
23
|
+
if (getConfig().debug) console.warn('[warpmetrics] trace() — parent not found in registry.');
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const id = generateId('call');
|
|
28
|
+
|
|
29
|
+
const event = {
|
|
30
|
+
id,
|
|
31
|
+
provider: data.provider,
|
|
32
|
+
model: data.model,
|
|
33
|
+
messages: data.messages || null,
|
|
34
|
+
response: data.response || null,
|
|
35
|
+
tools: data.tools || null,
|
|
36
|
+
toolCalls: data.toolCalls || null,
|
|
37
|
+
tokens: data.tokens || null,
|
|
38
|
+
latency: data.latency ?? null,
|
|
39
|
+
timestamp: data.timestamp || new Date().toISOString(),
|
|
40
|
+
status: data.status || 'success',
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
if (data.error) event.error = data.error;
|
|
44
|
+
if (data.opts) event.opts = data.opts;
|
|
45
|
+
if (data.cost != null) {
|
|
46
|
+
const costNum = Number(data.cost);
|
|
47
|
+
if (!isNaN(costNum)) event.costOverride = Math.round(costNum * 1_000_000);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
logCall(event);
|
|
51
|
+
logLink({ parentId: targetId, childId: id, type: 'call' });
|
|
52
|
+
if (parentData?.calls) parentData.calls.push(id);
|
|
53
|
+
|
|
54
|
+
return Object.freeze({ id, _type: 'call' });
|
|
55
|
+
}
|