@warpmetrics/warp 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,8 +18,8 @@ import { warp, run, group, call, outcome } from '@warpmetrics/warp';
18
18
 
19
19
  const openai = warp(new OpenAI(), { apiKey: 'wm_...' });
20
20
 
21
- const r = run('code-review', { name: 'Review PR #42' });
22
- const planning = group(r, 'planning');
21
+ const r = run('Code Review', { name: 'Review PR #42' });
22
+ const planning = group(r, 'Planning');
23
23
 
24
24
  const response = await openai.chat.completions.create({
25
25
  model: 'gpt-4o',
@@ -27,7 +27,7 @@ const response = await openai.chat.completions.create({
27
27
  });
28
28
 
29
29
  call(planning, response);
30
- outcome(r, 'completed', { reason: 'Approved' });
30
+ outcome(r, 'Completed', { reason: 'Approved' });
31
31
  ```
32
32
 
33
33
  Every LLM call is captured by `warp()` but only sent to the API when you explicitly `call()` it into a run or group. Unclaimed responses are never transmitted.
@@ -59,7 +59,7 @@ Options are only needed on the first call. After that, config is shared across a
59
59
  Create a run — the top-level unit that tracks one agent execution.
60
60
 
61
61
  ```js
62
- const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
62
+ const r = run('Code Review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
63
63
  ```
64
64
 
65
65
  ### `run(act, label, opts?)`
@@ -67,7 +67,7 @@ const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/rep
67
67
  Create a follow-up run from an act (the result of acting on an outcome).
68
68
 
69
69
  ```js
70
- const r2 = run(a, 'code-review', { name: 'Retry' });
70
+ const r2 = run(a, 'Code Review', { name: 'Retry' });
71
71
  ```
72
72
 
73
73
  ### `group(target, label, opts?)`
@@ -75,9 +75,9 @@ const r2 = run(a, 'code-review', { name: 'Retry' });
75
75
  Create a group — a logical phase or step inside a run or group.
76
76
 
77
77
  ```js
78
- const planning = group(r, 'planning', { name: 'Planning phase' });
79
- const coding = group(r, 'coding');
80
- const subStep = group(planning, 'sub-step'); // groups can nest
78
+ const planning = group(r, 'Planning', { name: 'Planning Phase' });
79
+ const coding = group(r, 'Coding');
80
+ const subStep = group(planning, 'Sub Step'); // groups can nest
81
81
  ```
82
82
 
83
83
  ### `call(target, response, opts?)`
@@ -95,7 +95,7 @@ call(g, response, { label: 'extract' }); // with opts
95
95
  Record an outcome on any tracked target.
96
96
 
97
97
  ```js
98
- outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
98
+ outcome(r, 'Completed', { reason: 'All checks passed', source: 'ci' });
99
99
  ```
100
100
 
101
101
  ### `act(target, name, opts?)`
@@ -103,9 +103,9 @@ outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
103
103
  Record an action taken on an outcome. Returns an act handle that can be passed to `run()` for follow-ups.
104
104
 
105
105
  ```js
106
- const oc = outcome(r, 'failed', { reason: 'Tests failed' });
107
- const a = act(oc, 'retry', { strategy: 'fix-and-rerun' });
108
- const r2 = run(a, 'code-review');
106
+ const oc = outcome(r, 'Failed', { reason: 'Tests failed' });
107
+ const a = act(oc, 'Retry', { strategy: 'fix-and-rerun' });
108
+ const r2 = run(a, 'Code Review');
109
109
  ```
110
110
 
111
111
  ### `ref(target)`
@@ -140,6 +140,31 @@ Need another provider? [Open an issue](https://github.com/warpmetrics/warp/issue
140
140
  | `WARPMETRICS_API_URL` | Custom API endpoint |
141
141
  | `WARPMETRICS_DEBUG` | Set to `"true"` to enable debug logging |
142
142
 
143
+ ## Development
144
+
145
+ ### Running tests
146
+
147
+ ```bash
148
+ npm install
149
+ npm test # unit tests only (integration tests auto-skip)
150
+ npm run test:coverage # with coverage report
151
+ npm run test:watch # watch mode
152
+ ```
153
+
154
+ ### Integration tests
155
+
156
+ Integration tests make real API calls to OpenAI and Anthropic. They are **automatically skipped** unless the corresponding API keys are set.
157
+
158
+ To run them:
159
+
160
+ ```bash
161
+ cp .env.example .env
162
+ # Edit .env with your API keys
163
+ npm run test:integration
164
+ ```
165
+
166
+ > **Note:** Integration tests make a small number of API calls with `max_tokens: 5`, so costs are minimal (fractions of a cent per run).
167
+
143
168
  ## License
144
169
 
145
170
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warpmetrics/warp",
3
- "version": "0.0.16",
3
+ "version": "0.0.17",
4
4
  "description": "Measure your agents, not your LLM calls.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -16,11 +16,12 @@
16
16
  ],
17
17
  "scripts": {
18
18
  "test": "vitest run",
19
+ "test:integration": "vitest run integration",
19
20
  "test:watch": "vitest",
20
21
  "test:coverage": "vitest run --coverage",
21
22
  "preversion": "vitest run --coverage",
22
- "release:patch": "npm version patch && git push origin main --tags",
23
- "release:minor": "npm version minor && git push origin main --tags"
23
+ "release:patch": "npm install && npm version patch && git push origin main --tags",
24
+ "release:minor": "npm install && npm version minor && git push origin main --tags"
24
25
  },
25
26
  "keywords": [
26
27
  "ai",
@@ -43,7 +44,10 @@
43
44
  "ulid": "^3.0.2"
44
45
  },
45
46
  "devDependencies": {
47
+ "@anthropic-ai/sdk": "^0.74.0",
46
48
  "@vitest/coverage-v8": "^1.6.1",
49
+ "dotenv": "^17.3.1",
50
+ "openai": "^6.22.0",
47
51
  "vitest": "^1.2.0"
48
52
  }
49
53
  }
package/src/core/warp.js CHANGED
@@ -85,7 +85,9 @@ function wrapStream(stream, ctx) {
85
85
  for await (const chunk of stream) {
86
86
  const delta = ctx.provider.extractStreamDelta(chunk);
87
87
  if (delta.content) content += delta.content;
88
- if (delta.usage) usage = delta.usage;
88
+ if (delta.usage) {
89
+ usage = usage ? { ...usage, ...delta.usage } : delta.usage;
90
+ }
89
91
  yield chunk;
90
92
  }
91
93
 
package/src/index.js CHANGED
@@ -6,6 +6,7 @@
6
6
  // run(act, label, opts?) — create a follow-up run from an act
7
7
  // group(target, label, opts?) — create a group inside a run or group
8
8
  // call(target, response, opts?) — track an LLM call
9
+ // trace(target, data) — manually trace a call (non-SDK tools)
9
10
  // outcome(target, name, opts?) — record a result
10
11
  // act(target, name, opts?) — record an action, returns act ref
11
12
  // ref(target) — get tracking ID
@@ -13,6 +14,7 @@ export { warp } from './core/warp.js';
13
14
  export { run } from './trace/run.js';
14
15
  export { group } from './trace/group.js';
15
16
  export { call } from './trace/call.js';
17
+ export { trace } from './trace/trace.js';
16
18
  export { outcome } from './trace/outcome.js';
17
19
  export { act } from './trace/act.js';
18
20
  export { ref } from './trace/ref.js';
@@ -21,10 +21,16 @@ export function extract(result) {
21
21
  }
22
22
 
23
23
  export function extractStreamDelta(chunk) {
24
- return {
25
- content: chunk.type === 'content_block_delta' ? (chunk.delta?.text || null) : null,
26
- usage: chunk.type === 'message_delta' ? (chunk.usage || null) : null,
27
- };
24
+ if (chunk.type === 'content_block_delta') {
25
+ return { content: chunk.delta?.text || null, usage: null };
26
+ }
27
+ if (chunk.type === 'message_start') {
28
+ return { content: null, usage: chunk.message?.usage || null };
29
+ }
30
+ if (chunk.type === 'message_delta') {
31
+ return { content: null, usage: chunk.usage || null };
32
+ }
33
+ return { content: null, usage: null };
28
34
  }
29
35
 
30
36
  export function normalizeUsage(usage) {
package/src/trace/act.js CHANGED
@@ -9,7 +9,7 @@ import { logAct, getConfig } from '../core/transport.js';
9
9
  * Record an action taken on an outcome (e.g. acting on feedback).
10
10
  *
11
11
  * @param {{ id: string, _type: 'outcome' } | string} target — Outcome handle from outcome(), or outcome ref string (wm_oc_*)
12
- * @param {string} name — action name ("improve-section", "refine-prompt")
12
+ * @param {string} name — action name ("Improve Section", "Refine Prompt")
13
13
  * @param {Record<string, any>} [opts]
14
14
  * @returns {{ readonly id: string, readonly _type: 'act' } | undefined}
15
15
  */
@@ -9,7 +9,7 @@ import { logGroup, logLink, getConfig } from '../core/transport.js';
9
9
  * Create a group — a logical phase or step inside a run or another group.
10
10
  *
11
11
  * @param {object | string} target — Run, Group, or ref string
12
- * @param {string} label — group type used for aggregation ("planner", "coder")
12
+ * @param {string} label — group type used for aggregation ("Planner", "Coder")
13
13
  * @param {Record<string, any>} [opts]
14
14
  * @returns {{ readonly id: string, readonly _type: 'group' }}
15
15
  */
@@ -11,7 +11,7 @@ import { logOutcome, getConfig } from '../core/transport.js';
11
11
  * Returns a frozen Outcome handle that can be passed to act().
12
12
  *
13
13
  * @param {object | string} target — Run, Group, LLM response, or ref string
14
- * @param {string} name — outcome name ("completed", "failed", "helpful")
14
+ * @param {string} name — outcome name ("Completed", "Failed", "Helpful")
15
15
  * @param {Record<string, any>} [opts]
16
16
  * @returns {{ id: string, _type: 'outcome' } | undefined}
17
17
  */
@@ -0,0 +1,55 @@
1
+ // Warpmetrics SDK — trace()
2
+
3
+ import { ref as getRef } from './ref.js';
4
+ import { generateId } from '../core/utils.js';
5
+ import { runRegistry, groupRegistry } from '../core/registry.js';
6
+ import { logCall, logLink, getConfig } from '../core/transport.js';
7
+
8
+ export function trace(target, data) {
9
+ if (!data || !data.provider || !data.model) {
10
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — data must include provider and model.');
11
+ return;
12
+ }
13
+
14
+ const targetId = getRef(target);
15
+ if (!targetId) {
16
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — target not recognised.');
17
+ return;
18
+ }
19
+
20
+ // Run registry takes precedence over group registry when targetId exists in both
21
+ const parentData = runRegistry.get(targetId) || groupRegistry.get(targetId);
22
+ if (!parentData) {
23
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — parent not found in registry.');
24
+ return;
25
+ }
26
+
27
+ const id = generateId('call');
28
+
29
+ const event = {
30
+ id,
31
+ provider: data.provider,
32
+ model: data.model,
33
+ messages: data.messages || null,
34
+ response: data.response || null,
35
+ tools: data.tools || null,
36
+ toolCalls: data.toolCalls || null,
37
+ tokens: data.tokens || null,
38
+ latency: data.latency ?? null,
39
+ timestamp: data.timestamp || new Date().toISOString(),
40
+ status: data.status || 'success',
41
+ };
42
+
43
+ if (data.error) event.error = data.error;
44
+ if (data.opts) event.opts = data.opts;
45
+ if (data.cost != null) {
46
+ const costNum = Number(data.cost);
47
+ if (!isNaN(costNum)) event.costOverride = Math.round(costNum * 1_000_000);
48
+ }
49
+
50
+ logCall(event);
51
+ logLink({ parentId: targetId, childId: id, type: 'call' });
52
+ if (parentData?.calls) parentData.calls.push(id);
53
+
54
+ return Object.freeze({ id, _type: 'call' });
55
+ }