@warpmetrics/warp 0.0.16 → 0.0.18

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,12 +14,12 @@ npm install @warpmetrics/warp
14
14
 
15
15
  ```js
16
16
  import OpenAI from 'openai';
17
- import { warp, run, group, call, outcome } from '@warpmetrics/warp';
17
+ import { warp, run, group, call, trace, outcome } from '@warpmetrics/warp';
18
18
 
19
19
  const openai = warp(new OpenAI(), { apiKey: 'wm_...' });
20
20
 
21
- const r = run('code-review', { name: 'Review PR #42' });
22
- const planning = group(r, 'planning');
21
+ const r = run('Code Review', { name: 'Review PR #42' });
22
+ const planning = group(r, 'Planning');
23
23
 
24
24
  const response = await openai.chat.completions.create({
25
25
  model: 'gpt-4o',
@@ -27,7 +27,7 @@ const response = await openai.chat.completions.create({
27
27
  });
28
28
 
29
29
  call(planning, response);
30
- outcome(r, 'completed', { reason: 'Approved' });
30
+ outcome(r, 'Completed', { reason: 'Approved' });
31
31
  ```
32
32
 
33
33
  Every LLM call is captured by `warp()` but only sent to the API when you explicitly `call()` it into a run or group. Unclaimed responses are never transmitted.
@@ -59,7 +59,7 @@ Options are only needed on the first call. After that, config is shared across a
59
59
  Create a run — the top-level unit that tracks one agent execution.
60
60
 
61
61
  ```js
62
- const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
62
+ const r = run('Code Review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
63
63
  ```
64
64
 
65
65
  ### `run(act, label, opts?)`
@@ -67,7 +67,7 @@ const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/rep
67
67
  Create a follow-up run from an act (the result of acting on an outcome).
68
68
 
69
69
  ```js
70
- const r2 = run(a, 'code-review', { name: 'Retry' });
70
+ const r2 = run(a, 'Code Review', { name: 'Retry' });
71
71
  ```
72
72
 
73
73
  ### `group(target, label, opts?)`
@@ -75,9 +75,9 @@ const r2 = run(a, 'code-review', { name: 'Retry' });
75
75
  Create a group — a logical phase or step inside a run or group.
76
76
 
77
77
  ```js
78
- const planning = group(r, 'planning', { name: 'Planning phase' });
79
- const coding = group(r, 'coding');
80
- const subStep = group(planning, 'sub-step'); // groups can nest
78
+ const planning = group(r, 'Planning', { name: 'Planning Phase' });
79
+ const coding = group(r, 'Coding');
80
+ const subStep = group(planning, 'Sub Step'); // groups can nest
81
81
  ```
82
82
 
83
83
  ### `call(target, response, opts?)`
@@ -90,12 +90,44 @@ call(r, response);
90
90
  call(g, response, { label: 'extract' }); // with opts
91
91
  ```
92
92
 
93
+ ### `trace(target, data)`
94
+
95
+ Manually record an LLM call for providers not wrapped by `warp()`.
96
+
97
+ ```js
98
+ trace(r, {
99
+ provider: 'google',
100
+ model: 'gemini-2.0-flash',
101
+ messages: [{ role: 'user', content: 'Hello' }],
102
+ response: 'Hi there!',
103
+ tokens: { prompt: 10, completion: 5 },
104
+ latency: 230,
105
+ cost: 0.0001,
106
+ });
107
+ ```
108
+
109
+ | Field | Type | Required | Description |
110
+ |---|---|---|---|
111
+ | `provider` | `string` | Yes | Provider name (e.g. `"google"`, `"cohere"`) |
112
+ | `model` | `string` | Yes | Model identifier |
113
+ | `messages` | `any` | No | Request messages/input |
114
+ | `response` | `string` | No | Response text |
115
+ | `tools` | `string[]` | No | Tool names available |
116
+ | `toolCalls` | `{ id?, name, arguments? }[]` | No | Tool calls made |
117
+ | `tokens` | `{ prompt?, completion?, total? }` | No | Token usage |
118
+ | `latency` | `number` | No | Duration in milliseconds |
119
+ | `timestamp` | `string` | No | ISO 8601 timestamp (auto-generated if omitted) |
120
+ | `status` | `string` | No | `"success"` (default) or `"error"` |
121
+ | `error` | `string` | No | Error message |
122
+ | `cost` | `number` | No | Cost in USD |
123
+ | `opts` | `Record<string, any>` | No | Custom metadata |
124
+
93
125
  ### `outcome(target, name, opts?)`
94
126
 
95
127
  Record an outcome on any tracked target.
96
128
 
97
129
  ```js
98
- outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
130
+ outcome(r, 'Completed', { reason: 'All checks passed', source: 'ci' });
99
131
  ```
100
132
 
101
133
  ### `act(target, name, opts?)`
@@ -103,18 +135,19 @@ outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
103
135
  Record an action taken on an outcome. Returns an act handle that can be passed to `run()` for follow-ups.
104
136
 
105
137
  ```js
106
- const oc = outcome(r, 'failed', { reason: 'Tests failed' });
107
- const a = act(oc, 'retry', { strategy: 'fix-and-rerun' });
108
- const r2 = run(a, 'code-review');
138
+ const oc = outcome(r, 'Failed', { reason: 'Tests failed' });
139
+ const a = act(oc, 'Retry', { strategy: 'fix-and-rerun' });
140
+ const r2 = run(a, 'Code Review');
109
141
  ```
110
142
 
111
143
  ### `ref(target)`
112
144
 
113
- Resolve any target (run, group, or LLM response) to its string ID.
145
+ Resolve any target (run, group, outcome, act, or LLM response) to its string ID. Also accepts raw ID strings (e.g. `"wm_run_..."` loaded from a database) and registers them locally.
114
146
 
115
147
  ```js
116
148
  ref(r) // 'wm_run_01jkx3ndek0gh4r5tmqp9a3bcv'
117
149
  ref(response) // 'wm_call_01jkx3ndef8mn2q7kpvhc4e9ws'
150
+ ref('wm_run_01jkx3ndek0gh4r5tmqp9a3bcv') // adopts and returns the ID
118
151
  ```
119
152
 
120
153
  ### `flush()`
@@ -140,6 +173,31 @@ Need another provider? [Open an issue](https://github.com/warpmetrics/warp/issue
140
173
  | `WARPMETRICS_API_URL` | Custom API endpoint |
141
174
  | `WARPMETRICS_DEBUG` | Set to `"true"` to enable debug logging |
142
175
 
176
+ ## Development
177
+
178
+ ### Running tests
179
+
180
+ ```bash
181
+ npm install
182
+ npm test # unit tests (integration tests auto-skip unless API keys are set)
183
+ npm run test:coverage # with coverage report
184
+ npm run test:watch # watch mode
185
+ ```
186
+
187
+ ### Integration tests
188
+
189
+ Integration tests make real API calls to OpenAI and Anthropic. They are **automatically skipped** unless the corresponding API keys are set.
190
+
191
+ To run them:
192
+
193
+ ```bash
194
+ cp .env.example .env
195
+ # Edit .env with your API keys
196
+ npm run test:integration
197
+ ```
198
+
199
+ > **Note:** Integration tests make a small number of API calls with `max_tokens: 5`, so costs are minimal (fractions of a cent per run).
200
+
143
201
  ## License
144
202
 
145
203
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@warpmetrics/warp",
3
- "version": "0.0.16",
3
+ "version": "0.0.18",
4
4
  "description": "Measure your agents, not your LLM calls.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -16,11 +16,12 @@
16
16
  ],
17
17
  "scripts": {
18
18
  "test": "vitest run",
19
+ "test:integration": "vitest run integration",
19
20
  "test:watch": "vitest",
20
21
  "test:coverage": "vitest run --coverage",
21
22
  "preversion": "vitest run --coverage",
22
- "release:patch": "npm version patch && git push origin main --tags",
23
- "release:minor": "npm version minor && git push origin main --tags"
23
+ "release:patch": "npm install && npm version patch && git push origin main --tags",
24
+ "release:minor": "npm install && npm version minor && git push origin main --tags"
24
25
  },
25
26
  "keywords": [
26
27
  "ai",
@@ -43,7 +44,10 @@
43
44
  "ulid": "^3.0.2"
44
45
  },
45
46
  "devDependencies": {
47
+ "@anthropic-ai/sdk": "^0.74.0",
46
48
  "@vitest/coverage-v8": "^1.6.1",
49
+ "dotenv": "^17.3.1",
50
+ "openai": "^6.22.0",
47
51
  "vitest": "^1.2.0"
48
52
  }
49
53
  }
package/src/core/warp.js CHANGED
@@ -85,7 +85,9 @@ function wrapStream(stream, ctx) {
85
85
  for await (const chunk of stream) {
86
86
  const delta = ctx.provider.extractStreamDelta(chunk);
87
87
  if (delta.content) content += delta.content;
88
- if (delta.usage) usage = delta.usage;
88
+ if (delta.usage) {
89
+ usage = usage ? { ...usage, ...delta.usage } : delta.usage;
90
+ }
89
91
  yield chunk;
90
92
  }
91
93
 
package/src/index.d.ts CHANGED
@@ -47,6 +47,43 @@ export function group(target: Run | Group | string, label: string, opts?: Record
47
47
  /** Track an LLM call by linking a response to a run or group. */
48
48
  export function call(target: Run | Group | string, response: object, opts?: Record<string, any>): void;
49
49
 
50
+ export interface TraceData {
51
+ /** Provider name (e.g. "google", "cohere"). */
52
+ provider: string;
53
+ /** Model identifier. */
54
+ model: string;
55
+ /** Request messages/input. */
56
+ messages?: any;
57
+ /** Response text. */
58
+ response?: string;
59
+ /** Tool names available. */
60
+ tools?: string[];
61
+ /** Tool calls made. */
62
+ toolCalls?: { id?: string; name: string; arguments?: string }[];
63
+ /** Token usage. */
64
+ tokens?: { prompt?: number; completion?: number; total?: number };
65
+ /** Duration in milliseconds. */
66
+ latency?: number;
67
+ /** ISO 8601 timestamp (auto-generated if omitted). */
68
+ timestamp?: string;
69
+ /** "success" (default) or "error". */
70
+ status?: string;
71
+ /** Error message. */
72
+ error?: string;
73
+ /** Cost in USD. */
74
+ cost?: number;
75
+ /** Custom metadata. */
76
+ opts?: Record<string, any>;
77
+ }
78
+
79
+ export interface Call {
80
+ readonly id: string;
81
+ readonly _type: 'call';
82
+ }
83
+
84
+ /** Manually record an LLM call for providers not wrapped by warp(). */
85
+ export function trace(target: Run | Group | string, data: TraceData): Call | undefined;
86
+
50
87
  /** Record an outcome on any tracked target. Returns an Outcome handle for use with act(). */
51
88
  export function outcome(
52
89
  target: Run | Group | object | string,
package/src/index.js CHANGED
@@ -6,6 +6,7 @@
6
6
  // run(act, label, opts?) — create a follow-up run from an act
7
7
  // group(target, label, opts?) — create a group inside a run or group
8
8
  // call(target, response, opts?) — track an LLM call
9
+ // trace(target, data) — manually record a call for providers not wrapped by warp()
9
10
  // outcome(target, name, opts?) — record a result
10
11
  // act(target, name, opts?) — record an action, returns act ref
11
12
  // ref(target) — get tracking ID
@@ -13,6 +14,7 @@ export { warp } from './core/warp.js';
13
14
  export { run } from './trace/run.js';
14
15
  export { group } from './trace/group.js';
15
16
  export { call } from './trace/call.js';
17
+ export { trace } from './trace/trace.js';
16
18
  export { outcome } from './trace/outcome.js';
17
19
  export { act } from './trace/act.js';
18
20
  export { ref } from './trace/ref.js';
@@ -21,10 +21,16 @@ export function extract(result) {
21
21
  }
22
22
 
23
23
  export function extractStreamDelta(chunk) {
24
- return {
25
- content: chunk.type === 'content_block_delta' ? (chunk.delta?.text || null) : null,
26
- usage: chunk.type === 'message_delta' ? (chunk.usage || null) : null,
27
- };
24
+ if (chunk.type === 'content_block_delta') {
25
+ return { content: chunk.delta?.text || null, usage: null };
26
+ }
27
+ if (chunk.type === 'message_start') {
28
+ return { content: null, usage: chunk.message?.usage || null };
29
+ }
30
+ if (chunk.type === 'message_delta') {
31
+ return { content: null, usage: chunk.usage || null };
32
+ }
33
+ return { content: null, usage: null };
28
34
  }
29
35
 
30
36
  export function normalizeUsage(usage) {
package/src/trace/act.js CHANGED
@@ -9,7 +9,7 @@ import { logAct, getConfig } from '../core/transport.js';
9
9
  * Record an action taken on an outcome (e.g. acting on feedback).
10
10
  *
11
11
  * @param {{ id: string, _type: 'outcome' } | string} target — Outcome handle from outcome(), or outcome ref string (wm_oc_*)
12
- * @param {string} name — action name ("improve-section", "refine-prompt")
12
+ * @param {string} name — action name ("Improve Section", "Refine Prompt")
13
13
  * @param {Record<string, any>} [opts]
14
14
  * @returns {{ readonly id: string, readonly _type: 'act' } | undefined}
15
15
  */
@@ -9,7 +9,7 @@ import { logGroup, logLink, getConfig } from '../core/transport.js';
9
9
  * Create a group — a logical phase or step inside a run or another group.
10
10
  *
11
11
  * @param {object | string} target — Run, Group, or ref string
12
- * @param {string} label — group type used for aggregation ("planner", "coder")
12
+ * @param {string} label — group type used for aggregation ("Planner", "Coder")
13
13
  * @param {Record<string, any>} [opts]
14
14
  * @returns {{ readonly id: string, readonly _type: 'group' }}
15
15
  */
@@ -11,7 +11,7 @@ import { logOutcome, getConfig } from '../core/transport.js';
11
11
  * Returns a frozen Outcome handle that can be passed to act().
12
12
  *
13
13
  * @param {object | string} target — Run, Group, LLM response, or ref string
14
- * @param {string} name — outcome name ("completed", "failed", "helpful")
14
+ * @param {string} name — outcome name ("Completed", "Failed", "Helpful")
15
15
  * @param {Record<string, any>} [opts]
16
16
  * @returns {{ id: string, _type: 'outcome' } | undefined}
17
17
  */
@@ -0,0 +1,55 @@
1
+ // Warpmetrics SDK — trace()
2
+
3
+ import { ref as getRef } from './ref.js';
4
+ import { generateId } from '../core/utils.js';
5
+ import { runRegistry, groupRegistry } from '../core/registry.js';
6
+ import { logCall, logLink, getConfig } from '../core/transport.js';
7
+
8
+ export function trace(target, data) {
9
+ if (!data || !data.provider || !data.model) {
10
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — data must include provider and model.');
11
+ return;
12
+ }
13
+
14
+ const targetId = getRef(target);
15
+ if (!targetId) {
16
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — target not recognised.');
17
+ return;
18
+ }
19
+
20
+ // Run registry takes precedence over group registry when targetId exists in both
21
+ const parentData = runRegistry.get(targetId) || groupRegistry.get(targetId);
22
+ if (!parentData) {
23
+ if (getConfig().debug) console.warn('[warpmetrics] trace() — parent not found in registry.');
24
+ return;
25
+ }
26
+
27
+ const id = generateId('call');
28
+
29
+ const event = {
30
+ id,
31
+ provider: data.provider,
32
+ model: data.model,
33
+ messages: data.messages || null,
34
+ response: data.response || null,
35
+ tools: data.tools || null,
36
+ toolCalls: data.toolCalls || null,
37
+ tokens: data.tokens || null,
38
+ latency: data.latency ?? null,
39
+ timestamp: data.timestamp || new Date().toISOString(),
40
+ status: data.status || 'success',
41
+ };
42
+
43
+ if (data.error) event.error = data.error;
44
+ if (data.opts) event.opts = data.opts;
45
+ if (data.cost != null) {
46
+ const costNum = Number(data.cost);
47
+ if (!isNaN(costNum)) event.costOverride = Math.round(costNum * 1_000_000);
48
+ }
49
+
50
+ logCall(event);
51
+ logLink({ parentId: targetId, childId: id, type: 'call' });
52
+ if (parentData?.calls) parentData.calls.push(id);
53
+
54
+ return Object.freeze({ id, _type: 'call' });
55
+ }