npm - @warpmetrics/warp - Versions diffs - 0.0.16 → 0.0.18 - Mend

@warpmetrics/warp 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +72 -14
package/package.json +7 -3
package/src/core/warp.js +3 -1
package/src/index.d.ts +37 -0
package/src/index.js +2 -0
package/src/providers/anthropic.js +10 -4
package/src/trace/act.js +1 -1
package/src/trace/group.js +1 -1
package/src/trace/outcome.js +1 -1
package/src/trace/trace.js +55 -0

package/README.md CHANGED Viewed

@@ -14,12 +14,12 @@ npm install @warpmetrics/warp
 ```js
 import OpenAI from 'openai';
-import { warp, run, group, call, outcome } from '@warpmetrics/warp';
+import { warp, run, group, call, trace, outcome } from '@warpmetrics/warp';
 const openai = warp(new OpenAI(), { apiKey: 'wm_...' });
-const r = run('code-review', { name: 'Review PR #42' });
-const planning = group(r, 'planning');
+const r = run('Code Review', { name: 'Review PR #42' });
+const planning = group(r, 'Planning');
 const response = await openai.chat.completions.create({
   model: 'gpt-4o',
@@ -27,7 +27,7 @@ const response = await openai.chat.completions.create({
 });
 call(planning, response);
-outcome(r, 'completed', { reason: 'Approved' });
+outcome(r, 'Completed', { reason: 'Approved' });
 ```
 Every LLM call is captured by `warp()` but only sent to the API when you explicitly `call()` it into a run or group. Unclaimed responses are never transmitted.
@@ -59,7 +59,7 @@ Options are only needed on the first call. After that, config is shared across a
 Create a run — the top-level unit that tracks one agent execution.
 ```js
-const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
+const r = run('Code Review', { name: 'PR #42', link: 'https://github.com/org/repo/pull/42' });
 ```
 ### `run(act, label, opts?)`
@@ -67,7 +67,7 @@ const r = run('code-review', { name: 'PR #42', link: 'https://github.com/org/rep
 Create a follow-up run from an act (the result of acting on an outcome).
 ```js
-const r2 = run(a, 'code-review', { name: 'Retry' });
+const r2 = run(a, 'Code Review', { name: 'Retry' });
 ```
 ### `group(target, label, opts?)`
@@ -75,9 +75,9 @@ const r2 = run(a, 'code-review', { name: 'Retry' });
 Create a group — a logical phase or step inside a run or group.
 ```js
-const planning = group(r, 'planning', { name: 'Planning phase' });
-const coding = group(r, 'coding');
-const subStep = group(planning, 'sub-step');  // groups can nest
+const planning = group(r, 'Planning', { name: 'Planning Phase' });
+const coding = group(r, 'Coding');
+const subStep = group(planning, 'Sub Step');  // groups can nest
 ```
 ### `call(target, response, opts?)`
@@ -90,12 +90,44 @@ call(r, response);
 call(g, response, { label: 'extract' });  // with opts
 ```
+### `trace(target, data)`
+Manually record an LLM call for providers not wrapped by `warp()`.
+```js
+trace(r, {
+  provider: 'google',
+  model: 'gemini-2.0-flash',
+  messages: [{ role: 'user', content: 'Hello' }],
+  response: 'Hi there!',
+  tokens: { prompt: 10, completion: 5 },
+  latency: 230,
+  cost: 0.0001,
+});
+```
+| Field | Type | Required | Description |
+|---|---|---|---|
+| `provider` | `string` | Yes | Provider name (e.g. `"google"`, `"cohere"`) |
+| `model` | `string` | Yes | Model identifier |
+| `messages` | `any` | No | Request messages/input |
+| `response` | `string` | No | Response text |
+| `tools` | `string[]` | No | Tool names available |
+| `toolCalls` | `{ id, name, arguments }[]` | No | Tool calls made |
+| `tokens` | `{ prompt?, completion?, total? }` | No | Token usage |
+| `latency` | `number` | No | Duration in milliseconds |
+| `timestamp` | `string` | No | ISO 8601 timestamp (auto-generated if omitted) |
+| `status` | `string` | No | `"success"` (default) or `"error"` |
+| `error` | `string` | No | Error message |
+| `cost` | `number` | No | Cost in USD |
+| `opts` | `Record<string, any>` | No | Custom metadata |
 ### `outcome(target, name, opts?)`
 Record an outcome on any tracked target.
 ```js
-outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
+outcome(r, 'Completed', { reason: 'All checks passed', source: 'ci' });
 ```
 ### `act(target, name, opts?)`
@@ -103,18 +135,19 @@ outcome(r, 'completed', { reason: 'All checks passed', source: 'ci' });
 Record an action taken on an outcome. Returns an act handle that can be passed to `run()` for follow-ups.
 ```js
-const oc = outcome(r, 'failed', { reason: 'Tests failed' });
-const a = act(oc, 'retry', { strategy: 'fix-and-rerun' });
-const r2 = run(a, 'code-review');
+const oc = outcome(r, 'Failed', { reason: 'Tests failed' });
+const a = act(oc, 'Retry', { strategy: 'fix-and-rerun' });
+const r2 = run(a, 'Code Review');
 ```
 ### `ref(target)`
-Resolve any target (run, group, or LLM response) to its string ID.
+Resolve any target (run, group, outcome, act, or LLM response) to its string ID. Also accepts raw ID strings (e.g. `"wm_run_..."` loaded from a database) and registers them locally.
 ```js
 ref(r)         // 'wm_run_01jkx3ndek0gh4r5tmqp9a3bcv'
 ref(response)  // 'wm_call_01jkx3ndef8mn2q7kpvhc4e9ws'
+ref('wm_run_01jkx3ndek0gh4r5tmqp9a3bcv')  // adopts and returns the ID
 ```
 ### `flush()`
@@ -140,6 +173,31 @@ Need another provider? [Open an issue](https://github.com/warpmetrics/warp/issue
 | `WARPMETRICS_API_URL` | Custom API endpoint |
 | `WARPMETRICS_DEBUG` | Set to `"true"` to enable debug logging |
+## Development
+### Running tests
+```bash
+npm install
+npm test              # unit tests only (integration tests auto-skip)
+npm run test:coverage # with coverage report
+npm run test:watch    # watch mode
+```
+### Integration tests
+Integration tests make real API calls to OpenAI and Anthropic. They are **automatically skipped** unless the corresponding API keys are set.
+To run them:
+```bash
+cp .env.example .env
+# Edit .env with your API keys
+npm run test:integration
+```
+> **Note:** Integration tests make a small number of API calls with `max_tokens: 5`, so costs are minimal (fractions of a cent per run).
 ## License
 MIT

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@warpmetrics/warp",
-  "version": "0.0.16",
+  "version": "0.0.18",
   "description": "Measure your agents, not your LLM calls.",
   "type": "module",
   "main": "src/index.js",
@@ -16,11 +16,12 @@
   ],
   "scripts": {
     "test": "vitest run",
+    "test:integration": "vitest run integration",
     "test:watch": "vitest",
     "test:coverage": "vitest run --coverage",
     "preversion": "vitest run --coverage",
-    "release:patch": "npm version patch && git push origin main --tags",
-    "release:minor": "npm version minor && git push origin main --tags"
+    "release:patch": "npm install && npm version patch && git push origin main --tags",
+    "release:minor": "npm install && npm version minor && git push origin main --tags"
   },
   "keywords": [
     "ai",
@@ -43,7 +44,10 @@
     "ulid": "^3.0.2"
   },
   "devDependencies": {
+    "@anthropic-ai/sdk": "^0.74.0",
     "@vitest/coverage-v8": "^1.6.1",
+    "dotenv": "^17.3.1",
+    "openai": "^6.22.0",
     "vitest": "^1.2.0"
   }
 }

package/src/core/warp.js CHANGED Viewed

@@ -85,7 +85,9 @@ function wrapStream(stream, ctx) {
       for await (const chunk of stream) {
         const delta = ctx.provider.extractStreamDelta(chunk);
         if (delta.content) content += delta.content;
-        if (delta.usage) usage = delta.usage;
+        if (delta.usage) {
+          usage = usage ? { ...usage, ...delta.usage } : delta.usage;
+        }
         yield chunk;
       }

package/src/index.d.ts CHANGED Viewed

@@ -47,6 +47,43 @@ export function group(target: Run | Group | string, label: string, opts?: Record
 /** Track an LLM call by linking a response to a run or group. */
 export function call(target: Run | Group | string, response: object, opts?: Record<string, any>): void;
+export interface TraceData {
+  /** Provider name (e.g. "google", "cohere"). */
+  provider: string;
+  /** Model identifier. */
+  model: string;
+  /** Request messages/input. */
+  messages?: any;
+  /** Response text. */
+  response?: string;
+  /** Tool names available. */
+  tools?: string[];
+  /** Tool calls made. */
+  toolCalls?: { id?: string; name: string; arguments?: string }[];
+  /** Token usage. */
+  tokens?: { prompt?: number; completion?: number; total?: number };
+  /** Duration in milliseconds. */
+  latency?: number;
+  /** ISO 8601 timestamp (auto-generated if omitted). */
+  timestamp?: string;
+  /** "success" (default) or "error". */
+  status?: string;
+  /** Error message. */
+  error?: string;
+  /** Cost in USD. */
+  cost?: number;
+  /** Custom metadata. */
+  opts?: Record<string, any>;
+}
+export interface Call {
+  readonly id: string;
+  readonly _type: 'call';
+}
+/** Manually record an LLM call for providers not wrapped by warp(). */
+export function trace(target: Run | Group | string, data: TraceData): Call | undefined;
 /** Record an outcome on any tracked target. Returns an Outcome handle for use with act(). */
 export function outcome(
   target: Run | Group | object | string,

package/src/index.js CHANGED Viewed

@@ -6,6 +6,7 @@
 //   run(act, label, opts?)           — create a follow-up run from an act
 //   group(target, label, opts?)      — create a group inside a run or group
 //   call(target, response, opts?)    — track an LLM call
+//   trace(target, data)              — manually trace a call (non-SDK tools)
 //   outcome(target, name, opts?)     — record a result
 //   act(target, name, opts?)         — record an action, returns act ref
 //   ref(target)                      — get tracking ID
@@ -13,6 +14,7 @@ export { warp } from './core/warp.js';
 export { run } from './trace/run.js';
 export { group } from './trace/group.js';
 export { call } from './trace/call.js';
+export { trace } from './trace/trace.js';
 export { outcome } from './trace/outcome.js';
 export { act } from './trace/act.js';
 export { ref } from './trace/ref.js';

package/src/providers/anthropic.js CHANGED Viewed

@@ -21,10 +21,16 @@ export function extract(result) {
 }
 export function extractStreamDelta(chunk) { // Maps one streamed chunk to a { content, usage } pair; either field is null when the chunk does not carry it.
-  return {
-    content: chunk.type === 'content_block_delta' ? (chunk.delta?.text || null) : null,
-    usage:   chunk.type === 'message_delta' ? (chunk.usage || null) : null,
-  };
+  if (chunk.type === 'content_block_delta') { // text increment: read from chunk.delta.text
+    return { content: chunk.delta?.text || null, usage: null };
+  }
+  if (chunk.type === 'message_start') { // newly handled chunk type: usage is nested under chunk.message here
+    return { content: null, usage: chunk.message?.usage || null };
+  }
+  if (chunk.type === 'message_delta') { // usage sits directly on the chunk for this type
+    return { content: null, usage: chunk.usage || null };
+  }
+  return { content: null, usage: null }; // any other chunk type contributes neither content nor usage
 }
 export function normalizeUsage(usage) {

package/src/trace/act.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { logAct, getConfig } from '../core/transport.js';
  * Record an action taken on an outcome (e.g. acting on feedback).
  *
  * @param {{ id: string, _type: 'outcome' } | string} target — Outcome handle from outcome(), or outcome ref string (wm_oc_*)
- * @param {string} name            — action name ("improve-section", "refine-prompt")
+ * @param {string} name            — action name ("Improve Section", "Refine Prompt")
  * @param {Record<string, any>} [opts]
  * @returns {{ readonly id: string, readonly _type: 'act' } | undefined}
  */

package/src/trace/group.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { logGroup, logLink, getConfig } from '../core/transport.js';
  * Create a group — a logical phase or step inside a run or another group.
  *
  * @param {object | string} target  — Run, Group, or ref string
- * @param {string} label            — group type used for aggregation ("planner", "coder")
+ * @param {string} label            — group type used for aggregation ("Planner", "Coder")
  * @param {Record<string, any>} [opts]
  * @returns {{ readonly id: string, readonly _type: 'group' }}
  */

package/src/trace/outcome.js CHANGED Viewed

@@ -11,7 +11,7 @@ import { logOutcome, getConfig } from '../core/transport.js';
  * Returns a frozen Outcome handle that can be passed to act().
  *
  * @param {object | string} target — Run, Group, LLM response, or ref string
- * @param {string} name            — outcome name ("completed", "failed", "helpful")
+ * @param {string} name            — outcome name ("Completed", "Failed", "Helpful")
  * @param {Record<string, any>} [opts]
  * @returns {{ id: string, _type: 'outcome' } | undefined}
  */

package/src/trace/trace.js ADDED Viewed

@@ -0,0 +1,55 @@
+// Warpmetrics SDK — trace()
+import { ref as getRef } from './ref.js';
+import { generateId } from '../core/utils.js';
+import { runRegistry, groupRegistry } from '../core/registry.js';
+import { logCall, logLink, getConfig } from '../core/transport.js';
+export function trace(target, data) { /*
+   Manually record an LLM call and link it to a parent run or group.
+
+   target: anything ref() can resolve to an ID; that ID must also be present
+           in runRegistry or groupRegistry.
+   data:   call description; `provider` and `model` are required, every
+           other field is optional and defaulted below.
+
+   Returns a frozen { id, _type: 'call' } handle, or undefined when the
+   input is invalid or the parent cannot be resolved. Validation failures
+   never throw; they only log a warning when getConfig().debug is truthy.
+ */
+  if (!data || !data.provider || !data.model) { // reject missing data or a falsy provider/model
+    if (getConfig().debug) console.warn('[warpmetrics] trace() — data must include provider and model.');
+    return;
+  }
+  const targetId = getRef(target); // resolve the target to its string ID
+  if (!targetId) {
+    if (getConfig().debug) console.warn('[warpmetrics] trace() — target not recognised.');
+    return;
+  }
+  // Run registry takes precedence over group registry when targetId exists in both
+  const parentData = runRegistry.get(targetId) || groupRegistry.get(targetId);
+  if (!parentData) { // an ID that resolves but is in neither registry is rejected
+    if (getConfig().debug) console.warn('[warpmetrics] trace() — parent not found in registry.');
+    return;
+  }
+  const id = generateId('call'); // fresh ID for this call event
+  const event = {
+    id,
+    provider: data.provider,
+    model: data.model,
+    messages: data.messages || null, // `||` means any falsy value is stored as null
+    response: data.response || null, // note: an empty-string response is also stored as null
+    tools: data.tools || null,
+    toolCalls: data.toolCalls || null,
+    tokens: data.tokens || null,
+    latency: data.latency ?? null, // `??` keeps a latency of 0; only null/undefined become null
+    timestamp: data.timestamp || new Date().toISOString(), // defaults to the current time in ISO format
+    status: data.status || 'success',
+  };
+  if (data.error) event.error = data.error; // error and opts are attached only when truthy
+  if (data.opts) event.opts = data.opts;
+  if (data.cost != null) { // loose != null skips both null and undefined
+    const costNum = Number(data.cost);
+    if (!isNaN(costNum)) event.costOverride = Math.round(costNum * 1_000_000); // stored as an integer count of millionths of the supplied cost; non-numeric cost is silently dropped
+  }
+  logCall(event);
+  logLink({ parentId: targetId, childId: id, type: 'call' }); // link the new call to its parent
+  if (parentData?.calls) parentData.calls.push(id); // append to the parent's calls list when it has one
+  return Object.freeze({ id, _type: 'call' });
+}