npm - ai-experiments - Versions diffs - 0.1.0 → 2.0.1 - Mend

ai-experiments 0.1.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/.turbo/turbo-build.log ADDED Viewed

@@ -0,0 +1,5 @@
+> ai-experiments@2.0.1 build /Users/nathanclevenger/projects/primitives.org.ai/packages/ai-experiments
+> tsc

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,8 @@
+# ai-experiments
+## 2.0.1
+### Patch Changes
+- Updated dependencies
+  - ai-functions@2.0.1

package/README.md CHANGED Viewed

@@ -1,146 +1,361 @@
 # ai-experiments
-A minimalistic experiment runner for AI tasks.
+AI-powered experimentation primitives for testing and evaluating models.
+## Overview
+`ai-experiments` provides a comprehensive toolkit for A/B testing, parameter exploration, decision making, and tracking in AI applications. It follows the same patterns and conventions as `ai-functions` from the primitives monorepo.
 ## Installation
 ```bash
-npm install ai-experiments
-# or
-yarn add ai-experiments
-# or
 pnpm add ai-experiments
 ```
-## Usage
+## Core APIs
-### Basic Example
+### `Experiment()` - A/B Testing and Variant Evaluation
+Run experiments with multiple variants to find the best configuration.
 ```typescript
-import { Experiment } from 'ai-experiments';
+import { Experiment } from 'ai-experiments'
+const results = await Experiment({
+  id: 'prompt-comparison',
+  name: 'Prompt Engineering Test',
+  variants: [
+    {
+      id: 'baseline',
+      name: 'Baseline Prompt',
+      config: { prompt: 'Summarize this text.' },
+    },
+    {
+      id: 'detailed',
+      name: 'Detailed Prompt',
+      config: { prompt: 'Provide a comprehensive summary...' },
+    },
+  ],
+  execute: async (config) => {
+    return await ai.generate({ prompt: config.prompt })
+  },
+  metric: (result) => result.quality_score,
+})
+console.log('Best variant:', results.bestVariant)
+```
-const result = await Experiment('simple-test', {
-  models: ['gpt-4o'],
-  temperature: 0.7,
-  prompt: 'What is the capital of France?',
-});
+**Options:**
+- `parallel: true` - Run variants in parallel (default)
+- `maxConcurrency: 5` - Limit concurrent executions
+- `stopOnError: false` - Whether to stop on the first error
+- Event callbacks: `onVariantStart`, `onVariantComplete`, `onVariantError`
-console.log(result);
-```
+### `cartesian()` - Parameter Grid Exploration
-### Using Parameter Combinations
+Generate all combinations of parameters for exhaustive testing.
 ```typescript
-import { Experiment } from 'ai-experiments';
-const result = await Experiment('temperature-comparison', {
-  models: ['gpt-4o', 'gpt-4o-mini'],
-  temperature: [0, 0.3, 0.7, 1.0],
-  prompt: 'Generate a creative story about a robot.',
-});
+import { cartesian } from 'ai-experiments'
-// This will run 8 combinations (2 models × 4 temperatures)
-console.log(result);
+const combinations = cartesian({
+  model: ['sonnet', 'opus', 'gpt-4o'],
+  temperature: [0.3, 0.7, 1.0],
+  maxTokens: [100, 500, 1000],
+})
+// Returns 27 combinations (3 × 3 × 3)
+// Use with experiments:
+const variants = combinations.map((config, i) => ({
+  id: `variant-${i}`,
+  name: `${config.model} T=${config.temperature}`,
+  config,
+}))
 ```
-### Using the Cartesian Function Directly
+**Related functions:**
+- `cartesianFilter()` - Filter invalid combinations
+- `cartesianSample()` - Random sample when full product is too large
+- `cartesianCount()` - Count combinations without generating them
+- `cartesianWithLabels()` - Include dimension indices
-```typescript
-import { cartesian } from 'ai-experiments';
+### `decide()` - Intelligent Decision Making
-const combinations = cartesian({
-  model: ['gpt-4o', 'gpt-4o-mini'],
-  temperature: [0, 0.7],
-  maxTokens: [100, 500]
-});
+Make decisions by scoring and comparing options.
-// Returns:
+```typescript
+import { decide } from 'ai-experiments'
+// Simple decision
+const result = await decide({
+  options: ['fast', 'accurate', 'balanced'],
+  score: (approach) => evaluateApproach(approach),
+  context: 'Choosing summarization approach',
+})
+console.log(result.selected) // 'balanced'
+console.log(result.score)    // 0.9
+// Return all options sorted by score
+const ranked = await decide({
+  options: ['option-a', 'option-b', 'option-c'],
+  score: async (opt) => await scoreOption(opt),
+  returnAll: true,
+})
+console.log(ranked.allOptions)
 // [
-//   { model: 'gpt-4o', temperature: 0, maxTokens: 100 },
-//   { model: 'gpt-4o', temperature: 0, maxTokens: 500 },
-//   { model: 'gpt-4o', temperature: 0.7, maxTokens: 100 },
-//   { model: 'gpt-4o', temperature: 0.7, maxTokens: 500 },
-//   { model: 'gpt-4o-mini', temperature: 0, maxTokens: 100 },
-//   { model: 'gpt-4o-mini', temperature: 0, maxTokens: 500 },
-//   { model: 'gpt-4o-mini', temperature: 0.7, maxTokens: 100 },
-//   { model: 'gpt-4o-mini', temperature: 0.7, maxTokens: 500 }
+//   { option: 'option-b', score: 0.95 },
+//   { option: 'option-a', score: 0.82 },
+//   { option: 'option-c', score: 0.71 },
 // ]
 ```
-### Using the Runner
+**Advanced decision strategies:**
+- `decideWeighted()` - Weighted random selection
+- `decideEpsilonGreedy()` - Exploration vs exploitation
+- `decideThompsonSampling()` - Bayesian bandit algorithm
+- `decideUCB()` - Upper Confidence Bound
+### `track()` - Event Tracking
+Track experiment events and metrics.
 ```typescript
-// vitest.config.ts
-import { defineConfig } from 'vitest/config';
-import { createRunner } from 'ai-experiments';
-export default createRunner({
-  outputDir: '.ai/experiments',
-  testMatch: ['**/*experiment*.(js|ts|mjs|cjs)'],
-  watch: false,
-});
+import { track, configureTracking, createFileBackend } from 'ai-experiments'
+// Configure tracking backend
+configureTracking({
+  backend: createFileBackend({ path: './experiments.jsonl' }),
+  metadata: { projectId: 'my-project' },
+})
+// Events are automatically tracked by Experiment()
+// You can also track custom events:
+track({
+  type: 'experiment.start',
+  timestamp: new Date(),
+  data: {
+    experimentId: 'my-experiment',
+    variantCount: 3,
+  },
+})
 ```
-## API Reference
+**Built-in backends:**
+- `createConsoleBackend()` - Log to console (default)
+- `createMemoryBackend()` - Store events in memory
+- `createBatchBackend()` - Batch events before sending
+- `createFileBackend()` - Write to JSONL file
+## Usage Patterns
+### Pattern 1: Parameter Sweep
-### Experiment
+Test all combinations of hyperparameters:
 ```typescript
-function Experiment<T = any, E = any>(
-  name: string,
-  config: ExperimentConfig<T, E>
-): Promise<ExperimentResult>
+import { cartesian, Experiment } from 'ai-experiments'
+const paramGrid = cartesian({
+  temperature: [0.3, 0.5, 0.7, 0.9],
+  topP: [0.9, 0.95, 1.0],
+  maxTokens: [100, 500, 1000],
+})
+const variants = paramGrid.map((params, i) => ({
+  id: `config-${i}`,
+  name: `T=${params.temperature} P=${params.topP} max=${params.maxTokens}`,
+  config: params,
+}))
+const results = await Experiment({
+  id: 'param-sweep',
+  name: 'Hyperparameter Optimization',
+  variants,
+  execute: async (config) => {
+    return await ai.generate({ ...config, prompt: 'Test prompt' })
+  },
+  metric: (result) => evaluateQuality(result),
+})
+console.log('Best config:', results.bestVariant)
 ```
-#### Parameters
+### Pattern 2: Progressive Testing
-- `name`: Name of the experiment
-- `config`: Configuration object with the following properties:
-  - `models`: Array of model names to use
-  - `temperature`: Number or array of temperature values
-  - `seed` (optional): Number or array of seed values
-  - `prompt` (optional): String or function that generates prompts
-  - `inputs` (optional): Array or function that returns input values
-  - `expected` (optional): Expected output for validation
-  - `schema` (optional): Schema for structured output
+Start with a sample, then test more if needed:
-#### Returns
+```typescript
+import { cartesianSample, Experiment } from 'ai-experiments'
+// Sample 20 random combinations from a large space
+const sample = cartesianSample(
+  {
+    param1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    param2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    param3: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+  },
+  20
+)
+const results = await Experiment({
+  id: 'initial-sample',
+  name: 'Initial Random Sample',
+  variants: sample.map((config, i) => ({
+    id: `sample-${i}`,
+    name: `Sample ${i}`,
+    config,
+  })),
+  execute: async (config) => runTest(config),
+  metric: (result) => result.score,
+})
+// If results are promising, expand the search
+if (results.bestVariant && results.bestVariant.metricValue > 0.8) {
+  console.log('Found promising region, expanding search...')
+  // Test more combinations near the best one
+}
+```
-Promise that resolves to an `ExperimentResult` object with:
-- `name`: Name of the experiment
-- `results`: Array of results for each parameter combination
-- `totalTime`: Total time taken for the experiment
-- `timestamp`: ISO string of when the experiment was run
+### Pattern 3: Multi-Armed Bandit
-### cartesian
+Adaptively choose variants based on performance:
 ```typescript
-function cartesian<T extends Record<string, readonly any[]>>(
-  spec: T
-): Array<{ [K in keyof T]: T[K][number] }>
+import { decideThompsonSampling, track } from 'ai-experiments'
+// Track success/failure for each variant
+const stats = {
+  'variant-a': { alpha: 10, beta: 5 },  // 10 successes, 5 failures
+  'variant-b': { alpha: 8, beta: 3 },   // 8 successes, 3 failures
+  'variant-c': { alpha: 2, beta: 2 },   // 2 successes, 2 failures (uncertain)
+}
+// Thompson sampling balances exploration and exploitation
+const selected = decideThompsonSampling(
+  ['variant-a', 'variant-b', 'variant-c'],
+  stats
+)
+// Update stats based on result
+const result = await runVariant(selected)
+if (result.success) {
+  stats[selected].alpha += 1
+} else {
+  stats[selected].beta += 1
+}
 ```
-#### Parameters
+### Pattern 4: Sequential Testing with Early Stopping
-- `spec`: Object with keys mapping to arrays of values
+Stop testing once a clear winner emerges:
-#### Returns
+```typescript
+import { Experiment } from 'ai-experiments'
+let bestScore = 0
+let testCount = 0
+const maxTests = 100
+const results = await Experiment(
+  {
+    id: 'sequential-test',
+    name: 'Sequential Testing',
+    variants: [...],
+    execute: async (config) => runTest(config),
+    metric: (result) => result.score,
+  },
+  {
+    parallel: false, // Sequential execution
+    onVariantComplete: (result) => {
+      testCount++
+      if (result.metricValue && result.metricValue > bestScore) {
+        bestScore = result.metricValue
+      }
+      // Stop if we found a really good result
+      if (bestScore > 0.95) {
+        console.log('Found excellent result, stopping early')
+        // In a real implementation, you'd need to handle early stopping
+      }
+    },
+  }
+)
+```
-Array of objects representing all possible combinations of the input values.
+## TypeScript Types
-### createRunner
+The package is fully typed with comprehensive TypeScript definitions:
 ```typescript
-function createRunner(config?: RunnerConfig): VitestConfig
+import type {
+  ExperimentConfig,
+  ExperimentResult,
+  ExperimentSummary,
+  ExperimentVariant,
+  DecisionResult,
+  TrackingEvent,
+  TrackingBackend,
+} from 'ai-experiments'
 ```
-#### Parameters
+## Integration with ai-functions
-- `config` (optional): Configuration object with the following properties:
-  - `outputDir` (optional): Directory where experiment results will be saved
-  - `testMatch` (optional): Custom test matcher pattern
-  - `watch` (optional): Whether to watch for file changes
+Works seamlessly with `ai-functions` for AI-powered experiments:
+```typescript
+import { generateObject } from 'ai-functions'
+import { Experiment, cartesian } from 'ai-experiments'
+const prompts = [
+  'Summarize briefly',
+  'Provide a detailed summary',
+  'Extract key points',
+]
+const models = ['sonnet', 'opus', 'gpt-4o']
+// Test all combinations of prompts and models
+const combinations = cartesian({ prompt: prompts, model: models })
+const results = await Experiment({
+  id: 'prompt-model-test',
+  name: 'Prompt and Model Comparison',
+  variants: combinations.map((config, i) => ({
+    id: `combo-${i}`,
+    name: `${config.model}: "${config.prompt.slice(0, 20)}..."`,
+    config,
+  })),
+  execute: async (config) => {
+    return await generateObject({
+      model: config.model,
+      schema: { summary: 'The summary text' },
+      prompt: config.prompt,
+    })
+  },
+  metric: (result) => evaluateSummary(result.object.summary),
+})
+console.log('Best combination:', results.bestVariant)
+```
+## Examples
+See [examples.ts](./examples.ts) for complete working examples demonstrating:
+- Simple A/B experiments
+- Parameter grid exploration
+- Decision making strategies
+- Event tracking
+- Sequential vs parallel execution
+Run the examples:
+```bash
+pnpm build
+node --import tsx examples.ts
+```
-#### Returns
+## License
-A Vitest configuration function that can be used in `vitest.config.ts`.
+MIT

package/dist/cartesian.d.ts ADDED Viewed

@@ -0,0 +1,140 @@
+/**
+ * Cartesian product utilities for parameter exploration
+ */
+import type { CartesianParams, CartesianResult } from './types.js';
+/**
+ * Generate cartesian product of parameter sets
+ *
+ * Takes an object where each key maps to an array of possible values,
+ * and returns all possible combinations as an array of objects.
+ *
+ * @example
+ * ```ts
+ * import { cartesian } from 'ai-experiments'
+ *
+ * const combinations = cartesian({
+ *   model: ['sonnet', 'opus', 'gpt-4o'],
+ *   temperature: [0.3, 0.7, 1.0],
+ *   maxTokens: [100, 500, 1000],
+ * })
+ *
+ * // Returns 27 combinations (3 * 3 * 3):
+ * // [
+ * //   { model: 'sonnet', temperature: 0.3, maxTokens: 100 },
+ * //   { model: 'sonnet', temperature: 0.3, maxTokens: 500 },
+ * //   { model: 'sonnet', temperature: 0.3, maxTokens: 1000 },
+ * //   { model: 'sonnet', temperature: 0.7, maxTokens: 100 },
+ * //   ...
+ * // ]
+ *
+ * // Use with experiments:
+ * const variants = combinations.map((config, i) => ({
+ *   id: `variant-${i}`,
+ *   name: `${config.model} T=${config.temperature} max=${config.maxTokens}`,
+ *   config,
+ * }))
+ * ```
+ */
+export declare function cartesian<T extends CartesianParams>(params: T): CartesianResult<T>;
+/**
+ * Generate a grid of parameter combinations with filtering
+ *
+ * Similar to cartesian(), but allows filtering out invalid combinations.
+ *
+ * @example
+ * ```ts
+ * import { cartesianFilter } from 'ai-experiments'
+ *
+ * const combinations = cartesianFilter(
+ *   {
+ *     model: ['sonnet', 'opus'],
+ *     temperature: [0.3, 0.7, 1.0],
+ *     maxTokens: [100, 500],
+ *   },
+ *   // Filter out combinations where opus uses high temperature
+ *   (combo) => !(combo.model === 'opus' && combo.temperature > 0.7)
+ * )
+ * ```
+ */
+export declare function cartesianFilter<T extends CartesianParams>(params: T, filter: (combo: {
+    [K in keyof T]: T[K][number];
+}) => boolean): CartesianResult<T>;
+/**
+ * Generate a random sample from the cartesian product
+ *
+ * Useful when the full cartesian product is too large to test all combinations.
+ *
+ * @example
+ * ```ts
+ * import { cartesianSample } from 'ai-experiments'
+ *
+ * // Full product would be 1000 combinations (10 * 10 * 10)
+ * // Sample just 20 random combinations
+ * const sample = cartesianSample(
+ *   {
+ *     param1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ *     param2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ *     param3: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ *   },
+ *   20
+ * )
+ * ```
+ */
+export declare function cartesianSample<T extends CartesianParams>(params: T, sampleSize: number, options?: {
+    /** Random seed for reproducibility */
+    seed?: number;
+    /** Whether to sample without replacement (default: true) */
+    unique?: boolean;
+}): CartesianResult<T>;
+/**
+ * Count the total number of combinations without generating them
+ *
+ * Useful for checking if cartesian product is feasible before generating.
+ *
+ * @example
+ * ```ts
+ * import { cartesianCount } from 'ai-experiments'
+ *
+ * const count = cartesianCount({
+ *   model: ['sonnet', 'opus', 'gpt-4o'],
+ *   temperature: [0.3, 0.5, 0.7, 0.9],
+ *   maxTokens: [100, 500, 1000, 2000],
+ * })
+ * // Returns 48 (3 * 4 * 4)
+ *
+ * if (count > 100) {
+ *   console.log('Too many combinations, use cartesianSample instead')
+ * }
+ * ```
+ */
+export declare function cartesianCount<T extends CartesianParams>(params: T): number;
+/**
+ * Generate cartesian product with labels for each dimension
+ *
+ * Returns each combination together with the index of every value within its dimension's array.
+ *
+ * @example
+ * ```ts
+ * import { cartesianWithLabels } from 'ai-experiments'
+ *
+ * const labeled = cartesianWithLabels({
+ *   model: ['sonnet', 'opus'],
+ *   temperature: [0.3, 0.7],
+ * })
+ * // [
+ * //   { values: { model: 'sonnet', temperature: 0.3 }, labels: { model: 0, temperature: 0 } },
+ * //   { values: { model: 'sonnet', temperature: 0.7 }, labels: { model: 0, temperature: 1 } },
+ * //   { values: { model: 'opus', temperature: 0.3 }, labels: { model: 1, temperature: 0 } },
+ * //   { values: { model: 'opus', temperature: 0.7 }, labels: { model: 1, temperature: 1 } },
+ * // ]
+ * ```
+ */
+export declare function cartesianWithLabels<T extends CartesianParams>(params: T): Array<{
+    values: {
+        [K in keyof T]: T[K][number];
+    };
+    labels: {
+        [K in keyof T]: number;
+    };
+}>;
+//# sourceMappingURL=cartesian.d.ts.map

package/dist/cartesian.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"cartesian.d.ts","sourceRoot":"","sources":["../src/cartesian.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAElE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,wBAAgB,SAAS,CAAC,CAAC,SAAS,eAAe,EAAE,MAAM,EAAE,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC,CA0BlF;AA4BD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,eAAe,CAAC,CAAC,SAAS,eAAe,EACvD,MAAM,EAAE,CAAC,EACT,MAAM,EAAE,CAAC,KAAK,EAAE;KAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;CAAE,KAAK,OAAO,GAC3D,eAAe,CAAC,CAAC,CAAC,CAGpB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,eAAe,CAAC,CAAC,SAAS,eAAe,EACvD,MAAM,EAAE,CAAC,EACT,UAAU,EAAE,MAAM,EAClB,OAAO,GAAE;IACP,sCAAsC;IACtC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,4DAA4D;IAC5D,MAAM,CAAC,EAAE,OAAO,CAAA;CACZ,GACL,eAAe,CAAC,CAAC,CAAC,CAqBpB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,cAAc,CAAC,CAAC,SAAS,eAAe,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAS3E;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,mBAAmB,CAAC,CAAC,SAAS,eAAe,EAC3D,MAAM,EAAE,CAAC,GACR,KAAK,CAAC;IACP,MAAM,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;KAAE,CAAA;IACxC,MAAM,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,GAAG,MAAM;KAAE,CAAA;CACnC,CAAC,CA0BD"}