@percepta/kaizen 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -126
- package/agent/claude-command.md +23 -0
- package/agent/evals.md +41 -0
- package/agent/overview.md +53 -0
- package/agent/variant-builder.md +22 -0
- package/agent/views.md +51 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/BUILD_ID +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/build-manifest.json +22 -22
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/prerender-manifest.json +3 -3
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/routes-manifest.json +30 -10
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/27.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/516.js +8 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/913.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/middleware-build-manifest.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/404.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/500.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/benchmarks.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].html +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data/[[...path]].js.nft.json +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/eval.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].html +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments/[[...path]].js.nft.json +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/ideas.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-action.js.nft.json +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-item.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset-mutation.js.nft.json +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-dataset.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-datasets.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-trace.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/langfuse-traces.js.nft.json +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js +2 -2
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/linear-ideas.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-events.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-failures.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/run-traces.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js +2 -2
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/runs.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js +2 -2
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/systems.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/api/trace-renderer.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.html +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/index.js.nft.json +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages-manifest.json +8 -5
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/SCF0o7YxElB9rzWaOohsA/_buildManifest.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/253-85c76c34f33c9604.js +8 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{benchmarks-559dc9df52db3af4.js → benchmarks-30a17b7659010b8c.js} +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data/[[...path]]-e5f4083fe9ffe429.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{eval-3c911ea8744631fd.js → eval-160237a604b47416.js} +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments/[[...path]]-91e47a4893093600.js +1 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/{ideas-6829a271003150a9.js → ideas-96e58e4624952e26.js} +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/{index-1d8b6719f49e4ae0.js → index-d3306bb6f5d7d235.js} +1 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/cd3873236eb77caa.css +1 -0
- package/dashboard/.next/standalone/packages/kaizen/package.json +5 -3
- package/dashboard/.next/standalone/packages/kaizen/shared/workspace-paths.js +84 -0
- package/dist/commands/create-view.js +58 -0
- package/dist/commands/create-view.js.map +1 -0
- package/dist/commands/guide.js +66 -0
- package/dist/commands/guide.js.map +1 -0
- package/dist/commands/ideas.js +4 -8
- package/dist/commands/ideas.js.map +1 -1
- package/dist/commands/init-system.js +22 -20
- package/dist/commands/init-system.js.map +1 -1
- package/dist/commands/init.js +28 -64
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/log.js +5 -11
- package/dist/commands/log.js.map +1 -1
- package/dist/commands/rebuild.js +7 -9
- package/dist/commands/rebuild.js.map +1 -1
- package/dist/commands/run.js +5 -9
- package/dist/commands/run.js.map +1 -1
- package/dist/commands/studio.js +3 -3
- package/dist/commands/studio.js.map +1 -1
- package/dist/index.js +17 -21
- package/dist/index.js.map +1 -1
- package/dist/lib/cli.js +20 -0
- package/dist/lib/cli.js.map +1 -0
- package/dist/lib/events.js.map +1 -1
- package/dist/lib/fs-utils.js +3 -27
- package/dist/lib/fs-utils.js.map +1 -1
- package/dist/lib/leaderboard.js +1 -1
- package/dist/lib/leaderboard.js.map +1 -1
- package/dist/lib/paths.js +3 -3
- package/dist/lib/paths.js.map +1 -1
- package/dist/lib/promotion.js.map +1 -1
- package/dist/lib/run-dir.js +1 -1
- package/dist/lib/run-dir.js.map +1 -1
- package/dist/lib/runner.js +6 -5
- package/dist/lib/runner.js.map +1 -1
- package/dist/lib/system.js +4 -2
- package/dist/lib/system.js.map +1 -1
- package/dist/package.js +5 -3
- package/dist/shared/view-types.d.ts +67 -0
- package/dist/shared/view-types.d.ts.map +1 -0
- package/dist/shared/workspace-paths.js +84 -0
- package/dist/shared/workspace-paths.js.map +1 -0
- package/dist/types.d.ts +3 -30
- package/dist/types.d.ts.map +1 -1
- package/package.json +5 -3
- package/shared/view-types.d.ts +69 -0
- package/shared/view-types.js +1 -0
- package/shared/workspace-paths.d.ts +19 -0
- package/shared/workspace-paths.js +84 -0
- package/templates/system/eval.py +13 -6
- package/templates/system/eval.ts +11 -5
- package/templates/system/rubric.md +1 -1
- package/templates/system/system.md +6 -5
- package/templates/view/dataset-item.tsx +63 -0
- package/templates/view/trace.tsx +10 -0
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/chunks/715.js +0 -6
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.html +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/data.js.nft.json +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.html +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/server/pages/[system]/experiments.js.nft.json +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/YpQ-I4VL-aEdQrM5uN7_3/_buildManifest.js +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/673-ed4be46027ae7a37.js +0 -6
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/data-644e4280b4c86fe0.js +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/chunks/pages/[system]/experiments-42f31600c2bb47ad.js +0 -1
- package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/css/b18a6732b96168e1.css +0 -1
- package/dist/lib/env.js +0 -2
- package/dist/shared/env.js +0 -4
- package/templates/workspace/.claude/agents/variant-builder.md +0 -51
- package/templates/workspace/.claude/commands/kaizen.md +0 -65
- /package/dashboard/.next/standalone/packages/kaizen/dashboard/.next/static/{YpQ-I4VL-aEdQrM5uN7_3 → SCF0o7YxElB9rzWaOohsA}/_ssgManifest.js +0 -0
package/README.md
CHANGED
|
@@ -1,176 +1,104 @@
|
|
|
1
1
|
# Kaizen
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Kaizen is an agentic eval platform for AI systems. It helps a coding agent create a system definition, curate a Langfuse-backed dataset, write an eval script, run a baseline, and iterate on variants while Kaizen records scored runs under `kaizen/.kaizen/runs/`.
|
|
4
4
|
|
|
5
5
|
## Install In A Target Repo
|
|
6
6
|
|
|
7
|
-
For a persistent local `kaizen` command:
|
|
8
|
-
|
|
9
7
|
```bash
|
|
10
8
|
npm install -g @percepta/kaizen
|
|
11
9
|
kaizen init
|
|
12
|
-
kaizen
|
|
13
|
-
kaizen
|
|
10
|
+
kaizen guide
|
|
11
|
+
kaizen create system <system-id>
|
|
12
|
+
kaizen create view <system-id> --type trace
|
|
13
|
+
kaizen create view <system-id> --type dataset-item
|
|
14
|
+
kaizen run --system <system-id> --variant baseline --diagnostic --hypothesis "starting baseline"
|
|
14
15
|
kaizen studio
|
|
15
16
|
```
|
|
16
17
|
|
|
17
|
-
For one-off use
|
|
18
|
+
For one-off use:
|
|
18
19
|
|
|
19
20
|
```bash
|
|
20
21
|
npx @percepta/kaizen init
|
|
21
22
|
```
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
Kaizen is installed inside the customer repo. The customer-owned footprint is intentionally small:
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
- `kaizen/config.ts`
|
|
27
|
+
- `kaizen/systems/<system-id>/system.md`
|
|
28
|
+
- `kaizen/systems/<system-id>/eval.py|ts`
|
|
29
|
+
- optional `kaizen/systems/<system-id>/trace.tsx`
|
|
30
|
+
- optional `kaizen/systems/<system-id>/dataset-item.tsx`
|
|
31
|
+
- optional `kaizen/systems/<system-id>/rubric.md`
|
|
32
|
+
- `kaizen/.kaizen/runs/`
|
|
29
33
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
## Developing This Repo
|
|
33
|
-
|
|
34
|
-
```bash
|
|
35
|
-
pnpm install
|
|
36
|
-
pnpm --filter @percepta/kaizen dev:studio
|
|
37
|
-
```
|
|
34
|
+
Package-owned agent guidance is printed with `kaizen guide`. Customer-specific durable notes belong in `kaizen/systems/<system-id>/system.md`; Kaizen does not create repo-level agent markdown such as `KAIZEN.md`, `AGENTS.md`, or `CLAUDE.md`.
|
|
38
35
|
|
|
39
|
-
|
|
36
|
+
## Lifecycle
|
|
40
37
|
|
|
41
|
-
|
|
38
|
+
1. Run `kaizen init` once in the target repo.
|
|
39
|
+
2. Run `kaizen create system <system-id>` and fill in `kaizen/systems/<system-id>/system.md`.
|
|
40
|
+
3. Use Studio Data to create or select a Langfuse dataset, add useful source traces, and label dataset items.
|
|
41
|
+
4. Replace `kaizen/systems/<system-id>/eval.py|ts` with a real eval that reads the dataset named by `dataset_version`.
|
|
42
|
+
5. Run a diagnostic baseline, then a full baseline.
|
|
43
|
+
6. Run variants with `kaizen run`, inspect `kaizen log`, and use Studio to compare runs and failures.
|
|
42
44
|
|
|
43
|
-
|
|
44
|
-
| ------------------------------------------- | --------------------------------- |
|
|
45
|
-
| `pnpm --filter @percepta/kaizen dev:studio` | Start the Studio dashboard |
|
|
46
|
-
| `pnpm --filter @percepta/kaizen dev:next` | Start only the Next.js dev server |
|
|
47
|
-
| `pnpm typecheck` | Typecheck all packages |
|
|
48
|
-
| `pnpm test` | Run package tests |
|
|
45
|
+
The eval script emits NDJSON events to `--out-fd`; the runner owns process supervision, `kaizen/.kaizen/runs/`, crash recording, and automatic promotion. For Langfuse-backed evals, the eval should also link each dataset item to the fresh trace generated by that run and write the primary metric as a trace score.
|
|
49
46
|
|
|
50
|
-
##
|
|
47
|
+
## Custom Views
|
|
51
48
|
|
|
52
|
-
|
|
53
|
-
that affect the published package, add a changeset:
|
|
49
|
+
Custom views are plain React components co-located with the system:
|
|
54
50
|
|
|
55
51
|
```bash
|
|
56
|
-
|
|
52
|
+
kaizen create view <system-id> --type trace
|
|
53
|
+
kaizen create view <system-id> --type dataset-item
|
|
57
54
|
```
|
|
58
55
|
|
|
59
|
-
|
|
60
|
-
the CLI and bundled Studio, then either opens a version PR or publishes to npm
|
|
61
|
-
using the `NPM_TOKEN` repository secret.
|
|
62
|
-
|
|
63
|
-
## How It Works
|
|
64
|
-
|
|
65
|
-
Kaizen closes the eval loop for AI systems:
|
|
66
|
-
|
|
67
|
-
1. **Investigate** -- pull production traces from Langfuse, analyze failure patterns
|
|
68
|
-
2. **Build dataset** -- create versioned eval datasets from traces with ground truth
|
|
69
|
-
3. **Annotate** -- label ground truth via the dashboard's inline annotation view
|
|
70
|
-
4. **Record runs** -- test system variants against ground truth, scored automatically
|
|
71
|
-
5. **Improve** -- prepare a PR from the latest promoted baseline when a human asks
|
|
72
|
-
|
|
73
|
-
The `/kaizen` slash command in Claude Code orchestrates this workflow. Variant-builder agents can execute in parallel worktrees, but they pass the main checkout's `.kaizen` path via `KAIZEN_STATE_DIR` or `--state-dir` so the dashboard always reads one canonical state tree.
|
|
74
|
-
|
|
75
|
-
## Dashboard
|
|
76
|
-
|
|
77
|
-
The web app (Next.js, pages router) provides:
|
|
56
|
+
`trace.tsx` receives the full Langfuse trace payload plus actions for writing scores. `dataset-item.tsx` receives the dataset item, the linked source trace when available, and actions for updating the dataset item or linking run items. Browser-side credentials are not required; Studio proxies the write actions through local API routes.
|
|
78
57
|
|
|
79
|
-
|
|
80
|
-
- **Experiments** -- inspect local Kaizen runs from the customer repo's `.kaizen/runs/` store
|
|
81
|
-
- **Ideas** -- inspect Linear issues scoped to the system's configured project and the shared `Kaizen` label
|
|
82
|
-
- **Source indicators** -- show whether a field is sourced from repo code, Langfuse, Linear, or the local filesystem
|
|
58
|
+
Run `kaizen guide views` for the exact prop and action interfaces.
|
|
83
59
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
| Shortcut | Action |
|
|
87
|
-
| -------- | -------------- |
|
|
88
|
-
| `Cmd+[` | Toggle sidebar |
|
|
89
|
-
| `Cmd+/` | Show shortcuts |
|
|
90
|
-
|
|
91
|
-
## System Definitions
|
|
92
|
-
|
|
93
|
-
In real use, each target/customer repo owns its own `customers/`, `systems/`, `rubrics/`, `eval/`, and optional `views/` directories. Each system is defined in `systems/*.md` with YAML frontmatter:
|
|
60
|
+
## Developing This Repo
|
|
94
61
|
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
dataset_version: v1
|
|
99
|
-
eval_style: ground-truth
|
|
100
|
-
primary_metric: score
|
|
62
|
+
```bash
|
|
63
|
+
pnpm install
|
|
64
|
+
pnpm --filter @percepta/kaizen dev:studio
|
|
101
65
|
```
|
|
102
66
|
|
|
103
|
-
|
|
104
|
-
Kaizen runs Python evals with `python3`, JavaScript evals with `node`, and
|
|
105
|
-
TypeScript evals with the package's bundled `tsx` loader. New system scaffolds
|
|
106
|
-
default to Python; pass `--eval-language ts` to create a TypeScript eval.
|
|
107
|
-
For Langfuse-backed production evals, the same script should also link each
|
|
108
|
-
dataset item to the fresh trace produced by that run in a Langfuse dataset run
|
|
109
|
-
and write the primary metric as a trace score. Those writes are for durable
|
|
110
|
-
trace inspection; the NDJSON `complete.score` remains Kaizen's required result
|
|
111
|
-
contract.
|
|
112
|
-
|
|
113
|
-
This repo keeps historical definitions under `examples/legacy-workspace/` only as sample data for local Studio development:
|
|
114
|
-
|
|
115
|
-
| Customer | System | Primary Metric |
|
|
116
|
-
| ---------------- | ---------------------------- | ----------------------- |
|
|
117
|
-
| Transcarent | EMO HIE Processing | F2 |
|
|
118
|
-
| Transcarent | EMO Facility Processing | F2 |
|
|
119
|
-
| Transcarent | EMO Cost Savings Agent | Classification Accuracy |
|
|
120
|
-
| Transcarent | EMO Summarization | -- |
|
|
121
|
-
| Transcarent | Orbit Call Summarization | Judge Quality |
|
|
122
|
-
| Cityblock Health | BOI Chaselist Impact | Calibration Error |
|
|
123
|
-
| Cityblock Health | Concurrent Review Agent | -- |
|
|
124
|
-
| Cityblock Health | Contract Exclusion Detection | -- |
|
|
125
|
-
| Cityblock Health | Quality Gap Modeling | Calibration Error |
|
|
126
|
-
| Janus Henderson | Portfolio Analytics | -- |
|
|
127
|
-
| Summa Health | Agentic BI (SLCC) | -- |
|
|
128
|
-
|
|
129
|
-
## Repository Structure
|
|
67
|
+
This starts Studio at `http://localhost:6789` against `examples/demo-workspace`, a local fixture for package development. The CLI lives in `src/`; the bundled Next.js Studio lives in `dashboard/`.
|
|
130
68
|
|
|
131
|
-
|
|
132
|
-
kaizen/
|
|
133
|
-
├── packages/kaizen/ # Published @percepta/kaizen package
|
|
134
|
-
│ ├── src/ # CLI source
|
|
135
|
-
│ ├── dashboard/ # Next.js Studio (built into the published bundle)
|
|
136
|
-
│ └── examples/legacy-workspace/ # Transitional customer/system fixture for local dev
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
## Tech Stack
|
|
69
|
+
Useful scripts:
|
|
140
70
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
71
|
+
| Script | What it does |
|
|
72
|
+
| ------------------------------------------- | ---------------------------------- |
|
|
73
|
+
| `pnpm --filter @percepta/kaizen dev:studio` | Start Studio with the demo fixture |
|
|
74
|
+
| `pnpm --filter @percepta/kaizen dev:next` | Start only the Next.js dev server |
|
|
75
|
+
| `pnpm --filter @percepta/kaizen typecheck` | Typecheck the package |
|
|
76
|
+
| `pnpm --filter @percepta/kaizen test` | Run package tests |
|
|
145
77
|
|
|
146
|
-
## Environment
|
|
78
|
+
## Environment
|
|
147
79
|
|
|
148
|
-
Create `.env.local` in the workspace repo root
|
|
80
|
+
Create `.env.local` in the workspace repo root:
|
|
149
81
|
|
|
150
|
-
```
|
|
82
|
+
```text
|
|
151
83
|
LANGFUSE_HOST=https://...
|
|
152
84
|
LANGFUSE_PUBLIC_KEY=pk-lf-...
|
|
153
85
|
LANGFUSE_SECRET_KEY=sk-lf-...
|
|
154
86
|
LINEAR_API_KEY=lin_api_...
|
|
87
|
+
LINEAR_TEAM_KEY=ENG
|
|
155
88
|
```
|
|
156
89
|
|
|
157
|
-
|
|
158
|
-
`.env.local`. Put the values there instead of app package env files so stale
|
|
159
|
-
package-level placeholders cannot shadow the credentials Studio needs.
|
|
160
|
-
|
|
161
|
-
Langfuse credentials power the Data surface. `LINEAR_API_KEY` powers the Ideas
|
|
162
|
-
surface and the `kaizen ideas --system <id>` CLI command.
|
|
90
|
+
Langfuse credentials power the Data surface and custom view actions. `LINEAR_API_KEY` and `LINEAR_TEAM_KEY` power `kaizen ideas --system <id>`.
|
|
163
91
|
|
|
164
|
-
System Ideas configuration should use a stable Linear project URL or ID
|
|
92
|
+
System Ideas configuration should use a stable Linear project URL or ID in `system.md`:
|
|
165
93
|
|
|
166
94
|
```yaml
|
|
167
|
-
linear_project: https://linear.app
|
|
95
|
+
linear_project: https://linear.app/<workspace>/project/<project-slug>
|
|
168
96
|
```
|
|
169
97
|
|
|
170
|
-
|
|
171
|
-
change in Linear without changing project identity.
|
|
98
|
+
## Publishing
|
|
172
99
|
|
|
173
|
-
|
|
100
|
+
Publishing `@percepta/kaizen` to npm is automated with Changesets. For changes that affect the published package, add a changeset:
|
|
174
101
|
|
|
175
|
-
|
|
176
|
-
|
|
102
|
+
```bash
|
|
103
|
+
pnpm changeset
|
|
104
|
+
```
|
package/agent/claude-command.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Kaizen Claude Command Guide
|
|
2
|
+
|
|
3
|
+
Use this as a Claude Code command body when Claude should drive the Kaizen lifecycle. It is package-owned; do not copy it into customer repos as durable markdown unless the user explicitly asks.
|
|
4
|
+
|
|
5
|
+
## Rules
|
|
6
|
+
|
|
7
|
+
- Never commit PHI or credentials.
|
|
8
|
+
- Run `kaizen guide` first when guidance is not already in context.
|
|
9
|
+
- Put customer-specific notes in `kaizen/systems/<system-id>/system.md`.
|
|
10
|
+
- Use Studio for dataset curation and custom dataset item views for labeling workflows.
|
|
11
|
+
|
|
12
|
+
## Workflow
|
|
13
|
+
|
|
14
|
+
1. Select a system. If none is given, list `kaizen/systems/*/system.md` and ask which one.
|
|
15
|
+
2. Read `system.md`, relevant application code, and current `kaizen log --system <system-id> --json`.
|
|
16
|
+
3. If the system is new, run `kaizen create system <system-id> --eval-language py|ts` and fill in the scaffold.
|
|
17
|
+
4. Use Studio Data to create/select a dataset, add traces, and label expected outputs.
|
|
18
|
+
5. Replace the starter eval with real code that loads `--dataset`, runs the candidate, emits NDJSON events, and persists Langfuse links/scores when available.
|
|
19
|
+
6. Run a diagnostic baseline, then the full baseline.
|
|
20
|
+
7. Iterate variants with `kaizen run`.
|
|
21
|
+
8. Create `trace.tsx` or `dataset-item.tsx` only when the default views are insufficient.
|
|
22
|
+
|
|
23
|
+
For exact eval and view contracts, run `kaizen guide evals` and `kaizen guide views`.
|
package/agent/evals.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Kaizen Eval Guide
|
|
2
|
+
|
|
3
|
+
Eval scripts are customer-owned executable code stored at `kaizen/systems/<system-id>/eval.py|ts`. `kaizen run` invokes the path named by `run_eval` in `kaizen/systems/<system-id>/system.md`:
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
<run_eval> --variant <variant-id> --dataset <dataset_version> --out-fd 3 [--max-items <n>]
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
The eval must write NDJSON events to `--out-fd`. Do not write these events to normal stdout.
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{"type":"start","n":10,"eval_version":1,"dataset_version":"v1"}
|
|
13
|
+
{"type":"item","id":"item-1","score":0.8,"breakdown":{"score":0.8},"trace_id":"trace-id-or-null"}
|
|
14
|
+
{"type":"complete","score":0.82,"n":10,"breakdown":{"score":0.82},"worst_traces":[{"id":"item-1","score":0.8,"trace_id":"trace-id-or-null"}]}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
The terminal `complete.score` is Kaizen's authoritative result. It must be a number in `[0, 1]`.
|
|
18
|
+
|
|
19
|
+
## Langfuse Persistence
|
|
20
|
+
|
|
21
|
+
For Langfuse-backed evals:
|
|
22
|
+
|
|
23
|
+
- Treat `--dataset` as the Langfuse dataset name unless `system.md` says otherwise.
|
|
24
|
+
- Load dataset items from that dataset.
|
|
25
|
+
- Run the candidate system for each item.
|
|
26
|
+
- Capture the fresh Langfuse trace id for that item.
|
|
27
|
+
- Link the dataset item to the fresh trace in a Langfuse dataset run.
|
|
28
|
+
- Write the primary metric as a Langfuse score on the fresh trace.
|
|
29
|
+
- Emit the same item score through Kaizen's NDJSON stream.
|
|
30
|
+
|
|
31
|
+
Langfuse stores trace inspection, dataset-run history, and score metadata. `kaizen/.kaizen/runs/` remains the source of truth for promotion and run state.
|
|
32
|
+
|
|
33
|
+
## Baseline
|
|
34
|
+
|
|
35
|
+
Run a diagnostic baseline first:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
kaizen run --system <system-id> --variant baseline --diagnostic --hypothesis "starting baseline"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
If setup, credentials, dataset access, and event schema are valid, run the full baseline without `--diagnostic`.
|
package/agent/overview.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Kaizen Agent Guide
|
|
2
|
+
|
|
3
|
+
Kaizen helps a coding agent define, evaluate, inspect, and improve an AI system inside the customer repo. This guide is package-owned; rerun `kaizen guide` after package upgrades.
|
|
4
|
+
|
|
5
|
+
Do not create extra long-lived agent markdown files. Customer-specific notes belong in `kaizen/systems/<system-id>/system.md`; repo-owned code belongs beside it in `kaizen/systems/<system-id>/`.
|
|
6
|
+
|
|
7
|
+
## Commands
|
|
8
|
+
|
|
9
|
+
Run commands from the repo root:
|
|
10
|
+
|
|
11
|
+
- `kaizen init` - scaffold Kaizen once.
|
|
12
|
+
- `kaizen guide topics` - list focused guide topics.
|
|
13
|
+
- `kaizen create system <system-id> --eval-language py|ts` - create `kaizen/systems/<system-id>/system.md` and `kaizen/systems/<system-id>/eval.py|ts`.
|
|
14
|
+
- `kaizen create view <system-id> --type trace` - create `kaizen/systems/<system-id>/trace.tsx`.
|
|
15
|
+
- `kaizen create view <system-id> --type dataset-item` - create `kaizen/systems/<system-id>/dataset-item.tsx`.
|
|
16
|
+
- `kaizen studio` - open Studio for dataset curation, trace inspection, and run review.
|
|
17
|
+
- `kaizen run --system <system-id> --variant <variant-id> --hypothesis "<why>"` - record one eval run.
|
|
18
|
+
- `kaizen run --system <system-id> --variant <variant-id> --diagnostic --hypothesis "<why>"` - run a small diagnostic sample first.
|
|
19
|
+
- `kaizen log --system <system-id> --json` - inspect the promoted baseline and recent runs.
|
|
20
|
+
|
|
21
|
+
Run state is written to `kaizen/.kaizen/`. When evaluating from a Git linked worktree, Kaizen automatically stores run state in the primary checkout's `kaizen/.kaizen/`.
|
|
22
|
+
|
|
23
|
+
## Files
|
|
24
|
+
|
|
25
|
+
- `kaizen/systems/<system-id>/system.md` is the durable system definition. It should explain the workflow, key files, setup, dataset, metric, known failures, and variant ideas.
|
|
26
|
+
- `kaizen/systems/<system-id>/eval.py|ts` is the eval entrypoint named by `run_eval`.
|
|
27
|
+
- `kaizen/systems/<system-id>/trace.tsx` is an optional custom trace view.
|
|
28
|
+
- `kaizen/systems/<system-id>/dataset-item.tsx` is an optional custom dataset labeling view.
|
|
29
|
+
- `kaizen/systems/<system-id>/rubric.md` is optional and only needed for LLM-as-judge or hybrid evals.
|
|
30
|
+
|
|
31
|
+
Each `system.md` must include:
|
|
32
|
+
|
|
33
|
+
```yaml
|
|
34
|
+
run_eval: kaizen/systems/<system-id>/eval.py
|
|
35
|
+
eval_version: 1
|
|
36
|
+
dataset_version: <langfuse-dataset-name>
|
|
37
|
+
eval_style: ground-truth
|
|
38
|
+
primary_metric: score
|
|
39
|
+
target: 0.90
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Lifecycle
|
|
43
|
+
|
|
44
|
+
1. Run `kaizen create system <system-id>` unless the system already exists.
|
|
45
|
+
2. Read the codebase and fill in `system.md` with real key files, setup, data sources, dataset, and metric.
|
|
46
|
+
3. Use Studio Data to create/select a dataset, add representative traces, and label expected outputs.
|
|
47
|
+
4. Replace the starter eval with real code that loads the dataset named by `--dataset`.
|
|
48
|
+
5. Run a diagnostic baseline.
|
|
49
|
+
6. Run the full baseline.
|
|
50
|
+
7. Iterate on variants with `kaizen run`; read `kaizen log` and Studio failures between attempts.
|
|
51
|
+
8. Create custom views only when the default JSON views are not enough for trace inspection or dataset labeling.
|
|
52
|
+
|
|
53
|
+
For eval details, run `kaizen guide evals`. For view props and actions, run `kaizen guide views`.
|
package/agent/variant-builder.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Kaizen Variant Builder Guide
|
|
2
|
+
|
|
3
|
+
You implement and evaluate one variant, record one run with `kaizen run`, then stop.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
1. Work in the assigned worktree, not the main checkout.
|
|
8
|
+
2. Let Kaizen auto-detect the primary checkout for run state. Runs from linked worktrees are recorded under the primary checkout's `kaizen/.kaizen/`.
|
|
9
|
+
3. Read `kaizen/systems/<system-id>/system.md`, the parent run manifest when present, and the parent failures.
|
|
10
|
+
4. Install or start only what the system setup section requires.
|
|
11
|
+
|
|
12
|
+
## Run
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
kaizen run \
|
|
16
|
+
--system <system-id> \
|
|
17
|
+
--variant <variant-id> \
|
|
18
|
+
--parent <parent-run-id> \
|
|
19
|
+
--hypothesis "<what changed and why>"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The runner owns process supervision, `kaizen/.kaizen/runs/`, crash recording, and promotion. Read the single summary line it prints and include the run id and score in your handoff.
|
package/agent/views.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Kaizen Custom Views Guide
|
|
2
|
+
|
|
3
|
+
Custom views are customer-owned React components co-located with the system.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
kaizen create view <system-id> --type trace
|
|
7
|
+
kaizen create view <system-id> --type dataset-item
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Studio loads:
|
|
11
|
+
|
|
12
|
+
- `kaizen/systems/<system-id>/trace.tsx`
|
|
13
|
+
- `kaizen/systems/<system-id>/dataset-item.tsx`
|
|
14
|
+
|
|
15
|
+
No `system.md` frontmatter field is required.
|
|
16
|
+
|
|
17
|
+
## Trace View
|
|
18
|
+
|
|
19
|
+
```tsx
|
|
20
|
+
import type { TraceRendererProps } from "@percepta/kaizen";
|
|
21
|
+
|
|
22
|
+
export default function TraceView({ trace, actions }: TraceRendererProps) {
|
|
23
|
+
return <pre>{JSON.stringify(trace, null, 2)}</pre>;
|
|
24
|
+
}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Trace views receive `{ trace, context, actions }`. `actions.createScore(...)` writes a Langfuse score for the current or supplied trace id.
|
|
28
|
+
|
|
29
|
+
## Dataset Item View
|
|
30
|
+
|
|
31
|
+
```tsx
|
|
32
|
+
import type { DatasetItemRendererProps } from "@percepta/kaizen";
|
|
33
|
+
|
|
34
|
+
export default function DatasetItemView({
|
|
35
|
+
datasetItem,
|
|
36
|
+
trace,
|
|
37
|
+
actions,
|
|
38
|
+
}: DatasetItemRendererProps) {
|
|
39
|
+
return <pre>{JSON.stringify({ datasetItem, trace }, null, 2)}</pre>;
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Dataset item views receive `{ datasetItem, trace, context, actions }`. Use them for labeling expected output, metadata, review status, and scoring workflows.
|
|
44
|
+
|
|
45
|
+
Available dataset actions:
|
|
46
|
+
|
|
47
|
+
- `actions.updateDatasetItem({ expectedOutput?, metadata?, input?, sourceTraceId?, status? })`
|
|
48
|
+
- `actions.createDatasetRunItem({ runName, datasetItemId?, traceId?, runDescription?, metadata? })`
|
|
49
|
+
- `actions.createScore({ name, value, traceId?, comment?, metadata? })`
|
|
50
|
+
|
|
51
|
+
When you omit `datasetName`, `itemId`, or `traceId`, each defaults to the current Studio selection, provided Studio can infer it.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
SCF0o7YxElB9rzWaOohsA
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
],
|
|
5
5
|
"devFiles": [],
|
|
6
6
|
"lowPriorityFiles": [
|
|
7
|
-
"static/
|
|
8
|
-
"static/
|
|
7
|
+
"static/SCF0o7YxElB9rzWaOohsA/_buildManifest.js",
|
|
8
|
+
"static/SCF0o7YxElB9rzWaOohsA/_ssgManifest.js"
|
|
9
9
|
],
|
|
10
10
|
"rootMainFiles": [],
|
|
11
11
|
"rootMainFilesTree": {},
|
|
@@ -15,9 +15,9 @@
|
|
|
15
15
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
16
16
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
17
17
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
18
|
-
"static/css/
|
|
19
|
-
"static/chunks/
|
|
20
|
-
"static/chunks/pages/index-
|
|
18
|
+
"static/css/cd3873236eb77caa.css",
|
|
19
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
20
|
+
"static/chunks/pages/index-d3306bb6f5d7d235.js"
|
|
21
21
|
],
|
|
22
22
|
"/[system]": [
|
|
23
23
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
@@ -30,45 +30,45 @@
|
|
|
30
30
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
31
31
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
32
32
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
33
|
-
"static/css/
|
|
34
|
-
"static/chunks/
|
|
35
|
-
"static/chunks/pages/[system]/benchmarks-
|
|
33
|
+
"static/css/cd3873236eb77caa.css",
|
|
34
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
35
|
+
"static/chunks/pages/[system]/benchmarks-30a17b7659010b8c.js"
|
|
36
36
|
],
|
|
37
|
-
"/[system]/data": [
|
|
37
|
+
"/[system]/data/[[...path]]": [
|
|
38
38
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
39
39
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
40
40
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
41
41
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
42
|
-
"static/css/
|
|
43
|
-
"static/chunks/
|
|
44
|
-
"static/chunks/pages/[system]/data-
|
|
42
|
+
"static/css/cd3873236eb77caa.css",
|
|
43
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
44
|
+
"static/chunks/pages/[system]/data/[[...path]]-e5f4083fe9ffe429.js"
|
|
45
45
|
],
|
|
46
46
|
"/[system]/eval": [
|
|
47
47
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
48
48
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
49
49
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
50
50
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
51
|
-
"static/css/
|
|
52
|
-
"static/chunks/
|
|
53
|
-
"static/chunks/pages/[system]/eval-
|
|
51
|
+
"static/css/cd3873236eb77caa.css",
|
|
52
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
53
|
+
"static/chunks/pages/[system]/eval-160237a604b47416.js"
|
|
54
54
|
],
|
|
55
|
-
"/[system]/experiments": [
|
|
55
|
+
"/[system]/experiments/[[...path]]": [
|
|
56
56
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
57
57
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
58
58
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
59
59
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
60
|
-
"static/css/
|
|
61
|
-
"static/chunks/
|
|
62
|
-
"static/chunks/pages/[system]/experiments-
|
|
60
|
+
"static/css/cd3873236eb77caa.css",
|
|
61
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
62
|
+
"static/chunks/pages/[system]/experiments/[[...path]]-91e47a4893093600.js"
|
|
63
63
|
],
|
|
64
64
|
"/[system]/ideas": [
|
|
65
65
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
66
66
|
"static/chunks/framework-7089c270fe56b51f.js",
|
|
67
67
|
"static/chunks/main-7ac7f96d288497aa.js",
|
|
68
68
|
"static/chunks/431-43358ce3c29e5e1b.js",
|
|
69
|
-
"static/css/
|
|
70
|
-
"static/chunks/
|
|
71
|
-
"static/chunks/pages/[system]/ideas-
|
|
69
|
+
"static/css/cd3873236eb77caa.css",
|
|
70
|
+
"static/chunks/253-85c76c34f33c9604.js",
|
|
71
|
+
"static/chunks/pages/[system]/ideas-96e58e4624952e26.js"
|
|
72
72
|
],
|
|
73
73
|
"/_app": [
|
|
74
74
|
"static/chunks/webpack-8c7966d82a2912f0.js",
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
"routes": {},
|
|
4
4
|
"dynamicRoutes": {},
|
|
5
5
|
"preview": {
|
|
6
|
-
"previewModeId": "
|
|
7
|
-
"previewModeSigningKey": "
|
|
8
|
-
"previewModeEncryptionKey": "
|
|
6
|
+
"previewModeId": "0ba1834cfc7d7c8ea8708ad29269e503",
|
|
7
|
+
"previewModeSigningKey": "4cdb34c8c9deccef53be3f014ab0ccdd422a7b57e71290f1ce447fd0a2e2a138",
|
|
8
|
+
"previewModeEncryptionKey": "c48cc1326503410c45e8e0baf97d801d067e317fa759e378825440098271163f"
|
|
9
9
|
},
|
|
10
10
|
"notFoundRoutes": []
|
|
11
11
|
}
|
|
@@ -38,12 +38,13 @@
|
|
|
38
38
|
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/benchmarks(?:/)?$"
|
|
39
39
|
},
|
|
40
40
|
{
|
|
41
|
-
"page": "/[system]/data",
|
|
42
|
-
"regex": "^/([^/]+?)/data(?:/)?$",
|
|
41
|
+
"page": "/[system]/data/[[...path]]",
|
|
42
|
+
"regex": "^/([^/]+?)/data(?:/(.+?))?(?:/)?$",
|
|
43
43
|
"routeKeys": {
|
|
44
|
-
"nxtPsystem": "nxtPsystem"
|
|
44
|
+
"nxtPsystem": "nxtPsystem",
|
|
45
|
+
"nxtPpath": "nxtPpath"
|
|
45
46
|
},
|
|
46
|
-
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/data(?:/)?$"
|
|
47
|
+
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/data(?:/(?<nxtPpath>.+?))?(?:/)?$"
|
|
47
48
|
},
|
|
48
49
|
{
|
|
49
50
|
"page": "/[system]/eval",
|
|
@@ -54,12 +55,13 @@
|
|
|
54
55
|
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/eval(?:/)?$"
|
|
55
56
|
},
|
|
56
57
|
{
|
|
57
|
-
"page": "/[system]/experiments",
|
|
58
|
-
"regex": "^/([^/]+?)/experiments(?:/)?$",
|
|
58
|
+
"page": "/[system]/experiments/[[...path]]",
|
|
59
|
+
"regex": "^/([^/]+?)/experiments(?:/(.+?))?(?:/)?$",
|
|
59
60
|
"routeKeys": {
|
|
60
|
-
"nxtPsystem": "nxtPsystem"
|
|
61
|
+
"nxtPsystem": "nxtPsystem",
|
|
62
|
+
"nxtPpath": "nxtPpath"
|
|
61
63
|
},
|
|
62
|
-
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/experiments(?:/)?$"
|
|
64
|
+
"namedRegex": "^/(?<nxtPsystem>[^/]+?)/experiments(?:/(?<nxtPpath>.+?))?(?:/)?$"
|
|
63
65
|
},
|
|
64
66
|
{
|
|
65
67
|
"page": "/[system]/ideas",
|
|
@@ -77,6 +79,12 @@
|
|
|
77
79
|
"routeKeys": {},
|
|
78
80
|
"namedRegex": "^/(?:/)?$"
|
|
79
81
|
},
|
|
82
|
+
{
|
|
83
|
+
"page": "/api/langfuse-action",
|
|
84
|
+
"regex": "^/api/langfuse\\-action(?:/)?$",
|
|
85
|
+
"routeKeys": {},
|
|
86
|
+
"namedRegex": "^/api/langfuse\\-action(?:/)?$"
|
|
87
|
+
},
|
|
80
88
|
{
|
|
81
89
|
"page": "/api/langfuse-dataset",
|
|
82
90
|
"regex": "^/api/langfuse\\-dataset(?:/)?$",
|
|
@@ -89,6 +97,12 @@
|
|
|
89
97
|
"routeKeys": {},
|
|
90
98
|
"namedRegex": "^/api/langfuse\\-dataset\\-item(?:/)?$"
|
|
91
99
|
},
|
|
100
|
+
{
|
|
101
|
+
"page": "/api/langfuse-dataset-mutation",
|
|
102
|
+
"regex": "^/api/langfuse\\-dataset\\-mutation(?:/)?$",
|
|
103
|
+
"routeKeys": {},
|
|
104
|
+
"namedRegex": "^/api/langfuse\\-dataset\\-mutation(?:/)?$"
|
|
105
|
+
},
|
|
92
106
|
{
|
|
93
107
|
"page": "/api/langfuse-datasets",
|
|
94
108
|
"regex": "^/api/langfuse\\-datasets(?:/)?$",
|
|
@@ -101,6 +115,12 @@
|
|
|
101
115
|
"routeKeys": {},
|
|
102
116
|
"namedRegex": "^/api/langfuse\\-trace(?:/)?$"
|
|
103
117
|
},
|
|
118
|
+
{
|
|
119
|
+
"page": "/api/langfuse-traces",
|
|
120
|
+
"regex": "^/api/langfuse\\-traces(?:/)?$",
|
|
121
|
+
"routeKeys": {},
|
|
122
|
+
"namedRegex": "^/api/langfuse\\-traces(?:/)?$"
|
|
123
|
+
},
|
|
104
124
|
{
|
|
105
125
|
"page": "/api/linear-ideas",
|
|
106
126
|
"regex": "^/api/linear\\-ideas(?:/)?$",
|
|
@@ -150,8 +170,8 @@
|
|
|
150
170
|
"routeKeys": {
|
|
151
171
|
"nxtPsystem": "nxtPsystem"
|
|
152
172
|
},
|
|
153
|
-
"dataRouteRegex": "^/_next/data/
|
|
154
|
-
"namedDataRouteRegex": "^/_next/data/
|
|
173
|
+
"dataRouteRegex": "^/_next/data/SCF0o7YxElB9rzWaOohsA/([^/]+?)\\.json$",
|
|
174
|
+
"namedDataRouteRegex": "^/_next/data/SCF0o7YxElB9rzWaOohsA/(?<nxtPsystem>[^/]+?)\\.json$"
|
|
155
175
|
}
|
|
156
176
|
],
|
|
157
177
|
"rsc": {
|