opencode-swarm-plugin 0.43.0 → 0.44.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cass.characterization.test.ts +422 -0
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.test.ts +68 -0
- package/bin/swarm.ts +81 -8
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/contributor-tools.d.ts +42 -0
- package/dist/contributor-tools.d.ts.map +1 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7728 -62590
- package/dist/plugin.js +23833 -78695
- package/dist/sessions/agent-discovery.d.ts +59 -0
- package/dist/sessions/agent-discovery.d.ts.map +1 -0
- package/dist/sessions/index.d.ts +10 -0
- package/dist/sessions/index.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-review.d.ts.map +1 -1
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2255
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2426
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -935
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
|
@@ -1,327 +0,0 @@
|
|
|
1
|
-
{"timestamp":"2025-12-25T04:28:42.041Z","eval_name":"compaction-prompt","score":0.85,"run_count":1}
|
|
2
|
-
{"timestamp":"2025-12-25T04:28:42.041Z","eval_name":"coordinator-behavior","score":0.85,"run_count":1}
|
|
3
|
-
{"timestamp":"2025-12-25T04:28:42.042Z","eval_name":"coordinator-session","score":0.85,"run_count":1}
|
|
4
|
-
{"timestamp":"2025-12-25T04:28:42.042Z","eval_name":"swarm-decomposition","score":0.85,"run_count":1}
|
|
5
|
-
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"compaction-prompt","score":0.85,"run_count":2}
|
|
6
|
-
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"coordinator-behavior","score":0.85,"run_count":2}
|
|
7
|
-
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"coordinator-session","score":0.85,"run_count":2}
|
|
8
|
-
{"timestamp":"2025-12-25T04:28:52.405Z","eval_name":"swarm-decomposition","score":0.85,"run_count":2}
|
|
9
|
-
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"compaction-prompt","score":0.85,"run_count":3}
|
|
10
|
-
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"coordinator-behavior","score":0.85,"run_count":3}
|
|
11
|
-
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"coordinator-session","score":0.85,"run_count":3}
|
|
12
|
-
{"timestamp":"2025-12-25T05:11:18.469Z","eval_name":"swarm-decomposition","score":0.85,"run_count":3}
|
|
13
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":1}
|
|
14
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":1}
|
|
15
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":1}
|
|
16
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Swarm Decomposition Quality","score":0.7213888888888889,"run_count":1}
|
|
17
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":1}
|
|
18
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":1}
|
|
19
|
-
{"timestamp":"2025-12-25T16:30:42.957Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":1}
|
|
20
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"No False Positives","score":1,"run_count":1}
|
|
21
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Example: Basic scorer test","score":1,"run_count":1}
|
|
22
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":1}
|
|
23
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Placeholder Detection","score":0,"run_count":1}
|
|
24
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Generic Instructions Fail","score":0,"run_count":1}
|
|
25
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"First Tool Discipline","score":0,"run_count":1}
|
|
26
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":1}
|
|
27
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":1}
|
|
28
|
-
{"timestamp":"2025-12-25T16:30:42.958Z","eval_name":"Coordinator Behavior After Compaction","score":0.8645833333333333,"run_count":1}
|
|
29
|
-
{"timestamp":"2025-12-25T16:30:43.088Z","eval_name":"Example: Basic scorer test","score":1,"run_count":2}
|
|
30
|
-
{"timestamp":"2025-12-25T16:30:43.202Z","eval_name":"Example: Basic scorer test","score":1,"run_count":3}
|
|
31
|
-
{"timestamp":"2025-12-25T16:30:43.316Z","eval_name":"Example: Basic scorer test","score":1,"run_count":4}
|
|
32
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":2}
|
|
33
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":2}
|
|
34
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":2}
|
|
35
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Swarm Decomposition Quality","score":0.6748148148148146,"run_count":2}
|
|
36
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":2}
|
|
37
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":2}
|
|
38
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":2}
|
|
39
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"No False Positives","score":1,"run_count":2}
|
|
40
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Example: Basic scorer test","score":1,"run_count":5}
|
|
41
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":2}
|
|
42
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":2}
|
|
43
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":2}
|
|
44
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Placeholder Detection","score":0,"run_count":2}
|
|
45
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Generic Instructions Fail","score":0,"run_count":2}
|
|
46
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"First Tool Discipline","score":0,"run_count":2}
|
|
47
|
-
{"timestamp":"2025-12-25T16:31:17.738Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":2}
|
|
48
|
-
{"timestamp":"2025-12-25T16:31:17.874Z","eval_name":"Example: Basic scorer test","score":1,"run_count":6}
|
|
49
|
-
{"timestamp":"2025-12-25T16:31:17.995Z","eval_name":"Example: Basic scorer test","score":1,"run_count":7}
|
|
50
|
-
{"timestamp":"2025-12-25T16:31:18.113Z","eval_name":"Example: Basic scorer test","score":1,"run_count":8}
|
|
51
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":3}
|
|
52
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":3}
|
|
53
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":3}
|
|
54
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Swarm Decomposition Quality","score":0.6988888888888889,"run_count":3}
|
|
55
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":3}
|
|
56
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":3}
|
|
57
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":3}
|
|
58
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"No False Positives","score":1,"run_count":3}
|
|
59
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Example: Basic scorer test","score":1,"run_count":9}
|
|
60
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":3}
|
|
61
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":3}
|
|
62
|
-
{"timestamp":"2025-12-25T16:32:49.807Z","eval_name":"Placeholder Detection","score":0,"run_count":3}
|
|
63
|
-
{"timestamp":"2025-12-25T16:32:49.808Z","eval_name":"Generic Instructions Fail","score":0,"run_count":3}
|
|
64
|
-
{"timestamp":"2025-12-25T16:32:49.808Z","eval_name":"First Tool Discipline","score":0,"run_count":3}
|
|
65
|
-
{"timestamp":"2025-12-25T16:32:49.808Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":3}
|
|
66
|
-
{"timestamp":"2025-12-25T16:32:49.808Z","eval_name":"Coordinator Resists Direct Implementation","score":0.9375,"run_count":3}
|
|
67
|
-
{"timestamp":"2025-12-25T16:32:49.943Z","eval_name":"Example: Basic scorer test","score":1,"run_count":10}
|
|
68
|
-
{"timestamp":"2025-12-25T16:32:50.073Z","eval_name":"Example: Basic scorer test","score":1,"run_count":11}
|
|
69
|
-
{"timestamp":"2025-12-25T16:32:50.199Z","eval_name":"Example: Basic scorer test","score":1,"run_count":12}
|
|
70
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":4}
|
|
71
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":4}
|
|
72
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":4}
|
|
73
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Swarm Decomposition Quality","score":0.6798611111111109,"run_count":4}
|
|
74
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":4}
|
|
75
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":4}
|
|
76
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":4}
|
|
77
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"No False Positives","score":1,"run_count":4}
|
|
78
|
-
{"timestamp":"2025-12-25T16:35:31.083Z","eval_name":"Example: Basic scorer test","score":1,"run_count":13}
|
|
79
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":4}
|
|
80
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":4}
|
|
81
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Placeholder Detection","score":0,"run_count":4}
|
|
82
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Generic Instructions Fail","score":0,"run_count":4}
|
|
83
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"First Tool Discipline","score":0,"run_count":4}
|
|
84
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Coordinator Resists Direct Implementation","score":0.9375,"run_count":4}
|
|
85
|
-
{"timestamp":"2025-12-25T16:35:31.084Z","eval_name":"Coordinator Behavior After Compaction","score":0.8645833333333333,"run_count":4}
|
|
86
|
-
{"timestamp":"2025-12-25T16:35:31.221Z","eval_name":"Example: Basic scorer test","score":1,"run_count":14}
|
|
87
|
-
{"timestamp":"2025-12-25T16:35:31.329Z","eval_name":"Example: Basic scorer test","score":1,"run_count":15}
|
|
88
|
-
{"timestamp":"2025-12-25T16:35:31.444Z","eval_name":"Example: Basic scorer test","score":1,"run_count":16}
|
|
89
|
-
{"timestamp":"2025-12-25T16:35:31.685Z","eval_name":"Example: Basic scorer test","score":1,"run_count":17}
|
|
90
|
-
{"timestamp":"2025-12-25T16:35:31.843Z","eval_name":"Example: Basic scorer test","score":1,"run_count":18}
|
|
91
|
-
{"timestamp":"2025-12-25T16:35:31.962Z","eval_name":"Example: Basic scorer test","score":1,"run_count":19}
|
|
92
|
-
{"timestamp":"2025-12-25T16:35:32.076Z","eval_name":"Example: Basic scorer test","score":1,"run_count":20}
|
|
93
|
-
{"timestamp":"2025-12-25T16:36:03.596Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":5}
|
|
94
|
-
{"timestamp":"2025-12-25T16:36:03.596Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":5}
|
|
95
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":5}
|
|
96
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Swarm Decomposition Quality","score":0.6845833333333333,"run_count":5}
|
|
97
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":5}
|
|
98
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":5}
|
|
99
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":5}
|
|
100
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"No False Positives","score":1,"run_count":5}
|
|
101
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Example: Basic scorer test","score":1,"run_count":21}
|
|
102
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":5}
|
|
103
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":5}
|
|
104
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Placeholder Detection","score":0,"run_count":5}
|
|
105
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Generic Instructions Fail","score":0,"run_count":5}
|
|
106
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"First Tool Discipline","score":0,"run_count":5}
|
|
107
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":5}
|
|
108
|
-
{"timestamp":"2025-12-25T16:36:03.597Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":5}
|
|
109
|
-
{"timestamp":"2025-12-25T16:36:03.727Z","eval_name":"Example: Basic scorer test","score":1,"run_count":22}
|
|
110
|
-
{"timestamp":"2025-12-25T16:36:03.842Z","eval_name":"Example: Basic scorer test","score":1,"run_count":23}
|
|
111
|
-
{"timestamp":"2025-12-25T16:36:03.951Z","eval_name":"Example: Basic scorer test","score":1,"run_count":24}
|
|
112
|
-
{"timestamp":"2025-12-25T16:36:04.183Z","eval_name":"Example: Basic scorer test","score":1,"run_count":25}
|
|
113
|
-
{"timestamp":"2025-12-25T16:36:04.330Z","eval_name":"Example: Basic scorer test","score":1,"run_count":26}
|
|
114
|
-
{"timestamp":"2025-12-25T16:36:04.445Z","eval_name":"Example: Basic scorer test","score":1,"run_count":27}
|
|
115
|
-
{"timestamp":"2025-12-25T16:36:04.555Z","eval_name":"Example: Basic scorer test","score":1,"run_count":28}
|
|
116
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":6}
|
|
117
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":6}
|
|
118
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":6}
|
|
119
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Swarm Decomposition Quality","score":0.6852777777777778,"run_count":6}
|
|
120
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":6}
|
|
121
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":6}
|
|
122
|
-
{"timestamp":"2025-12-25T16:36:32.176Z","eval_name":"No False Positives","score":1,"run_count":6}
|
|
123
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Example: Basic scorer test","score":1,"run_count":29}
|
|
124
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":6}
|
|
125
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":6}
|
|
126
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Placeholder Detection","score":0,"run_count":6}
|
|
127
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Generic Instructions Fail","score":0,"run_count":6}
|
|
128
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"First Tool Discipline","score":0,"run_count":6}
|
|
129
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Coordinator Behavior After Compaction","score":0.978125,"run_count":6}
|
|
130
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":6}
|
|
131
|
-
{"timestamp":"2025-12-25T16:36:32.177Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":6}
|
|
132
|
-
{"timestamp":"2025-12-25T16:36:32.305Z","eval_name":"Example: Basic scorer test","score":1,"run_count":30}
|
|
133
|
-
{"timestamp":"2025-12-25T16:36:32.416Z","eval_name":"Example: Basic scorer test","score":1,"run_count":31}
|
|
134
|
-
{"timestamp":"2025-12-25T16:36:32.527Z","eval_name":"Example: Basic scorer test","score":1,"run_count":32}
|
|
135
|
-
{"timestamp":"2025-12-25T16:36:32.755Z","eval_name":"Example: Basic scorer test","score":1,"run_count":33}
|
|
136
|
-
{"timestamp":"2025-12-25T16:36:32.957Z","eval_name":"Example: Basic scorer test","score":1,"run_count":34}
|
|
137
|
-
{"timestamp":"2025-12-25T16:36:33.071Z","eval_name":"Example: Basic scorer test","score":1,"run_count":35}
|
|
138
|
-
{"timestamp":"2025-12-25T16:36:33.180Z","eval_name":"Example: Basic scorer test","score":1,"run_count":36}
|
|
139
|
-
{"timestamp":"2025-12-25T16:38:02.146Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":7}
|
|
140
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":7}
|
|
141
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":7}
|
|
142
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Swarm Decomposition Quality","score":0.6726388888888888,"run_count":7}
|
|
143
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":7}
|
|
144
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":7}
|
|
145
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":7}
|
|
146
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"No False Positives","score":1,"run_count":7}
|
|
147
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Example: Basic scorer test","score":1,"run_count":37}
|
|
148
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":7}
|
|
149
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":7}
|
|
150
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Placeholder Detection","score":0,"run_count":7}
|
|
151
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"Generic Instructions Fail","score":0,"run_count":7}
|
|
152
|
-
{"timestamp":"2025-12-25T16:38:02.147Z","eval_name":"First Tool Discipline","score":0,"run_count":7}
|
|
153
|
-
{"timestamp":"2025-12-25T16:38:02.148Z","eval_name":"Coordinator Behavior After Compaction","score":0.8645833333333333,"run_count":7}
|
|
154
|
-
{"timestamp":"2025-12-25T16:38:02.148Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":7}
|
|
155
|
-
{"timestamp":"2025-12-25T16:38:02.276Z","eval_name":"Example: Basic scorer test","score":1,"run_count":38}
|
|
156
|
-
{"timestamp":"2025-12-25T16:38:02.428Z","eval_name":"Example: Basic scorer test","score":1,"run_count":39}
|
|
157
|
-
{"timestamp":"2025-12-25T16:38:02.547Z","eval_name":"Example: Basic scorer test","score":1,"run_count":40}
|
|
158
|
-
{"timestamp":"2025-12-25T16:38:02.782Z","eval_name":"Example: Basic scorer test","score":1,"run_count":41}
|
|
159
|
-
{"timestamp":"2025-12-25T16:38:02.933Z","eval_name":"Example: Basic scorer test","score":1,"run_count":42}
|
|
160
|
-
{"timestamp":"2025-12-25T16:38:03.050Z","eval_name":"Example: Basic scorer test","score":1,"run_count":43}
|
|
161
|
-
{"timestamp":"2025-12-25T16:38:03.165Z","eval_name":"Example: Basic scorer test","score":1,"run_count":44}
|
|
162
|
-
{"timestamp":"2025-12-25T16:38:52.756Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":8}
|
|
163
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":8}
|
|
164
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":8}
|
|
165
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Swarm Decomposition Quality","score":0.695,"run_count":8}
|
|
166
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":8}
|
|
167
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":8}
|
|
168
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":8}
|
|
169
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"No False Positives","score":1,"run_count":8}
|
|
170
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Example: Basic scorer test","score":1,"run_count":45}
|
|
171
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":8}
|
|
172
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Placeholder Detection","score":0,"run_count":8}
|
|
173
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Generic Instructions Fail","score":0,"run_count":8}
|
|
174
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"First Tool Discipline","score":0,"run_count":8}
|
|
175
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":8}
|
|
176
|
-
{"timestamp":"2025-12-25T16:38:52.757Z","eval_name":"Coordinator Behavior After Compaction","score":0.9526041666666667,"run_count":8}
|
|
177
|
-
{"timestamp":"2025-12-25T16:38:52.758Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":8}
|
|
178
|
-
{"timestamp":"2025-12-25T16:38:52.903Z","eval_name":"Example: Basic scorer test","score":1,"run_count":46}
|
|
179
|
-
{"timestamp":"2025-12-25T16:38:53.020Z","eval_name":"Example: Basic scorer test","score":1,"run_count":47}
|
|
180
|
-
{"timestamp":"2025-12-25T16:38:53.136Z","eval_name":"Example: Basic scorer test","score":1,"run_count":48}
|
|
181
|
-
{"timestamp":"2025-12-25T16:38:53.367Z","eval_name":"Example: Basic scorer test","score":1,"run_count":49}
|
|
182
|
-
{"timestamp":"2025-12-25T16:38:53.511Z","eval_name":"Example: Basic scorer test","score":1,"run_count":50}
|
|
183
|
-
{"timestamp":"2025-12-25T16:38:53.624Z","eval_name":"Example: Basic scorer test","score":1,"run_count":51}
|
|
184
|
-
{"timestamp":"2025-12-25T16:38:53.737Z","eval_name":"Example: Basic scorer test","score":1,"run_count":52}
|
|
185
|
-
{"timestamp":"2025-12-25T16:40:39.219Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":9}
|
|
186
|
-
{"timestamp":"2025-12-25T16:40:39.219Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":9}
|
|
187
|
-
{"timestamp":"2025-12-25T16:40:39.219Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":9}
|
|
188
|
-
{"timestamp":"2025-12-25T16:40:39.219Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":9}
|
|
189
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Swarm Decomposition Quality","score":0.7020833333333334,"run_count":9}
|
|
190
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":9}
|
|
191
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"No False Positives","score":1,"run_count":9}
|
|
192
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":9}
|
|
193
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Example: Basic scorer test","score":1,"run_count":53}
|
|
194
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Coordinator Behavior After Compaction","score":0.7291666666666666,"run_count":9}
|
|
195
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":9}
|
|
196
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":9}
|
|
197
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":9}
|
|
198
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Placeholder Detection","score":0,"run_count":9}
|
|
199
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"Generic Instructions Fail","score":0,"run_count":9}
|
|
200
|
-
{"timestamp":"2025-12-25T16:40:39.220Z","eval_name":"First Tool Discipline","score":0,"run_count":9}
|
|
201
|
-
{"timestamp":"2025-12-25T16:40:39.352Z","eval_name":"Example: Basic scorer test","score":1,"run_count":54}
|
|
202
|
-
{"timestamp":"2025-12-25T16:40:39.460Z","eval_name":"Example: Basic scorer test","score":1,"run_count":55}
|
|
203
|
-
{"timestamp":"2025-12-25T16:40:39.572Z","eval_name":"Example: Basic scorer test","score":1,"run_count":56}
|
|
204
|
-
{"timestamp":"2025-12-25T16:40:39.816Z","eval_name":"Example: Basic scorer test","score":1,"run_count":57}
|
|
205
|
-
{"timestamp":"2025-12-25T16:40:39.947Z","eval_name":"Example: Basic scorer test","score":1,"run_count":58}
|
|
206
|
-
{"timestamp":"2025-12-25T16:40:40.084Z","eval_name":"Example: Basic scorer test","score":1,"run_count":59}
|
|
207
|
-
{"timestamp":"2025-12-25T16:40:40.202Z","eval_name":"Example: Basic scorer test","score":1,"run_count":60}
|
|
208
|
-
{"timestamp":"2025-12-25T16:43:12.851Z","eval_name":"Example: Basic scorer test","score":1,"run_count":61}
|
|
209
|
-
{"timestamp":"2025-12-25T16:43:43.041Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":10}
|
|
210
|
-
{"timestamp":"2025-12-25T16:43:43.041Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":10}
|
|
211
|
-
{"timestamp":"2025-12-25T16:43:43.041Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":10}
|
|
212
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Swarm Decomposition Quality","score":0.6909722222222222,"run_count":10}
|
|
213
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":10}
|
|
214
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":10}
|
|
215
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":10}
|
|
216
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"No False Positives","score":1,"run_count":10}
|
|
217
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Example: Basic scorer test","score":1,"run_count":62}
|
|
218
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":10}
|
|
219
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Placeholder Detection","score":0,"run_count":10}
|
|
220
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Generic Instructions Fail","score":0,"run_count":10}
|
|
221
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"First Tool Discipline","score":0,"run_count":10}
|
|
222
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":10}
|
|
223
|
-
{"timestamp":"2025-12-25T16:43:43.042Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":10}
|
|
224
|
-
{"timestamp":"2025-12-25T16:43:43.043Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":10}
|
|
225
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":11}
|
|
226
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":11}
|
|
227
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":11}
|
|
228
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Swarm Decomposition Quality","score":0.6720833333333333,"run_count":11}
|
|
229
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":11}
|
|
230
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Example: Basic scorer test","score":1,"run_count":63}
|
|
231
|
-
{"timestamp":"2025-12-25T16:44:12.471Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":11}
|
|
232
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":11}
|
|
233
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"No False Positives","score":1,"run_count":11}
|
|
234
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Coordinator Behavior After Compaction","score":0.9796875,"run_count":11}
|
|
235
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":11}
|
|
236
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":11}
|
|
237
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":11}
|
|
238
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Placeholder Detection","score":0,"run_count":11}
|
|
239
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"Generic Instructions Fail","score":0,"run_count":11}
|
|
240
|
-
{"timestamp":"2025-12-25T16:44:12.472Z","eval_name":"First Tool Discipline","score":0,"run_count":11}
|
|
241
|
-
{"timestamp":"2025-12-25T16:49:55.548Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":12}
|
|
242
|
-
{"timestamp":"2025-12-25T16:49:55.549Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":12}
|
|
243
|
-
{"timestamp":"2025-12-25T16:49:55.555Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":12}
|
|
244
|
-
{"timestamp":"2025-12-25T16:49:55.555Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":12}
|
|
245
|
-
{"timestamp":"2025-12-25T16:49:55.555Z","eval_name":"Swarm Decomposition Quality","score":0.7001388888888888,"run_count":12}
|
|
246
|
-
{"timestamp":"2025-12-25T16:49:55.555Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":12}
|
|
247
|
-
{"timestamp":"2025-12-25T16:49:55.556Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":12}
|
|
248
|
-
{"timestamp":"2025-12-25T16:49:55.557Z","eval_name":"No False Positives","score":1,"run_count":12}
|
|
249
|
-
{"timestamp":"2025-12-25T16:49:55.557Z","eval_name":"Example: Basic scorer test","score":1,"run_count":64}
|
|
250
|
-
{"timestamp":"2025-12-25T16:49:55.557Z","eval_name":"Generic Instructions Fail","score":0,"run_count":12}
|
|
251
|
-
{"timestamp":"2025-12-25T16:49:55.557Z","eval_name":"First Tool Discipline","score":0,"run_count":12}
|
|
252
|
-
{"timestamp":"2025-12-25T16:49:55.561Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":12}
|
|
253
|
-
{"timestamp":"2025-12-25T16:49:55.561Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":12}
|
|
254
|
-
{"timestamp":"2025-12-25T16:49:55.561Z","eval_name":"Placeholder Detection","score":0,"run_count":12}
|
|
255
|
-
{"timestamp":"2025-12-25T16:49:55.561Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":12}
|
|
256
|
-
{"timestamp":"2025-12-25T16:49:55.565Z","eval_name":"Coordinator Resists Direct Implementation","score":0.9375,"run_count":12}
|
|
257
|
-
{"timestamp":"2025-12-25T16:49:55.697Z","eval_name":"Example: Basic scorer test","score":1,"run_count":65}
|
|
258
|
-
{"timestamp":"2025-12-25T16:49:55.813Z","eval_name":"Example: Basic scorer test","score":1,"run_count":66}
|
|
259
|
-
{"timestamp":"2025-12-25T16:49:55.934Z","eval_name":"Example: Basic scorer test","score":1,"run_count":67}
|
|
260
|
-
{"timestamp":"2025-12-25T16:49:56.178Z","eval_name":"Example: Basic scorer test","score":1,"run_count":68}
|
|
261
|
-
{"timestamp":"2025-12-25T16:49:56.327Z","eval_name":"Example: Basic scorer test","score":1,"run_count":69}
|
|
262
|
-
{"timestamp":"2025-12-25T16:49:56.446Z","eval_name":"Example: Basic scorer test","score":1,"run_count":70}
|
|
263
|
-
{"timestamp":"2025-12-25T16:49:56.556Z","eval_name":"Example: Basic scorer test","score":1,"run_count":71}
|
|
264
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":13}
|
|
265
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":13}
|
|
266
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":13}
|
|
267
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":13}
|
|
268
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Swarm Decomposition Quality","score":0.6847222222222221,"run_count":13}
|
|
269
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":13}
|
|
270
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":13}
|
|
271
|
-
{"timestamp":"2025-12-25T17:06:10.610Z","eval_name":"No False Positives","score":1,"run_count":13}
|
|
272
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Example: Basic scorer test","score":1,"run_count":72}
|
|
273
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":13}
|
|
274
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Placeholder Detection","score":0,"run_count":13}
|
|
275
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Generic Instructions Fail","score":0,"run_count":13}
|
|
276
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"First Tool Discipline","score":0,"run_count":13}
|
|
277
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":13}
|
|
278
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Coordinator Behavior After Compaction","score":0.8645833333333333,"run_count":13}
|
|
279
|
-
{"timestamp":"2025-12-25T17:06:10.611Z","eval_name":"Coordinator Resists Direct Implementation","score":0.9375,"run_count":13}
|
|
280
|
-
{"timestamp":"2025-12-25T18:58:44.923Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":14}
|
|
281
|
-
{"timestamp":"2025-12-25T18:58:44.923Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":14}
|
|
282
|
-
{"timestamp":"2025-12-25T18:58:44.923Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":14}
|
|
283
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Swarm Decomposition Quality","score":0.7095833333333333,"run_count":14}
|
|
284
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":14}
|
|
285
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":14}
|
|
286
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":14}
|
|
287
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"No False Positives","score":1,"run_count":14}
|
|
288
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Example: Basic scorer test","score":1,"run_count":73}
|
|
289
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Generic Instructions Fail","score":0,"run_count":14}
|
|
290
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"First Tool Discipline","score":0,"run_count":14}
|
|
291
|
-
{"timestamp":"2025-12-25T18:58:44.924Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":14}
|
|
292
|
-
{"timestamp":"2025-12-25T18:58:44.925Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":14}
|
|
293
|
-
{"timestamp":"2025-12-25T18:58:44.925Z","eval_name":"Placeholder Detection","score":0,"run_count":14}
|
|
294
|
-
{"timestamp":"2025-12-25T18:58:44.925Z","eval_name":"Coordinator Behavior After Compaction","score":0.9375,"run_count":14}
|
|
295
|
-
{"timestamp":"2025-12-25T18:58:44.925Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":14}
|
|
296
|
-
{"timestamp":"2025-12-25T18:59:58.928Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":15}
|
|
297
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":15}
|
|
298
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":15}
|
|
299
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Swarm Decomposition Quality","score":0.6944444444444443,"run_count":15}
|
|
300
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":15}
|
|
301
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":15}
|
|
302
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":15}
|
|
303
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"No False Positives","score":1,"run_count":15}
|
|
304
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Example: Basic scorer test","score":1,"run_count":74}
|
|
305
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":15}
|
|
306
|
-
{"timestamp":"2025-12-25T18:59:58.929Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":15}
|
|
307
|
-
{"timestamp":"2025-12-25T18:59:58.930Z","eval_name":"Placeholder Detection","score":0,"run_count":15}
|
|
308
|
-
{"timestamp":"2025-12-25T18:59:58.930Z","eval_name":"Generic Instructions Fail","score":0,"run_count":15}
|
|
309
|
-
{"timestamp":"2025-12-25T18:59:58.930Z","eval_name":"First Tool Discipline","score":0,"run_count":15}
|
|
310
|
-
{"timestamp":"2025-12-25T18:59:58.930Z","eval_name":"Coordinator Behavior After Compaction","score":0.9171875,"run_count":15}
|
|
311
|
-
{"timestamp":"2025-12-25T18:59:58.930Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":15}
|
|
312
|
-
{"timestamp":"2025-12-25T19:00:48.709Z","eval_name":"Coordinator Discipline - Synthetic Fixtures","score":0.6354444444444443,"run_count":16}
|
|
313
|
-
{"timestamp":"2025-12-25T19:00:48.709Z","eval_name":"Coordinator Discipline - Real Sessions","score":0,"run_count":16}
|
|
314
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Coordinator Discipline - Perfect vs Bad","score":0.5416666666666666,"run_count":16}
|
|
315
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Swarm Decomposition Quality","score":0.5464583333333334,"run_count":16}
|
|
316
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Decomposition Edge Cases","score":0.775,"run_count":16}
|
|
317
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Compaction Hook Coordinator Resumption","score":0.95,"run_count":16}
|
|
318
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Epic ID Specificity","score":0.5,"run_count":16}
|
|
319
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"No False Positives","score":1,"run_count":16}
|
|
320
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Example: Basic scorer test","score":1,"run_count":75}
|
|
321
|
-
{"timestamp":"2025-12-25T19:00:48.710Z","eval_name":"Perfect Prompt Scores 100%","score":1,"run_count":16}
|
|
322
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"Placeholder Detection","score":0,"run_count":16}
|
|
323
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"Generic Instructions Fail","score":0,"run_count":16}
|
|
324
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"First Tool Discipline","score":0,"run_count":16}
|
|
325
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"Compaction Prompt Quality","score":0.6342857142857142,"run_count":16}
|
|
326
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"Coordinator Behavior After Compaction","score":1,"run_count":16}
|
|
327
|
-
{"timestamp":"2025-12-25T19:00:48.711Z","eval_name":"Coordinator Resists Direct Implementation","score":1,"run_count":16}
|
package/.turbo/turbo-build.log
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
$ bun build ./src/index.ts --outdir ./dist --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && bun build ./src/plugin.ts --outfile ./dist/plugin.js --target node --external @electric-sql/pglite --external swarm-mail --external vitest --external @vitest/ui --external lightningcss && tsc
|
|
2
|
-
Bundled 1349 modules in 297ms
|
|
3
|
-
|
|
4
|
-
index.js 4.34 MB (entry point)
|
|
5
|
-
|
|
6
|
-
Bundled 1350 modules in 197ms
|
|
7
|
-
|
|
8
|
-
plugin.js 4.31 MB (entry point)
|
|
9
|
-
|