create-walle 0.9.13 → 0.9.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -3
- package/bin/create-walle.js +232 -32
- package/bin/mcp-inject.js +18 -53
- package/package.json +3 -1
- package/template/claude-task-manager/api-prompts.js +11 -2
- package/template/claude-task-manager/approval-agent.js +7 -0
- package/template/claude-task-manager/db.js +94 -75
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
- package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
- package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
- package/template/claude-task-manager/fuzzy-utils.js +10 -2
- package/template/claude-task-manager/git-utils.js +140 -10
- package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
- package/template/claude-task-manager/lib/agent-presets.js +38 -5
- package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
- package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
- package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
- package/template/claude-task-manager/lib/session-history.js +309 -16
- package/template/claude-task-manager/lib/session-standup.js +409 -0
- package/template/claude-task-manager/lib/session-stream.js +253 -20
- package/template/claude-task-manager/lib/standup-attention.js +200 -0
- package/template/claude-task-manager/lib/status-hooks.js +8 -2
- package/template/claude-task-manager/lib/update-telemetry.js +114 -0
- package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
- package/template/claude-task-manager/lib/walle-default-model.js +55 -0
- package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
- package/template/claude-task-manager/lib/walle-transcript.js +1 -3
- package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
- package/template/claude-task-manager/package.json +1 -0
- package/template/claude-task-manager/providers/codex-mcp.js +104 -0
- package/template/claude-task-manager/providers/index.js +2 -0
- package/template/claude-task-manager/public/css/setup.css +2 -1
- package/template/claude-task-manager/public/css/walle.css +71 -0
- package/template/claude-task-manager/public/index.html +2388 -429
- package/template/claude-task-manager/public/js/message-renderer.js +314 -35
- package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
- package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
- package/template/claude-task-manager/public/js/setup.js +62 -19
- package/template/claude-task-manager/public/js/stream-view.js +396 -55
- package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
- package/template/claude-task-manager/public/js/walle-session.js +234 -26
- package/template/claude-task-manager/public/js/walle.js +143 -2
- package/template/claude-task-manager/server.js +1402 -433
- package/template/claude-task-manager/session-integrity.js +77 -28
- package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
- package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
- package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
- package/template/package.json +1 -1
- package/template/wall-e/agent-runners/claude-code.js +2 -0
- package/template/wall-e/agent.js +63 -8
- package/template/wall-e/api-walle.js +330 -52
- package/template/wall-e/brain.js +291 -42
- package/template/wall-e/chat.js +172 -15
- package/template/wall-e/coding/compaction-service.js +19 -5
- package/template/wall-e/coding/stream-processor.js +22 -2
- package/template/wall-e/coding/workspace-replay.js +1 -4
- package/template/wall-e/coding-orchestrator.js +250 -80
- package/template/wall-e/compat.js +0 -28
- package/template/wall-e/context/context-builder.js +3 -1
- package/template/wall-e/embeddings.js +2 -7
- package/template/wall-e/eval/agent-runner.js +30 -9
- package/template/wall-e/eval/benchmark-generator.js +21 -1
- package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
- package/template/wall-e/eval/cc-replay.js +1 -0
- package/template/wall-e/eval/codex-cli-baseline.js +633 -0
- package/template/wall-e/eval/debug-agent003.js +1 -0
- package/template/wall-e/eval/eval-orchestrator.js +3 -3
- package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
- package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
- package/template/wall-e/eval/run-model-comparison.js +1 -0
- package/template/wall-e/eval/swebench-adapter.js +1 -0
- package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
- package/template/wall-e/extraction/knowledge-extractor.js +1 -2
- package/template/wall-e/lib/mcp-integration.js +336 -0
- package/template/wall-e/llm/ollama.js +47 -8
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/tool-adapter.js +1 -0
- package/template/wall-e/loops/ingest.js +42 -8
- package/template/wall-e/loops/initiative.js +87 -2
- package/template/wall-e/mcp-server.js +872 -19
- package/template/wall-e/memory/ctm-context-client.js +230 -0
- package/template/wall-e/memory/ctm-session-context.js +1376 -0
- package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
- package/template/wall-e/server.js +30 -1
- package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
- package/template/wall-e/skills/skill-planner.js +86 -4
- package/template/wall-e/slack/socket-mode-listener.js +276 -0
- package/template/wall-e/telemetry.js +70 -2
- package/template/wall-e/tools/builtin-middleware.js +55 -2
- package/template/wall-e/tools/shell-policy.js +1 -1
- package/template/wall-e/tools/slack-owner.js +104 -0
- package/template/website/index.html +4 -4
- package/template/builder-journal.md +0 -17
|
@@ -1577,601 +1577,5 @@
|
|
|
1577
1577
|
"expectedFileChanges": [],
|
|
1578
1578
|
"projectFixture": "fullstack-app"
|
|
1579
1579
|
}
|
|
1580
|
-
},
|
|
1581
|
-
{
|
|
1582
|
-
"id": "agent-session-e8413ef1",
|
|
1583
|
-
"prompt": "I'll start by exploring the project structure to understand the Express app setup.",
|
|
1584
|
-
"taskType": "coding-agent",
|
|
1585
|
-
"difficulty": "hard",
|
|
1586
|
-
"expectedTraits": [
|
|
1587
|
-
"reads before writing",
|
|
1588
|
-
"uses edit over write"
|
|
1589
|
-
],
|
|
1590
|
-
"agentExpectations": {
|
|
1591
|
-
"expectedToolCalls": [
|
|
1592
|
-
"list_directory",
|
|
1593
|
-
"read_file",
|
|
1594
|
-
"edit_file",
|
|
1595
|
-
"run_shell"
|
|
1596
|
-
],
|
|
1597
|
-
"maxTurns": 22,
|
|
1598
|
-
"expectedFileChanges": [
|
|
1599
|
-
"server.js",
|
|
1600
|
-
"test.js"
|
|
1601
|
-
]
|
|
1602
|
-
},
|
|
1603
|
-
"sourceSessionId": "8a31c591-bbaf-403f-b82e-3ec093618584",
|
|
1604
|
-
"classifiedType": "coding:generation",
|
|
1605
|
-
"expectedDiff": null,
|
|
1606
|
-
"complexityIndicator": 2
|
|
1607
|
-
},
|
|
1608
|
-
{
|
|
1609
|
-
"id": "agent-session-02a0e2b9",
|
|
1610
|
-
"prompt": "I'll help you rename the 'getData' function to 'fetchRecords' across the project. Let me start by exploring the project structure and finding all occurrences.",
|
|
1611
|
-
"taskType": "coding-agent",
|
|
1612
|
-
"difficulty": "hard",
|
|
1613
|
-
"expectedTraits": [
|
|
1614
|
-
"reads before writing",
|
|
1615
|
-
"uses edit over write"
|
|
1616
|
-
],
|
|
1617
|
-
"agentExpectations": {
|
|
1618
|
-
"expectedToolCalls": [
|
|
1619
|
-
"list_directory",
|
|
1620
|
-
"read_file",
|
|
1621
|
-
"grep_files",
|
|
1622
|
-
"glob",
|
|
1623
|
-
"ask_user",
|
|
1624
|
-
"edit_file",
|
|
1625
|
-
"run_shell"
|
|
1626
|
-
],
|
|
1627
|
-
"maxTurns": 50,
|
|
1628
|
-
"expectedFileChanges": [
|
|
1629
|
-
"server.js",
|
|
1630
|
-
"test.js"
|
|
1631
|
-
]
|
|
1632
|
-
},
|
|
1633
|
-
"sourceSessionId": "07b948cd-2324-442b-849b-ff88de26783d",
|
|
1634
|
-
"classifiedType": "coding:refactoring",
|
|
1635
|
-
"expectedDiff": null,
|
|
1636
|
-
"complexityIndicator": 2
|
|
1637
|
-
},
|
|
1638
|
-
{
|
|
1639
|
-
"id": "agent-session-a78f1dc7",
|
|
1640
|
-
"prompt": "I'll help you find and fix the SQL injection vulnerability. Let me start by exploring the project structure.",
|
|
1641
|
-
"taskType": "coding-agent",
|
|
1642
|
-
"difficulty": "hard",
|
|
1643
|
-
"expectedTraits": [
|
|
1644
|
-
"reads before writing",
|
|
1645
|
-
"uses edit over write"
|
|
1646
|
-
],
|
|
1647
|
-
"agentExpectations": {
|
|
1648
|
-
"expectedToolCalls": [
|
|
1649
|
-
"list_directory",
|
|
1650
|
-
"read_file",
|
|
1651
|
-
"grep_files",
|
|
1652
|
-
"edit_file",
|
|
1653
|
-
"run_shell"
|
|
1654
|
-
],
|
|
1655
|
-
"maxTurns": 30,
|
|
1656
|
-
"expectedFileChanges": [
|
|
1657
|
-
"server.js",
|
|
1658
|
-
"test.js"
|
|
1659
|
-
]
|
|
1660
|
-
},
|
|
1661
|
-
"sourceSessionId": "dcce8928-0376-4058-b5fd-321f50c39783",
|
|
1662
|
-
"classifiedType": "coding:debugging",
|
|
1663
|
-
"expectedDiff": null,
|
|
1664
|
-
"complexityIndicator": 2
|
|
1665
|
-
},
|
|
1666
|
-
{
|
|
1667
|
-
"id": "agent-session-1348957e",
|
|
1668
|
-
"prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure to understand what we're working with.",
|
|
1669
|
-
"taskType": "coding-agent",
|
|
1670
|
-
"difficulty": "medium",
|
|
1671
|
-
"expectedTraits": [
|
|
1672
|
-
"reads before writing",
|
|
1673
|
-
"uses edit over write"
|
|
1674
|
-
],
|
|
1675
|
-
"agentExpectations": {
|
|
1676
|
-
"expectedToolCalls": [
|
|
1677
|
-
"list_directory",
|
|
1678
|
-
"read_file",
|
|
1679
|
-
"run_shell",
|
|
1680
|
-
"ask_user",
|
|
1681
|
-
"edit_file"
|
|
1682
|
-
],
|
|
1683
|
-
"maxTurns": 20,
|
|
1684
|
-
"expectedFileChanges": [
|
|
1685
|
-
"server.js",
|
|
1686
|
-
"test.js"
|
|
1687
|
-
]
|
|
1688
|
-
},
|
|
1689
|
-
"sourceSessionId": "31c2980b-95ce-4685-bdd5-46c92fe9aa36",
|
|
1690
|
-
"classifiedType": "coding:testing",
|
|
1691
|
-
"expectedDiff": null,
|
|
1692
|
-
"complexityIndicator": 2
|
|
1693
|
-
},
|
|
1694
|
-
{
|
|
1695
|
-
"id": "agent-session-f7edd420",
|
|
1696
|
-
"prompt": "I'll start by exploring the project structure to understand the CLI tool setup.",
|
|
1697
|
-
"taskType": "coding-agent",
|
|
1698
|
-
"difficulty": "hard",
|
|
1699
|
-
"expectedTraits": [
|
|
1700
|
-
"reads before writing",
|
|
1701
|
-
"uses edit over write"
|
|
1702
|
-
],
|
|
1703
|
-
"agentExpectations": {
|
|
1704
|
-
"expectedToolCalls": [
|
|
1705
|
-
"list_directory",
|
|
1706
|
-
"read_file",
|
|
1707
|
-
"edit_file",
|
|
1708
|
-
"run_shell"
|
|
1709
|
-
],
|
|
1710
|
-
"maxTurns": 22,
|
|
1711
|
-
"expectedFileChanges": [
|
|
1712
|
-
"index.js",
|
|
1713
|
-
"test.js"
|
|
1714
|
-
]
|
|
1715
|
-
},
|
|
1716
|
-
"sourceSessionId": "5719d4dd-fcdb-47b6-9874-60f4b9fbfb71",
|
|
1717
|
-
"classifiedType": "coding:generation",
|
|
1718
|
-
"expectedDiff": null,
|
|
1719
|
-
"complexityIndicator": 2
|
|
1720
|
-
},
|
|
1721
|
-
{
|
|
1722
|
-
"id": "agent-session-b36c5122",
|
|
1723
|
-
"prompt": "I'll start by exploring the project structure to understand what we're working with.",
|
|
1724
|
-
"taskType": "coding-agent",
|
|
1725
|
-
"difficulty": "hard",
|
|
1726
|
-
"expectedTraits": [
|
|
1727
|
-
"reads before writing",
|
|
1728
|
-
"uses edit over write",
|
|
1729
|
-
"plans before executing"
|
|
1730
|
-
],
|
|
1731
|
-
"agentExpectations": {
|
|
1732
|
-
"expectedToolCalls": [
|
|
1733
|
-
"list_directory",
|
|
1734
|
-
"read_file",
|
|
1735
|
-
"run_shell",
|
|
1736
|
-
"update_todos",
|
|
1737
|
-
"edit_file"
|
|
1738
|
-
],
|
|
1739
|
-
"maxTurns": 36,
|
|
1740
|
-
"expectedFileChanges": [
|
|
1741
|
-
"server/db.js",
|
|
1742
|
-
"server/index.js"
|
|
1743
|
-
]
|
|
1744
|
-
},
|
|
1745
|
-
"sourceSessionId": "f78fa371-7e29-4a52-8f4f-b992fdaccc0d",
|
|
1746
|
-
"classifiedType": "coding:generation",
|
|
1747
|
-
"expectedDiff": null,
|
|
1748
|
-
"complexityIndicator": 2
|
|
1749
|
-
},
|
|
1750
|
-
{
|
|
1751
|
-
"id": "agent-session-29b64889",
|
|
1752
|
-
"prompt": "I'll start by exploring the project structure and understanding what tests exist.",
|
|
1753
|
-
"taskType": "coding-agent",
|
|
1754
|
-
"difficulty": "hard",
|
|
1755
|
-
"expectedTraits": [
|
|
1756
|
-
"reads before writing",
|
|
1757
|
-
"uses edit over write"
|
|
1758
|
-
],
|
|
1759
|
-
"agentExpectations": {
|
|
1760
|
-
"expectedToolCalls": [
|
|
1761
|
-
"list_directory",
|
|
1762
|
-
"read_file",
|
|
1763
|
-
"run_shell",
|
|
1764
|
-
"grep_files",
|
|
1765
|
-
"edit_file"
|
|
1766
|
-
],
|
|
1767
|
-
"maxTurns": 50,
|
|
1768
|
-
"expectedFileChanges": [
|
|
1769
|
-
"server/test.js",
|
|
1770
|
-
"server/db.js"
|
|
1771
|
-
]
|
|
1772
|
-
},
|
|
1773
|
-
"sourceSessionId": "25da8640-eaba-42a6-9eb2-a5831c2162fd",
|
|
1774
|
-
"classifiedType": "coding:testing",
|
|
1775
|
-
"expectedDiff": null,
|
|
1776
|
-
"complexityIndicator": 2
|
|
1777
|
-
},
|
|
1778
|
-
{
|
|
1779
|
-
"id": "agent-session-2494dd1d",
|
|
1780
|
-
"prompt": "I'll help you extract inline SQL queries from route handlers into a separate db.js module. Let me start by exploring the project structure.",
|
|
1781
|
-
"taskType": "coding-agent",
|
|
1782
|
-
"difficulty": "hard",
|
|
1783
|
-
"expectedTraits": [
|
|
1784
|
-
"reads before writing",
|
|
1785
|
-
"uses edit over write",
|
|
1786
|
-
"plans before executing"
|
|
1787
|
-
],
|
|
1788
|
-
"agentExpectations": {
|
|
1789
|
-
"expectedToolCalls": [
|
|
1790
|
-
"list_directory",
|
|
1791
|
-
"read_file",
|
|
1792
|
-
"run_shell",
|
|
1793
|
-
"update_todos",
|
|
1794
|
-
"lsp_references",
|
|
1795
|
-
"grep_files",
|
|
1796
|
-
"edit_file"
|
|
1797
|
-
],
|
|
1798
|
-
"maxTurns": 50,
|
|
1799
|
-
"expectedFileChanges": [
|
|
1800
|
-
"server/db.js",
|
|
1801
|
-
"server/routes/auth.js",
|
|
1802
|
-
"server/routes/users.js",
|
|
1803
|
-
"server/routes/items.js",
|
|
1804
|
-
"server/routes/health.js",
|
|
1805
|
-
"server/middleware/auth.js"
|
|
1806
|
-
]
|
|
1807
|
-
},
|
|
1808
|
-
"sourceSessionId": "08d10d2e-3e63-4354-bf05-8786388b2420",
|
|
1809
|
-
"classifiedType": "coding:refactoring",
|
|
1810
|
-
"expectedDiff": null,
|
|
1811
|
-
"complexityIndicator": 6
|
|
1812
|
-
},
|
|
1813
|
-
{
|
|
1814
|
-
"id": "agent-session-5fcb2df4",
|
|
1815
|
-
"prompt": "I'll start by exploring the project structure to understand the fullstack app, then create the CI/CD pipeline and containerization files.",
|
|
1816
|
-
"taskType": "coding-agent",
|
|
1817
|
-
"difficulty": "hard",
|
|
1818
|
-
"expectedTraits": [
|
|
1819
|
-
"reads before writing"
|
|
1820
|
-
],
|
|
1821
|
-
"agentExpectations": {
|
|
1822
|
-
"expectedToolCalls": [
|
|
1823
|
-
"list_directory",
|
|
1824
|
-
"read_file",
|
|
1825
|
-
"glob",
|
|
1826
|
-
"write_file",
|
|
1827
|
-
"edit_file",
|
|
1828
|
-
"run_shell"
|
|
1829
|
-
],
|
|
1830
|
-
"maxTurns": 50,
|
|
1831
|
-
"expectedFileChanges": [
|
|
1832
|
-
".github/workflows/ci.yml",
|
|
1833
|
-
"Dockerfile",
|
|
1834
|
-
"docker-compose.yml",
|
|
1835
|
-
".dockerignore",
|
|
1836
|
-
"config/default.js",
|
|
1837
|
-
"DOCKER.md",
|
|
1838
|
-
"CI_CD_SETUP.md",
|
|
1839
|
-
"QUICKSTART.md",
|
|
1840
|
-
"IMPLEMENTATION_SUMMARY.md",
|
|
1841
|
-
"DEPLOYMENT_CHECKLIST.md",
|
|
1842
|
-
"CHANGES.md",
|
|
1843
|
-
"README_CI_CD.md"
|
|
1844
|
-
]
|
|
1845
|
-
},
|
|
1846
|
-
"sourceSessionId": "9055411d-427f-498b-b501-b7f695b22214",
|
|
1847
|
-
"classifiedType": "coding:generation",
|
|
1848
|
-
"expectedDiff": null,
|
|
1849
|
-
"complexityIndicator": 12
|
|
1850
|
-
},
|
|
1851
|
-
{
|
|
1852
|
-
"id": "agent-session-1c9213cc",
|
|
1853
|
-
"prompt": "I'll start by exploring the project structure to understand the current implementation.",
|
|
1854
|
-
"taskType": "coding-agent",
|
|
1855
|
-
"difficulty": "hard",
|
|
1856
|
-
"expectedTraits": [
|
|
1857
|
-
"reads before writing",
|
|
1858
|
-
"uses edit over write"
|
|
1859
|
-
],
|
|
1860
|
-
"agentExpectations": {
|
|
1861
|
-
"expectedToolCalls": [
|
|
1862
|
-
"list_directory",
|
|
1863
|
-
"read_file",
|
|
1864
|
-
"edit_file",
|
|
1865
|
-
"run_shell"
|
|
1866
|
-
],
|
|
1867
|
-
"maxTurns": 50,
|
|
1868
|
-
"expectedFileChanges": [
|
|
1869
|
-
"server/routes/items.js",
|
|
1870
|
-
"public/js/items.js",
|
|
1871
|
-
"public/js/app.js",
|
|
1872
|
-
"server/test.js"
|
|
1873
|
-
]
|
|
1874
|
-
},
|
|
1875
|
-
"sourceSessionId": "8a0a1609-9031-4bcc-86ff-e690fef3b290",
|
|
1876
|
-
"classifiedType": "coding:generation",
|
|
1877
|
-
"expectedDiff": null,
|
|
1878
|
-
"complexityIndicator": 4
|
|
1879
|
-
},
|
|
1880
|
-
{
|
|
1881
|
-
"id": "agent-session-63ada37e",
|
|
1882
|
-
"prompt": "I'll help you rename the function 'processData' to 'transformPayload' across the entire project. Let me start by exploring the project structure and finding all usages.",
|
|
1883
|
-
"taskType": "coding-agent",
|
|
1884
|
-
"difficulty": "hard",
|
|
1885
|
-
"expectedTraits": [
|
|
1886
|
-
"reads before writing",
|
|
1887
|
-
"uses edit over write"
|
|
1888
|
-
],
|
|
1889
|
-
"agentExpectations": {
|
|
1890
|
-
"expectedToolCalls": [
|
|
1891
|
-
"list_directory",
|
|
1892
|
-
"read_file",
|
|
1893
|
-
"grep_files",
|
|
1894
|
-
"run_shell",
|
|
1895
|
-
"ask_user",
|
|
1896
|
-
"edit_file",
|
|
1897
|
-
"lsp_references"
|
|
1898
|
-
],
|
|
1899
|
-
"maxTurns": 50,
|
|
1900
|
-
"expectedFileChanges": [
|
|
1901
|
-
"server.js",
|
|
1902
|
-
"test.js"
|
|
1903
|
-
]
|
|
1904
|
-
},
|
|
1905
|
-
"sourceSessionId": "a7b388ad-58e9-48ea-9278-b817ba300726",
|
|
1906
|
-
"classifiedType": "coding:refactoring",
|
|
1907
|
-
"expectedDiff": null,
|
|
1908
|
-
"complexityIndicator": 2
|
|
1909
|
-
},
|
|
1910
|
-
{
|
|
1911
|
-
"id": "agent-session-3a3b9214",
|
|
1912
|
-
"prompt": "I'll start by exploring the project structure to understand the Express app and then implement rate limiting and CSRF protection.",
|
|
1913
|
-
"taskType": "coding-agent",
|
|
1914
|
-
"difficulty": "hard",
|
|
1915
|
-
"expectedTraits": [
|
|
1916
|
-
"reads before writing",
|
|
1917
|
-
"uses edit over write"
|
|
1918
|
-
],
|
|
1919
|
-
"agentExpectations": {
|
|
1920
|
-
"expectedToolCalls": [
|
|
1921
|
-
"list_directory",
|
|
1922
|
-
"read_file",
|
|
1923
|
-
"edit_file",
|
|
1924
|
-
"run_shell"
|
|
1925
|
-
],
|
|
1926
|
-
"maxTurns": 36,
|
|
1927
|
-
"expectedFileChanges": [
|
|
1928
|
-
"package.json",
|
|
1929
|
-
"server.js",
|
|
1930
|
-
"test.js"
|
|
1931
|
-
]
|
|
1932
|
-
},
|
|
1933
|
-
"sourceSessionId": "4a7b82b6-88ce-4bb3-a41a-d5ba58f2c83b",
|
|
1934
|
-
"classifiedType": "coding:generation",
|
|
1935
|
-
"expectedDiff": null,
|
|
1936
|
-
"complexityIndicator": 3
|
|
1937
|
-
},
|
|
1938
|
-
{
|
|
1939
|
-
"id": "agent-session-c28e561a",
|
|
1940
|
-
"prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure to understand the codebase.",
|
|
1941
|
-
"taskType": "coding-agent",
|
|
1942
|
-
"difficulty": "medium",
|
|
1943
|
-
"expectedTraits": [
|
|
1944
|
-
"reads before writing",
|
|
1945
|
-
"uses edit over write"
|
|
1946
|
-
],
|
|
1947
|
-
"agentExpectations": {
|
|
1948
|
-
"expectedToolCalls": [
|
|
1949
|
-
"list_directory",
|
|
1950
|
-
"read_file",
|
|
1951
|
-
"edit_file",
|
|
1952
|
-
"run_shell"
|
|
1953
|
-
],
|
|
1954
|
-
"maxTurns": 14,
|
|
1955
|
-
"expectedFileChanges": [
|
|
1956
|
-
"server.js",
|
|
1957
|
-
"test.js"
|
|
1958
|
-
]
|
|
1959
|
-
},
|
|
1960
|
-
"sourceSessionId": "91ccf531-0e27-45be-880b-9e37fd8b8349",
|
|
1961
|
-
"classifiedType": "coding:testing",
|
|
1962
|
-
"expectedDiff": null,
|
|
1963
|
-
"complexityIndicator": 2
|
|
1964
|
-
},
|
|
1965
|
-
{
|
|
1966
|
-
"id": "agent-session-ecc4ff3f",
|
|
1967
|
-
"prompt": "I'll start by exploring the project structure to understand the codebase and locate the relevant files.",
|
|
1968
|
-
"taskType": "coding-agent",
|
|
1969
|
-
"difficulty": "hard",
|
|
1970
|
-
"expectedTraits": [
|
|
1971
|
-
"reads before writing",
|
|
1972
|
-
"uses edit over write"
|
|
1973
|
-
],
|
|
1974
|
-
"agentExpectations": {
|
|
1975
|
-
"expectedToolCalls": [
|
|
1976
|
-
"list_directory",
|
|
1977
|
-
"read_file",
|
|
1978
|
-
"run_shell",
|
|
1979
|
-
"edit_file"
|
|
1980
|
-
],
|
|
1981
|
-
"maxTurns": 48,
|
|
1982
|
-
"expectedFileChanges": [
|
|
1983
|
-
"brain.js",
|
|
1984
|
-
"test.js"
|
|
1985
|
-
]
|
|
1986
|
-
},
|
|
1987
|
-
"sourceSessionId": "aeae6b54-c335-4fe4-a903-3e6b440fbe66",
|
|
1988
|
-
"classifiedType": "coding:generation",
|
|
1989
|
-
"expectedDiff": null,
|
|
1990
|
-
"complexityIndicator": 2
|
|
1991
|
-
},
|
|
1992
|
-
{
|
|
1993
|
-
"id": "agent-session-a29571f9",
|
|
1994
|
-
"prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure.",
|
|
1995
|
-
"taskType": "coding-agent",
|
|
1996
|
-
"difficulty": "medium",
|
|
1997
|
-
"expectedTraits": [
|
|
1998
|
-
"reads before writing",
|
|
1999
|
-
"uses edit over write"
|
|
2000
|
-
],
|
|
2001
|
-
"agentExpectations": {
|
|
2002
|
-
"expectedToolCalls": [
|
|
2003
|
-
"list_directory",
|
|
2004
|
-
"read_file",
|
|
2005
|
-
"edit_file",
|
|
2006
|
-
"run_shell"
|
|
2007
|
-
],
|
|
2008
|
-
"maxTurns": 14,
|
|
2009
|
-
"expectedFileChanges": [
|
|
2010
|
-
"server.js",
|
|
2011
|
-
"test.js"
|
|
2012
|
-
]
|
|
2013
|
-
},
|
|
2014
|
-
"sourceSessionId": "bdc47279-f629-465b-bc8e-eaa22cb65266",
|
|
2015
|
-
"classifiedType": "coding:testing",
|
|
2016
|
-
"expectedDiff": null,
|
|
2017
|
-
"complexityIndicator": 2
|
|
2018
|
-
},
|
|
2019
|
-
{
|
|
2020
|
-
"id": "agent-session-8abb12e4",
|
|
2021
|
-
"prompt": "I'll start by exploring the project structure to understand the codebase.",
|
|
2022
|
-
"taskType": "coding-agent",
|
|
2023
|
-
"difficulty": "hard",
|
|
2024
|
-
"expectedTraits": [
|
|
2025
|
-
"reads before writing",
|
|
2026
|
-
"uses edit over write"
|
|
2027
|
-
],
|
|
2028
|
-
"agentExpectations": {
|
|
2029
|
-
"expectedToolCalls": [
|
|
2030
|
-
"list_directory",
|
|
2031
|
-
"read_file",
|
|
2032
|
-
"edit_file",
|
|
2033
|
-
"run_shell"
|
|
2034
|
-
],
|
|
2035
|
-
"maxTurns": 50,
|
|
2036
|
-
"expectedFileChanges": [
|
|
2037
|
-
"server/routes/items.js",
|
|
2038
|
-
"public/js/items.js",
|
|
2039
|
-
"public/js/app.js",
|
|
2040
|
-
"public/css/style.css",
|
|
2041
|
-
"server/test.js",
|
|
2042
|
-
"server/index.js"
|
|
2043
|
-
]
|
|
2044
|
-
},
|
|
2045
|
-
"sourceSessionId": "d25075ab-ec95-4e1d-813d-79c5dbf5bd68",
|
|
2046
|
-
"classifiedType": "coding:generation",
|
|
2047
|
-
"expectedDiff": null,
|
|
2048
|
-
"complexityIndicator": 6
|
|
2049
|
-
},
|
|
2050
|
-
{
|
|
2051
|
-
"id": "agent-session-cfd12d6c",
|
|
2052
|
-
"prompt": "I'll help you convert this monorepo from CommonJS to ES Modules. Let me start by exploring the project structure.",
|
|
2053
|
-
"taskType": "coding-agent",
|
|
2054
|
-
"difficulty": "hard",
|
|
2055
|
-
"expectedTraits": [
|
|
2056
|
-
"reads before writing",
|
|
2057
|
-
"plans before executing"
|
|
2058
|
-
],
|
|
2059
|
-
"agentExpectations": {
|
|
2060
|
-
"expectedToolCalls": [
|
|
2061
|
-
"list_directory",
|
|
2062
|
-
"read_file",
|
|
2063
|
-
"run_shell",
|
|
2064
|
-
"update_todos",
|
|
2065
|
-
"edit_file",
|
|
2066
|
-
"write_file"
|
|
2067
|
-
],
|
|
2068
|
-
"maxTurns": 50,
|
|
2069
|
-
"expectedFileChanges": [
|
|
2070
|
-
"package.json",
|
|
2071
|
-
"packages/shared/package.json",
|
|
2072
|
-
"packages/api/package.json",
|
|
2073
|
-
"packages/cli/package.json",
|
|
2074
|
-
"packages/shared/formatters.js",
|
|
2075
|
-
"packages/shared/validators.js",
|
|
2076
|
-
"packages/shared/index.js",
|
|
2077
|
-
"packages/shared/test.js",
|
|
2078
|
-
"packages/api/data.js",
|
|
2079
|
-
"packages/api/middleware.js",
|
|
2080
|
-
"packages/api/routes.js",
|
|
2081
|
-
"packages/api/server.js",
|
|
2082
|
-
"packages/api/test.js",
|
|
2083
|
-
"packages/cli/output.js",
|
|
2084
|
-
"packages/cli/commands.js",
|
|
2085
|
-
"packages/cli/index.js",
|
|
2086
|
-
"packages/cli/test.js",
|
|
2087
|
-
"test.js"
|
|
2088
|
-
]
|
|
2089
|
-
},
|
|
2090
|
-
"sourceSessionId": "ab4c19bb-d588-43dd-994b-fdf05431e525",
|
|
2091
|
-
"classifiedType": "coding:generation",
|
|
2092
|
-
"expectedDiff": null,
|
|
2093
|
-
"complexityIndicator": 18
|
|
2094
|
-
},
|
|
2095
|
-
{
|
|
2096
|
-
"id": "agent-session-f21db69e",
|
|
2097
|
-
"prompt": "I'll start by exploring the project structure to understand the Flask app setup.",
|
|
2098
|
-
"taskType": "coding-agent",
|
|
2099
|
-
"difficulty": "hard",
|
|
2100
|
-
"expectedTraits": [
|
|
2101
|
-
"reads before writing",
|
|
2102
|
-
"uses edit over write"
|
|
2103
|
-
],
|
|
2104
|
-
"agentExpectations": {
|
|
2105
|
-
"expectedToolCalls": [
|
|
2106
|
-
"list_directory",
|
|
2107
|
-
"read_file",
|
|
2108
|
-
"edit_file",
|
|
2109
|
-
"run_shell"
|
|
2110
|
-
],
|
|
2111
|
-
"maxTurns": 30,
|
|
2112
|
-
"expectedFileChanges": [
|
|
2113
|
-
"app.py",
|
|
2114
|
-
"test_app.py"
|
|
2115
|
-
]
|
|
2116
|
-
},
|
|
2117
|
-
"sourceSessionId": "d44e8d0e-ef75-4f49-8340-2c4628858fe9",
|
|
2118
|
-
"classifiedType": "coding:generation",
|
|
2119
|
-
"expectedDiff": null,
|
|
2120
|
-
"complexityIndicator": 2
|
|
2121
|
-
},
|
|
2122
|
-
{
|
|
2123
|
-
"id": "agent-session-e8eb3472",
|
|
2124
|
-
"prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure.",
|
|
2125
|
-
"taskType": "coding-agent",
|
|
2126
|
-
"difficulty": "medium",
|
|
2127
|
-
"expectedTraits": [
|
|
2128
|
-
"reads before writing",
|
|
2129
|
-
"uses edit over write"
|
|
2130
|
-
],
|
|
2131
|
-
"agentExpectations": {
|
|
2132
|
-
"expectedToolCalls": [
|
|
2133
|
-
"list_directory",
|
|
2134
|
-
"read_file",
|
|
2135
|
-
"edit_file",
|
|
2136
|
-
"run_shell"
|
|
2137
|
-
],
|
|
2138
|
-
"maxTurns": 14,
|
|
2139
|
-
"expectedFileChanges": [
|
|
2140
|
-
"server.js",
|
|
2141
|
-
"test.js"
|
|
2142
|
-
]
|
|
2143
|
-
},
|
|
2144
|
-
"sourceSessionId": "991abf0f-4bb1-48fa-8aa8-ddd97d45d6f8",
|
|
2145
|
-
"classifiedType": "coding:testing",
|
|
2146
|
-
"expectedDiff": null,
|
|
2147
|
-
"complexityIndicator": 2
|
|
2148
|
-
},
|
|
2149
|
-
{
|
|
2150
|
-
"id": "agent-session-bd5f99de",
|
|
2151
|
-
"prompt": "I'll start by exploring the project structure and understanding the codebase, then run diagnostics to find type-related issues.",
|
|
2152
|
-
"taskType": "coding-agent",
|
|
2153
|
-
"difficulty": "hard",
|
|
2154
|
-
"expectedTraits": [
|
|
2155
|
-
"reads before writing",
|
|
2156
|
-
"uses edit over write"
|
|
2157
|
-
],
|
|
2158
|
-
"agentExpectations": {
|
|
2159
|
-
"expectedToolCalls": [
|
|
2160
|
-
"list_directory",
|
|
2161
|
-
"read_file",
|
|
2162
|
-
"lsp_diagnostics",
|
|
2163
|
-
"run_shell",
|
|
2164
|
-
"edit_file"
|
|
2165
|
-
],
|
|
2166
|
-
"maxTurns": 32,
|
|
2167
|
-
"expectedFileChanges": [
|
|
2168
|
-
"src/handlers.js",
|
|
2169
|
-
"src/utils.js"
|
|
2170
|
-
]
|
|
2171
|
-
},
|
|
2172
|
-
"sourceSessionId": "abeceb68-cda7-4791-a201-b0d8e00bbb90",
|
|
2173
|
-
"classifiedType": "coding:generation",
|
|
2174
|
-
"expectedDiff": null,
|
|
2175
|
-
"complexityIndicator": 2
|
|
2176
1580
|
}
|
|
2177
1581
|
]
|
|
@@ -289,6 +289,7 @@ async function replayAndScore(session, sandbox, runAgentLoop, opts = {}) {
|
|
|
289
289
|
provider,
|
|
290
290
|
model,
|
|
291
291
|
mode: 'build',
|
|
292
|
+
persistTranscript: false,
|
|
292
293
|
});
|
|
293
294
|
const hardTimeout = new Promise((_, reject) =>
|
|
294
295
|
setTimeout(() => reject(new Error('cc-replay hard timeout exceeded')), timeoutMs + 60_000)
|