create-walle 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/bin/create-walle.js +195 -30
- package/bin/mcp-inject.js +18 -53
- package/package.json +3 -1
- package/template/claude-task-manager/approval-agent.js +7 -0
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
- package/template/claude-task-manager/git-utils.js +111 -3
- package/template/claude-task-manager/lib/session-history.js +144 -16
- package/template/claude-task-manager/lib/session-standup.js +409 -0
- package/template/claude-task-manager/lib/standup-attention.js +200 -0
- package/template/claude-task-manager/lib/status-hooks.js +8 -2
- package/template/claude-task-manager/lib/update-telemetry.js +114 -0
- package/template/claude-task-manager/lib/walle-default-model.js +55 -0
- package/template/claude-task-manager/lib/walle-mcp-auto-config.js +62 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +83 -19
- package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
- package/template/claude-task-manager/providers/codex-mcp.js +104 -0
- package/template/claude-task-manager/providers/index.js +2 -0
- package/template/claude-task-manager/public/css/setup.css +2 -1
- package/template/claude-task-manager/public/css/walle.css +5 -0
- package/template/claude-task-manager/public/index.html +1596 -283
- package/template/claude-task-manager/public/js/session-search-utils.js +171 -1
- package/template/claude-task-manager/public/js/setup.js +62 -19
- package/template/claude-task-manager/public/js/stream-view.js +55 -6
- package/template/claude-task-manager/public/js/walle-session.js +73 -16
- package/template/claude-task-manager/public/js/walle.js +34 -2
- package/template/claude-task-manager/server.js +780 -177
- package/template/claude-task-manager/session-integrity.js +58 -15
- package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
- package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
- package/template/package.json +1 -1
- package/template/wall-e/agent.js +36 -7
- package/template/wall-e/api-walle.js +72 -20
- package/template/wall-e/coding/stream-processor.js +22 -2
- package/template/wall-e/coding-orchestrator.js +26 -6
- package/template/wall-e/eval/agent-runner.js +16 -4
- package/template/wall-e/eval/benchmark-generator.js +21 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
- package/template/wall-e/eval/codex-cli-baseline.js +633 -0
- package/template/wall-e/eval/eval-orchestrator.js +3 -3
- package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
- package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
- package/template/wall-e/lib/mcp-integration.js +220 -0
- package/template/wall-e/llm/ollama.js +47 -8
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/tool-adapter.js +1 -0
- package/template/wall-e/loops/ingest.js +42 -8
- package/template/wall-e/mcp-server.js +272 -10
- package/template/wall-e/memory/ctm-session-context.js +910 -0
- package/template/wall-e/server.js +26 -1
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
- package/template/wall-e/skills/skill-planner.js +52 -3
- package/template/wall-e/tools/builtin-middleware.js +55 -2
- package/template/wall-e/tools/shell-policy.js +1 -1
- package/template/wall-e/tools/slack-owner.js +104 -0
- package/template/website/index.html +2 -2
- package/template/builder-journal.md +0 -17
|
@@ -18,11 +18,31 @@ function classifyCodingType(session) {
|
|
|
18
18
|
return 'coding:generation';
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Decide whether a session-mined prompt can be replayed as a standalone benchmark.
 *
 * A prompt qualifies only when, after trimming, it is at least 25 characters
 * long, does not open with assistant-style narration or a bare acknowledgement,
 * and mentions at least one coding-task keyword.
 *
 * @param {*} prompt - Candidate prompt; falsy values are treated as an empty string.
 * @returns {boolean} True when the prompt looks like a replayable user task.
 */
function isReplayableBenchmarkPrompt(prompt) {
  const text = String(prompt || '').trim();
  if (text.length < 25) return false;

  // Session-mined prompts must be the user's task, not the assistant's first
  // progress narration. Assistant prose turns the benchmark into "continue the
  // previous assistant's work", which is not replayable from a fresh sandbox.
  //
  // `sure\b` replaces the earlier `sure[, ]\b`: that form required a word
  // character immediately after the comma/space, so the common opener
  // "Sure, I'll ..." (comma followed by a space) was never rejected.
  // `i('|’)ll\b` already covers "I'll help" / "I'll start".
  if (/^(i('|’)ll|i will|i can|i('|’)m going to|let me|sure|happy to help)\b/i.test(text)) {
    return false;
  }

  // Bare acknowledgements / continuations carry no task of their own.
  // `text` is already trimmed, so no leading-whitespace allowance is needed.
  if (/^(go ahead|continue|proceed|do it|yes|yep|ok|okay|thanks|thank you)\b/i.test(text)) {
    return false;
  }

  // Finally require at least one coding-task keyword somewhere in the prompt.
  return /\b(fix|implement|add|change|update|refactor|test|debug|make|write|delete|remove|harden|wire|bug|failing|error|regression|feature|endpoint|api|ui|server|component|code review|review.*code)\b/i.test(text);
}
|
|
38
|
+
|
|
21
39
|
/**
|
|
22
40
|
* Convert a coding agent session object to a benchmark entry.
|
|
23
41
|
* Returns null if the session would be a duplicate (id already in existingIds).
|
|
24
42
|
*/
|
|
25
43
|
function sessionToBenchmark(session, existingIds = new Set()) {
|
|
44
|
+
if (!isReplayableBenchmarkPrompt(session.prompt)) return null;
|
|
45
|
+
|
|
26
46
|
const id = `agent-session-${crypto.createHash('sha256').update(session.prompt || '').digest('hex').slice(0, 8)}`;
|
|
27
47
|
if (existingIds.has(id)) return null; // dedup
|
|
28
48
|
|
|
@@ -90,4 +110,4 @@ async function generateBenchmarks(brain, { minSignificance = 0.5, limit = 50 } =
|
|
|
90
110
|
return benchmarks;
|
|
91
111
|
}
|
|
92
112
|
|
|
93
|
-
module.exports = { classifyCodingType, sessionToBenchmark, generateBenchmarks };
|
|
113
|
+
module.exports = { classifyCodingType, isReplayableBenchmarkPrompt, sessionToBenchmark, generateBenchmarks };
|
|
@@ -1577,601 +1577,5 @@
|
|
|
1577
1577
|
"expectedFileChanges": [],
|
|
1578
1578
|
"projectFixture": "fullstack-app"
|
|
1579
1579
|
}
|
|
1580
|
-
},
|
|
1581
|
-
{
|
|
1582
|
-
"id": "agent-session-e8413ef1",
|
|
1583
|
-
"prompt": "I'll start by exploring the project structure to understand the Express app setup.",
|
|
1584
|
-
"taskType": "coding-agent",
|
|
1585
|
-
"difficulty": "hard",
|
|
1586
|
-
"expectedTraits": [
|
|
1587
|
-
"reads before writing",
|
|
1588
|
-
"uses edit over write"
|
|
1589
|
-
],
|
|
1590
|
-
"agentExpectations": {
|
|
1591
|
-
"expectedToolCalls": [
|
|
1592
|
-
"list_directory",
|
|
1593
|
-
"read_file",
|
|
1594
|
-
"edit_file",
|
|
1595
|
-
"run_shell"
|
|
1596
|
-
],
|
|
1597
|
-
"maxTurns": 22,
|
|
1598
|
-
"expectedFileChanges": [
|
|
1599
|
-
"server.js",
|
|
1600
|
-
"test.js"
|
|
1601
|
-
]
|
|
1602
|
-
},
|
|
1603
|
-
"sourceSessionId": "8a31c591-bbaf-403f-b82e-3ec093618584",
|
|
1604
|
-
"classifiedType": "coding:generation",
|
|
1605
|
-
"expectedDiff": null,
|
|
1606
|
-
"complexityIndicator": 2
|
|
1607
|
-
},
|
|
1608
|
-
{
|
|
1609
|
-
"id": "agent-session-02a0e2b9",
|
|
1610
|
-
"prompt": "I'll help you rename the 'getData' function to 'fetchRecords' across the project. Let me start by exploring the project structure and finding all occurrences.",
|
|
1611
|
-
"taskType": "coding-agent",
|
|
1612
|
-
"difficulty": "hard",
|
|
1613
|
-
"expectedTraits": [
|
|
1614
|
-
"reads before writing",
|
|
1615
|
-
"uses edit over write"
|
|
1616
|
-
],
|
|
1617
|
-
"agentExpectations": {
|
|
1618
|
-
"expectedToolCalls": [
|
|
1619
|
-
"list_directory",
|
|
1620
|
-
"read_file",
|
|
1621
|
-
"grep_files",
|
|
1622
|
-
"glob",
|
|
1623
|
-
"ask_user",
|
|
1624
|
-
"edit_file",
|
|
1625
|
-
"run_shell"
|
|
1626
|
-
],
|
|
1627
|
-
"maxTurns": 50,
|
|
1628
|
-
"expectedFileChanges": [
|
|
1629
|
-
"server.js",
|
|
1630
|
-
"test.js"
|
|
1631
|
-
]
|
|
1632
|
-
},
|
|
1633
|
-
"sourceSessionId": "07b948cd-2324-442b-849b-ff88de26783d",
|
|
1634
|
-
"classifiedType": "coding:refactoring",
|
|
1635
|
-
"expectedDiff": null,
|
|
1636
|
-
"complexityIndicator": 2
|
|
1637
|
-
},
|
|
1638
|
-
{
|
|
1639
|
-
"id": "agent-session-a78f1dc7",
|
|
1640
|
-
"prompt": "I'll help you find and fix the SQL injection vulnerability. Let me start by exploring the project structure.",
|
|
1641
|
-
"taskType": "coding-agent",
|
|
1642
|
-
"difficulty": "hard",
|
|
1643
|
-
"expectedTraits": [
|
|
1644
|
-
"reads before writing",
|
|
1645
|
-
"uses edit over write"
|
|
1646
|
-
],
|
|
1647
|
-
"agentExpectations": {
|
|
1648
|
-
"expectedToolCalls": [
|
|
1649
|
-
"list_directory",
|
|
1650
|
-
"read_file",
|
|
1651
|
-
"grep_files",
|
|
1652
|
-
"edit_file",
|
|
1653
|
-
"run_shell"
|
|
1654
|
-
],
|
|
1655
|
-
"maxTurns": 30,
|
|
1656
|
-
"expectedFileChanges": [
|
|
1657
|
-
"server.js",
|
|
1658
|
-
"test.js"
|
|
1659
|
-
]
|
|
1660
|
-
},
|
|
1661
|
-
"sourceSessionId": "dcce8928-0376-4058-b5fd-321f50c39783",
|
|
1662
|
-
"classifiedType": "coding:debugging",
|
|
1663
|
-
"expectedDiff": null,
|
|
1664
|
-
"complexityIndicator": 2
|
|
1665
|
-
},
|
|
1666
|
-
{
|
|
1667
|
-
"id": "agent-session-1348957e",
|
|
1668
|
-
"prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure to understand what we're working with.",
|
|
1669
|
-
"taskType": "coding-agent",
|
|
1670
|
-
"difficulty": "medium",
|
|
1671
|
-
"expectedTraits": [
|
|
1672
|
-
"reads before writing",
|
|
1673
|
-
"uses edit over write"
|
|
1674
|
-
],
|
|
1675
|
-
"agentExpectations": {
|
|
1676
|
-
"expectedToolCalls": [
|
|
1677
|
-
"list_directory",
|
|
1678
|
-
"read_file",
|
|
1679
|
-
"run_shell",
|
|
1680
|
-
"ask_user",
|
|
1681
|
-
"edit_file"
|
|
1682
|
-
],
|
|
1683
|
-
"maxTurns": 20,
|
|
1684
|
-
"expectedFileChanges": [
|
|
1685
|
-
"server.js",
|
|
1686
|
-
"test.js"
|
|
1687
|
-
]
|
|
1688
|
-
},
|
|
1689
|
-
"sourceSessionId": "31c2980b-95ce-4685-bdd5-46c92fe9aa36",
|
|
1690
|
-
"classifiedType": "coding:testing",
|
|
1691
|
-
"expectedDiff": null,
|
|
1692
|
-
"complexityIndicator": 2
|
|
1693
|
-
},
|
|
1694
|
-
{
|
|
1695
|
-
"id": "agent-session-f7edd420",
|
|
1696
|
-
"prompt": "I'll start by exploring the project structure to understand the CLI tool setup.",
|
|
1697
|
-
"taskType": "coding-agent",
|
|
1698
|
-
"difficulty": "hard",
|
|
1699
|
-
"expectedTraits": [
|
|
1700
|
-
"reads before writing",
|
|
1701
|
-
"uses edit over write"
|
|
1702
|
-
],
|
|
1703
|
-
"agentExpectations": {
|
|
1704
|
-
"expectedToolCalls": [
|
|
1705
|
-
"list_directory",
|
|
1706
|
-
"read_file",
|
|
1707
|
-
"edit_file",
|
|
1708
|
-
"run_shell"
|
|
1709
|
-
],
|
|
1710
|
-
"maxTurns": 22,
|
|
1711
|
-
"expectedFileChanges": [
|
|
1712
|
-
"index.js",
|
|
1713
|
-
"test.js"
|
|
1714
|
-
]
|
|
1715
|
-
},
|
|
1716
|
-
"sourceSessionId": "5719d4dd-fcdb-47b6-9874-60f4b9fbfb71",
|
|
1717
|
-
"classifiedType": "coding:generation",
|
|
1718
|
-
"expectedDiff": null,
|
|
1719
|
-
"complexityIndicator": 2
|
|
1720
|
-
},
|
|
1721
|
-
{
|
|
1722
|
-
"id": "agent-session-b36c5122",
|
|
1723
|
-
"prompt": "I'll start by exploring the project structure to understand what we're working with.",
|
|
1724
|
-
"taskType": "coding-agent",
|
|
1725
|
-
"difficulty": "hard",
|
|
1726
|
-
"expectedTraits": [
|
|
1727
|
-
"reads before writing",
|
|
1728
|
-
"uses edit over write",
|
|
1729
|
-
"plans before executing"
|
|
1730
|
-
],
|
|
1731
|
-
"agentExpectations": {
|
|
1732
|
-
"expectedToolCalls": [
|
|
1733
|
-
"list_directory",
|
|
1734
|
-
"read_file",
|
|
1735
|
-
"run_shell",
|
|
1736
|
-
"update_todos",
|
|
1737
|
-
"edit_file"
|
|
1738
|
-
],
|
|
1739
|
-
"maxTurns": 36,
|
|
1740
|
-
"expectedFileChanges": [
|
|
1741
|
-
"server/db.js",
|
|
1742
|
-
"server/index.js"
|
|
1743
|
-
]
|
|
1744
|
-
},
|
|
1745
|
-
"sourceSessionId": "f78fa371-7e29-4a52-8f4f-b992fdaccc0d",
|
|
1746
|
-
"classifiedType": "coding:generation",
|
|
1747
|
-
"expectedDiff": null,
|
|
1748
|
-
"complexityIndicator": 2
|
|
1749
|
-
},
|
|
1750
|
-
{
|
|
1751
|
-
"id": "agent-session-29b64889",
|
|
1752
|
-
"prompt": "I'll start by exploring the project structure and understanding what tests exist.",
|
|
1753
|
-
"taskType": "coding-agent",
|
|
1754
|
-
"difficulty": "hard",
|
|
1755
|
-
"expectedTraits": [
|
|
1756
|
-
"reads before writing",
|
|
1757
|
-
"uses edit over write"
|
|
1758
|
-
],
|
|
1759
|
-
"agentExpectations": {
|
|
1760
|
-
"expectedToolCalls": [
|
|
1761
|
-
"list_directory",
|
|
1762
|
-
"read_file",
|
|
1763
|
-
"run_shell",
|
|
1764
|
-
"grep_files",
|
|
1765
|
-
"edit_file"
|
|
1766
|
-
],
|
|
1767
|
-
"maxTurns": 50,
|
|
1768
|
-
"expectedFileChanges": [
|
|
1769
|
-
"server/test.js",
|
|
1770
|
-
"server/db.js"
|
|
1771
|
-
]
|
|
1772
|
-
},
|
|
1773
|
-
"sourceSessionId": "25da8640-eaba-42a6-9eb2-a5831c2162fd",
|
|
1774
|
-
"classifiedType": "coding:testing",
|
|
1775
|
-
"expectedDiff": null,
|
|
1776
|
-
"complexityIndicator": 2
|
|
1777
|
-
},
|
|
1778
|
-
{
|
|
1779
|
-
"id": "agent-session-2494dd1d",
|
|
1780
|
-
"prompt": "I'll help you extract inline SQL queries from route handlers into a separate db.js module. Let me start by exploring the project structure.",
|
|
1781
|
-
"taskType": "coding-agent",
|
|
1782
|
-
"difficulty": "hard",
|
|
1783
|
-
"expectedTraits": [
|
|
1784
|
-
"reads before writing",
|
|
1785
|
-
"uses edit over write",
|
|
1786
|
-
"plans before executing"
|
|
1787
|
-
],
|
|
1788
|
-
"agentExpectations": {
|
|
1789
|
-
"expectedToolCalls": [
|
|
1790
|
-
"list_directory",
|
|
1791
|
-
"read_file",
|
|
1792
|
-
"run_shell",
|
|
1793
|
-
"update_todos",
|
|
1794
|
-
"lsp_references",
|
|
1795
|
-
"grep_files",
|
|
1796
|
-
"edit_file"
|
|
1797
|
-
],
|
|
1798
|
-
"maxTurns": 50,
|
|
1799
|
-
"expectedFileChanges": [
|
|
1800
|
-
"server/db.js",
|
|
1801
|
-
"server/routes/auth.js",
|
|
1802
|
-
"server/routes/users.js",
|
|
1803
|
-
"server/routes/items.js",
|
|
1804
|
-
"server/routes/health.js",
|
|
1805
|
-
"server/middleware/auth.js"
|
|
1806
|
-
]
|
|
1807
|
-
},
|
|
1808
|
-
"sourceSessionId": "08d10d2e-3e63-4354-bf05-8786388b2420",
|
|
1809
|
-
"classifiedType": "coding:refactoring",
|
|
1810
|
-
"expectedDiff": null,
|
|
1811
|
-
"complexityIndicator": 6
|
|
1812
|
-
},
|
|
1813
|
-
{
|
|
1814
|
-
"id": "agent-session-5fcb2df4",
|
|
1815
|
-
"prompt": "I'll start by exploring the project structure to understand the fullstack app, then create the CI/CD pipeline and containerization files.",
|
|
1816
|
-
"taskType": "coding-agent",
|
|
1817
|
-
"difficulty": "hard",
|
|
1818
|
-
"expectedTraits": [
|
|
1819
|
-
"reads before writing"
|
|
1820
|
-
],
|
|
1821
|
-
"agentExpectations": {
|
|
1822
|
-
"expectedToolCalls": [
|
|
1823
|
-
"list_directory",
|
|
1824
|
-
"read_file",
|
|
1825
|
-
"glob",
|
|
1826
|
-
"write_file",
|
|
1827
|
-
"edit_file",
|
|
1828
|
-
"run_shell"
|
|
1829
|
-
],
|
|
1830
|
-
"maxTurns": 50,
|
|
1831
|
-
"expectedFileChanges": [
|
|
1832
|
-
".github/workflows/ci.yml",
|
|
1833
|
-
"Dockerfile",
|
|
1834
|
-
"docker-compose.yml",
|
|
1835
|
-
".dockerignore",
|
|
1836
|
-
"config/default.js",
|
|
1837
|
-
"DOCKER.md",
|
|
1838
|
-
"CI_CD_SETUP.md",
|
|
1839
|
-
"QUICKSTART.md",
|
|
1840
|
-
"IMPLEMENTATION_SUMMARY.md",
|
|
1841
|
-
"DEPLOYMENT_CHECKLIST.md",
|
|
1842
|
-
"CHANGES.md",
|
|
1843
|
-
"README_CI_CD.md"
|
|
1844
|
-
]
|
|
1845
|
-
},
|
|
1846
|
-
"sourceSessionId": "9055411d-427f-498b-b501-b7f695b22214",
|
|
1847
|
-
"classifiedType": "coding:generation",
|
|
1848
|
-
"expectedDiff": null,
|
|
1849
|
-
"complexityIndicator": 12
|
|
1850
|
-
},
|
|
1851
|
-
{
|
|
1852
|
-
"id": "agent-session-1c9213cc",
|
|
1853
|
-
"prompt": "I'll start by exploring the project structure to understand the current implementation.",
|
|
1854
|
-
"taskType": "coding-agent",
|
|
1855
|
-
"difficulty": "hard",
|
|
1856
|
-
"expectedTraits": [
|
|
1857
|
-
"reads before writing",
|
|
1858
|
-
"uses edit over write"
|
|
1859
|
-
],
|
|
1860
|
-
"agentExpectations": {
|
|
1861
|
-
"expectedToolCalls": [
|
|
1862
|
-
"list_directory",
|
|
1863
|
-
"read_file",
|
|
1864
|
-
"edit_file",
|
|
1865
|
-
"run_shell"
|
|
1866
|
-
],
|
|
1867
|
-
"maxTurns": 50,
|
|
1868
|
-
"expectedFileChanges": [
|
|
1869
|
-
"server/routes/items.js",
|
|
1870
|
-
"public/js/items.js",
|
|
1871
|
-
"public/js/app.js",
|
|
1872
|
-
"server/test.js"
|
|
1873
|
-
]
|
|
1874
|
-
},
|
|
1875
|
-
"sourceSessionId": "8a0a1609-9031-4bcc-86ff-e690fef3b290",
|
|
1876
|
-
"classifiedType": "coding:generation",
|
|
1877
|
-
"expectedDiff": null,
|
|
1878
|
-
"complexityIndicator": 4
|
|
1879
|
-
},
|
|
1880
|
-
{
|
|
1881
|
-
"id": "agent-session-63ada37e",
|
|
1882
|
-
"prompt": "I'll help you rename the function 'processData' to 'transformPayload' across the entire project. Let me start by exploring the project structure and finding all usages.",
|
|
1883
|
-
"taskType": "coding-agent",
|
|
1884
|
-
"difficulty": "hard",
|
|
1885
|
-
"expectedTraits": [
|
|
1886
|
-
"reads before writing",
|
|
1887
|
-
"uses edit over write"
|
|
1888
|
-
],
|
|
1889
|
-
"agentExpectations": {
|
|
1890
|
-
"expectedToolCalls": [
|
|
1891
|
-
"list_directory",
|
|
1892
|
-
"read_file",
|
|
1893
|
-
"grep_files",
|
|
1894
|
-
"run_shell",
|
|
1895
|
-
"ask_user",
|
|
1896
|
-
"edit_file",
|
|
1897
|
-
"lsp_references"
|
|
1898
|
-
],
|
|
1899
|
-
"maxTurns": 50,
|
|
1900
|
-
"expectedFileChanges": [
|
|
1901
|
-
"server.js",
|
|
1902
|
-
"test.js"
|
|
1903
|
-
]
|
|
1904
|
-
},
|
|
1905
|
-
"sourceSessionId": "a7b388ad-58e9-48ea-9278-b817ba300726",
|
|
1906
|
-
"classifiedType": "coding:refactoring",
|
|
1907
|
-
"expectedDiff": null,
|
|
1908
|
-
"complexityIndicator": 2
|
|
1909
|
-
},
|
|
1910
|
-
{
|
|
1911
|
-
"id": "agent-session-3a3b9214",
|
|
1912
|
-
"prompt": "I'll start by exploring the project structure to understand the Express app and then implement rate limiting and CSRF protection.",
|
|
1913
|
-
"taskType": "coding-agent",
|
|
1914
|
-
"difficulty": "hard",
|
|
1915
|
-
"expectedTraits": [
|
|
1916
|
-
"reads before writing",
|
|
1917
|
-
"uses edit over write"
|
|
1918
|
-
],
|
|
1919
|
-
"agentExpectations": {
|
|
1920
|
-
"expectedToolCalls": [
|
|
1921
|
-
"list_directory",
|
|
1922
|
-
"read_file",
|
|
1923
|
-
"edit_file",
|
|
1924
|
-
"run_shell"
|
|
1925
|
-
],
|
|
1926
|
-
"maxTurns": 36,
|
|
1927
|
-
"expectedFileChanges": [
|
|
1928
|
-
"package.json",
|
|
1929
|
-
"server.js",
|
|
1930
|
-
"test.js"
|
|
1931
|
-
]
|
|
1932
|
-
},
|
|
1933
|
-
"sourceSessionId": "4a7b82b6-88ce-4bb3-a41a-d5ba58f2c83b",
|
|
1934
|
-
"classifiedType": "coding:generation",
|
|
1935
|
-
"expectedDiff": null,
|
|
1936
|
-
"complexityIndicator": 3
|
|
1937
|
-
},
|
|
1938
|
-
{
|
|
1939
|
-
"id": "agent-session-c28e561a",
|
|
1940
|
-
"prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure to understand the codebase.",
|
|
1941
|
-
"taskType": "coding-agent",
|
|
1942
|
-
"difficulty": "medium",
|
|
1943
|
-
"expectedTraits": [
|
|
1944
|
-
"reads before writing",
|
|
1945
|
-
"uses edit over write"
|
|
1946
|
-
],
|
|
1947
|
-
"agentExpectations": {
|
|
1948
|
-
"expectedToolCalls": [
|
|
1949
|
-
"list_directory",
|
|
1950
|
-
"read_file",
|
|
1951
|
-
"edit_file",
|
|
1952
|
-
"run_shell"
|
|
1953
|
-
],
|
|
1954
|
-
"maxTurns": 14,
|
|
1955
|
-
"expectedFileChanges": [
|
|
1956
|
-
"server.js",
|
|
1957
|
-
"test.js"
|
|
1958
|
-
]
|
|
1959
|
-
},
|
|
1960
|
-
"sourceSessionId": "91ccf531-0e27-45be-880b-9e37fd8b8349",
|
|
1961
|
-
"classifiedType": "coding:testing",
|
|
1962
|
-
"expectedDiff": null,
|
|
1963
|
-
"complexityIndicator": 2
|
|
1964
|
-
},
|
|
1965
|
-
{
|
|
1966
|
-
"id": "agent-session-ecc4ff3f",
|
|
1967
|
-
"prompt": "I'll start by exploring the project structure to understand the codebase and locate the relevant files.",
|
|
1968
|
-
"taskType": "coding-agent",
|
|
1969
|
-
"difficulty": "hard",
|
|
1970
|
-
"expectedTraits": [
|
|
1971
|
-
"reads before writing",
|
|
1972
|
-
"uses edit over write"
|
|
1973
|
-
],
|
|
1974
|
-
"agentExpectations": {
|
|
1975
|
-
"expectedToolCalls": [
|
|
1976
|
-
"list_directory",
|
|
1977
|
-
"read_file",
|
|
1978
|
-
"run_shell",
|
|
1979
|
-
"edit_file"
|
|
1980
|
-
],
|
|
1981
|
-
"maxTurns": 48,
|
|
1982
|
-
"expectedFileChanges": [
|
|
1983
|
-
"brain.js",
|
|
1984
|
-
"test.js"
|
|
1985
|
-
]
|
|
1986
|
-
},
|
|
1987
|
-
"sourceSessionId": "aeae6b54-c335-4fe4-a903-3e6b440fbe66",
|
|
1988
|
-
"classifiedType": "coding:generation",
|
|
1989
|
-
"expectedDiff": null,
|
|
1990
|
-
"complexityIndicator": 2
|
|
1991
|
-
},
|
|
1992
|
-
{
|
|
1993
|
-
"id": "agent-session-a29571f9",
|
|
1994
|
-
"prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure.",
|
|
1995
|
-
"taskType": "coding-agent",
|
|
1996
|
-
"difficulty": "medium",
|
|
1997
|
-
"expectedTraits": [
|
|
1998
|
-
"reads before writing",
|
|
1999
|
-
"uses edit over write"
|
|
2000
|
-
],
|
|
2001
|
-
"agentExpectations": {
|
|
2002
|
-
"expectedToolCalls": [
|
|
2003
|
-
"list_directory",
|
|
2004
|
-
"read_file",
|
|
2005
|
-
"edit_file",
|
|
2006
|
-
"run_shell"
|
|
2007
|
-
],
|
|
2008
|
-
"maxTurns": 14,
|
|
2009
|
-
"expectedFileChanges": [
|
|
2010
|
-
"server.js",
|
|
2011
|
-
"test.js"
|
|
2012
|
-
]
|
|
2013
|
-
},
|
|
2014
|
-
"sourceSessionId": "bdc47279-f629-465b-bc8e-eaa22cb65266",
|
|
2015
|
-
"classifiedType": "coding:testing",
|
|
2016
|
-
"expectedDiff": null,
|
|
2017
|
-
"complexityIndicator": 2
|
|
2018
|
-
},
|
|
2019
|
-
{
|
|
2020
|
-
"id": "agent-session-8abb12e4",
|
|
2021
|
-
"prompt": "I'll start by exploring the project structure to understand the codebase.",
|
|
2022
|
-
"taskType": "coding-agent",
|
|
2023
|
-
"difficulty": "hard",
|
|
2024
|
-
"expectedTraits": [
|
|
2025
|
-
"reads before writing",
|
|
2026
|
-
"uses edit over write"
|
|
2027
|
-
],
|
|
2028
|
-
"agentExpectations": {
|
|
2029
|
-
"expectedToolCalls": [
|
|
2030
|
-
"list_directory",
|
|
2031
|
-
"read_file",
|
|
2032
|
-
"edit_file",
|
|
2033
|
-
"run_shell"
|
|
2034
|
-
],
|
|
2035
|
-
"maxTurns": 50,
|
|
2036
|
-
"expectedFileChanges": [
|
|
2037
|
-
"server/routes/items.js",
|
|
2038
|
-
"public/js/items.js",
|
|
2039
|
-
"public/js/app.js",
|
|
2040
|
-
"public/css/style.css",
|
|
2041
|
-
"server/test.js",
|
|
2042
|
-
"server/index.js"
|
|
2043
|
-
]
|
|
2044
|
-
},
|
|
2045
|
-
"sourceSessionId": "d25075ab-ec95-4e1d-813d-79c5dbf5bd68",
|
|
2046
|
-
"classifiedType": "coding:generation",
|
|
2047
|
-
"expectedDiff": null,
|
|
2048
|
-
"complexityIndicator": 6
|
|
2049
|
-
},
|
|
2050
|
-
{
|
|
2051
|
-
"id": "agent-session-cfd12d6c",
|
|
2052
|
-
"prompt": "I'll help you convert this monorepo from CommonJS to ES Modules. Let me start by exploring the project structure.",
|
|
2053
|
-
"taskType": "coding-agent",
|
|
2054
|
-
"difficulty": "hard",
|
|
2055
|
-
"expectedTraits": [
|
|
2056
|
-
"reads before writing",
|
|
2057
|
-
"plans before executing"
|
|
2058
|
-
],
|
|
2059
|
-
"agentExpectations": {
|
|
2060
|
-
"expectedToolCalls": [
|
|
2061
|
-
"list_directory",
|
|
2062
|
-
"read_file",
|
|
2063
|
-
"run_shell",
|
|
2064
|
-
"update_todos",
|
|
2065
|
-
"edit_file",
|
|
2066
|
-
"write_file"
|
|
2067
|
-
],
|
|
2068
|
-
"maxTurns": 50,
|
|
2069
|
-
"expectedFileChanges": [
|
|
2070
|
-
"package.json",
|
|
2071
|
-
"packages/shared/package.json",
|
|
2072
|
-
"packages/api/package.json",
|
|
2073
|
-
"packages/cli/package.json",
|
|
2074
|
-
"packages/shared/formatters.js",
|
|
2075
|
-
"packages/shared/validators.js",
|
|
2076
|
-
"packages/shared/index.js",
|
|
2077
|
-
"packages/shared/test.js",
|
|
2078
|
-
"packages/api/data.js",
|
|
2079
|
-
"packages/api/middleware.js",
|
|
2080
|
-
"packages/api/routes.js",
|
|
2081
|
-
"packages/api/server.js",
|
|
2082
|
-
"packages/api/test.js",
|
|
2083
|
-
"packages/cli/output.js",
|
|
2084
|
-
"packages/cli/commands.js",
|
|
2085
|
-
"packages/cli/index.js",
|
|
2086
|
-
"packages/cli/test.js",
|
|
2087
|
-
"test.js"
|
|
2088
|
-
]
|
|
2089
|
-
},
|
|
2090
|
-
"sourceSessionId": "ab4c19bb-d588-43dd-994b-fdf05431e525",
|
|
2091
|
-
"classifiedType": "coding:generation",
|
|
2092
|
-
"expectedDiff": null,
|
|
2093
|
-
"complexityIndicator": 18
|
|
2094
|
-
},
|
|
2095
|
-
{
|
|
2096
|
-
"id": "agent-session-f21db69e",
|
|
2097
|
-
"prompt": "I'll start by exploring the project structure to understand the Flask app setup.",
|
|
2098
|
-
"taskType": "coding-agent",
|
|
2099
|
-
"difficulty": "hard",
|
|
2100
|
-
"expectedTraits": [
|
|
2101
|
-
"reads before writing",
|
|
2102
|
-
"uses edit over write"
|
|
2103
|
-
],
|
|
2104
|
-
"agentExpectations": {
|
|
2105
|
-
"expectedToolCalls": [
|
|
2106
|
-
"list_directory",
|
|
2107
|
-
"read_file",
|
|
2108
|
-
"edit_file",
|
|
2109
|
-
"run_shell"
|
|
2110
|
-
],
|
|
2111
|
-
"maxTurns": 30,
|
|
2112
|
-
"expectedFileChanges": [
|
|
2113
|
-
"app.py",
|
|
2114
|
-
"test_app.py"
|
|
2115
|
-
]
|
|
2116
|
-
},
|
|
2117
|
-
"sourceSessionId": "d44e8d0e-ef75-4f49-8340-2c4628858fe9",
|
|
2118
|
-
"classifiedType": "coding:generation",
|
|
2119
|
-
"expectedDiff": null,
|
|
2120
|
-
"complexityIndicator": 2
|
|
2121
|
-
},
|
|
2122
|
-
{
|
|
2123
|
-
"id": "agent-session-e8eb3472",
|
|
2124
|
-
"prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure.",
|
|
2125
|
-
"taskType": "coding-agent",
|
|
2126
|
-
"difficulty": "medium",
|
|
2127
|
-
"expectedTraits": [
|
|
2128
|
-
"reads before writing",
|
|
2129
|
-
"uses edit over write"
|
|
2130
|
-
],
|
|
2131
|
-
"agentExpectations": {
|
|
2132
|
-
"expectedToolCalls": [
|
|
2133
|
-
"list_directory",
|
|
2134
|
-
"read_file",
|
|
2135
|
-
"edit_file",
|
|
2136
|
-
"run_shell"
|
|
2137
|
-
],
|
|
2138
|
-
"maxTurns": 14,
|
|
2139
|
-
"expectedFileChanges": [
|
|
2140
|
-
"server.js",
|
|
2141
|
-
"test.js"
|
|
2142
|
-
]
|
|
2143
|
-
},
|
|
2144
|
-
"sourceSessionId": "991abf0f-4bb1-48fa-8aa8-ddd97d45d6f8",
|
|
2145
|
-
"classifiedType": "coding:testing",
|
|
2146
|
-
"expectedDiff": null,
|
|
2147
|
-
"complexityIndicator": 2
|
|
2148
|
-
},
|
|
2149
|
-
{
|
|
2150
|
-
"id": "agent-session-bd5f99de",
|
|
2151
|
-
"prompt": "I'll start by exploring the project structure and understanding the codebase, then run diagnostics to find type-related issues.",
|
|
2152
|
-
"taskType": "coding-agent",
|
|
2153
|
-
"difficulty": "hard",
|
|
2154
|
-
"expectedTraits": [
|
|
2155
|
-
"reads before writing",
|
|
2156
|
-
"uses edit over write"
|
|
2157
|
-
],
|
|
2158
|
-
"agentExpectations": {
|
|
2159
|
-
"expectedToolCalls": [
|
|
2160
|
-
"list_directory",
|
|
2161
|
-
"read_file",
|
|
2162
|
-
"lsp_diagnostics",
|
|
2163
|
-
"run_shell",
|
|
2164
|
-
"edit_file"
|
|
2165
|
-
],
|
|
2166
|
-
"maxTurns": 32,
|
|
2167
|
-
"expectedFileChanges": [
|
|
2168
|
-
"src/handlers.js",
|
|
2169
|
-
"src/utils.js"
|
|
2170
|
-
]
|
|
2171
|
-
},
|
|
2172
|
-
"sourceSessionId": "abeceb68-cda7-4791-a201-b0d8e00bbb90",
|
|
2173
|
-
"classifiedType": "coding:generation",
|
|
2174
|
-
"expectedDiff": null,
|
|
2175
|
-
"complexityIndicator": 2
|
|
2176
1580
|
}
|
|
2177
1581
|
]
|