create-walle 0.9.13 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +6 -1
  2. package/bin/create-walle.js +195 -30
  3. package/bin/mcp-inject.js +18 -53
  4. package/package.json +3 -1
  5. package/template/claude-task-manager/approval-agent.js +7 -0
  6. package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
  7. package/template/claude-task-manager/git-utils.js +111 -3
  8. package/template/claude-task-manager/lib/session-history.js +144 -16
  9. package/template/claude-task-manager/lib/session-standup.js +409 -0
  10. package/template/claude-task-manager/lib/standup-attention.js +200 -0
  11. package/template/claude-task-manager/lib/status-hooks.js +8 -2
  12. package/template/claude-task-manager/lib/update-telemetry.js +114 -0
  13. package/template/claude-task-manager/lib/walle-default-model.js +55 -0
  14. package/template/claude-task-manager/lib/walle-mcp-auto-config.js +62 -0
  15. package/template/claude-task-manager/lib/walle-supervisor.js +83 -19
  16. package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
  17. package/template/claude-task-manager/providers/codex-mcp.js +104 -0
  18. package/template/claude-task-manager/providers/index.js +2 -0
  19. package/template/claude-task-manager/public/css/setup.css +2 -1
  20. package/template/claude-task-manager/public/css/walle.css +5 -0
  21. package/template/claude-task-manager/public/index.html +1596 -283
  22. package/template/claude-task-manager/public/js/session-search-utils.js +171 -1
  23. package/template/claude-task-manager/public/js/setup.js +62 -19
  24. package/template/claude-task-manager/public/js/stream-view.js +55 -6
  25. package/template/claude-task-manager/public/js/walle-session.js +73 -16
  26. package/template/claude-task-manager/public/js/walle.js +34 -2
  27. package/template/claude-task-manager/server.js +780 -177
  28. package/template/claude-task-manager/session-integrity.js +58 -15
  29. package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
  30. package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
  31. package/template/package.json +1 -1
  32. package/template/wall-e/agent.js +36 -7
  33. package/template/wall-e/api-walle.js +72 -20
  34. package/template/wall-e/coding/stream-processor.js +22 -2
  35. package/template/wall-e/coding-orchestrator.js +26 -6
  36. package/template/wall-e/eval/agent-runner.js +16 -4
  37. package/template/wall-e/eval/benchmark-generator.js +21 -1
  38. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
  39. package/template/wall-e/eval/codex-cli-baseline.js +633 -0
  40. package/template/wall-e/eval/eval-orchestrator.js +3 -3
  41. package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
  42. package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
  43. package/template/wall-e/lib/mcp-integration.js +220 -0
  44. package/template/wall-e/llm/ollama.js +47 -8
  45. package/template/wall-e/llm/ollama.plugin.json +1 -1
  46. package/template/wall-e/llm/tool-adapter.js +1 -0
  47. package/template/wall-e/loops/ingest.js +42 -8
  48. package/template/wall-e/mcp-server.js +272 -10
  49. package/template/wall-e/memory/ctm-session-context.js +910 -0
  50. package/template/wall-e/server.js +26 -1
  51. package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
  52. package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
  53. package/template/wall-e/skills/skill-planner.js +52 -3
  54. package/template/wall-e/tools/builtin-middleware.js +55 -2
  55. package/template/wall-e/tools/shell-policy.js +1 -1
  56. package/template/wall-e/tools/slack-owner.js +104 -0
  57. package/template/website/index.html +2 -2
  58. package/template/builder-journal.md +0 -17
@@ -18,11 +18,31 @@ function classifyCodingType(session) {
18
18
  return 'coding:generation';
19
19
  }
20
20
 
21
/**
 * Decide whether a session-mined prompt can be replayed as a standalone
 * benchmark task from a fresh sandbox.
 *
 * A prompt is replayable when it is long enough to carry a task, does not
 * read like assistant narration or a bare follow-up, and mentions at least
 * one coding-task keyword.
 *
 * @param {*} prompt - Candidate prompt; any falsy value is treated as ''.
 * @returns {boolean} true when the prompt looks like a user-authored coding task.
 */
function isReplayableBenchmarkPrompt(prompt) {
  const text = String(prompt || '').trim();
  if (text.length < 25) return false;

  // Session-mined prompts must be the user's task, not the assistant's first
  // progress narration. Assistant prose turns the benchmark into "continue the
  // previous assistant's work", which is not replayable from a fresh sandbox.
  //
  // `sure[, ]` is deliberately kept outside the `\b` group: after "Sure," the
  // next character is normally a space, so a trailing `\b` can never match and
  // "Sure, I'll fix ..." would slip through the filter.
  const assistantNarration =
    /^(?:(?:i['’]ll|i will|i can|i['’]m going to|let me|happy to help)\b|sure[, ])/i;
  if (assistantNarration.test(text)) {
    return false;
  }

  // Bare acknowledgements / follow-ups depend on earlier conversation state.
  // `text` is already trimmed, so no leading-whitespace allowance is needed.
  const followUp = /^(?:go ahead|continue|proceed|do it|yes|yep|ok|okay|thanks|thank you)\b/i;
  if (followUp.test(text)) {
    return false;
  }

  // Require at least one keyword that signals an actual coding task.
  const taskKeyword =
    /\b(fix|implement|add|change|update|refactor|test|debug|make|write|delete|remove|harden|wire|bug|failing|error|regression|feature|endpoint|api|ui|server|component|code review|review.*code)\b/i;
  return taskKeyword.test(text);
}
38
+
21
39
  /**
22
40
  * Convert a coding agent session object to a benchmark entry.
23
41
  * Returns null if the session would be a duplicate (id already in existingIds).
24
42
  */
25
43
  function sessionToBenchmark(session, existingIds = new Set()) {
44
+ if (!isReplayableBenchmarkPrompt(session.prompt)) return null;
45
+
26
46
  const id = `agent-session-${crypto.createHash('sha256').update(session.prompt || '').digest('hex').slice(0, 8)}`;
27
47
  if (existingIds.has(id)) return null; // dedup
28
48
 
@@ -90,4 +110,4 @@ async function generateBenchmarks(brain, { minSignificance = 0.5, limit = 50 } =
90
110
  return benchmarks;
91
111
  }
92
112
 
93
- module.exports = { classifyCodingType, sessionToBenchmark, generateBenchmarks };
113
+ module.exports = { classifyCodingType, isReplayableBenchmarkPrompt, sessionToBenchmark, generateBenchmarks };
@@ -1577,601 +1577,5 @@
1577
1577
  "expectedFileChanges": [],
1578
1578
  "projectFixture": "fullstack-app"
1579
1579
  }
1580
- },
1581
- {
1582
- "id": "agent-session-e8413ef1",
1583
- "prompt": "I'll start by exploring the project structure to understand the Express app setup.",
1584
- "taskType": "coding-agent",
1585
- "difficulty": "hard",
1586
- "expectedTraits": [
1587
- "reads before writing",
1588
- "uses edit over write"
1589
- ],
1590
- "agentExpectations": {
1591
- "expectedToolCalls": [
1592
- "list_directory",
1593
- "read_file",
1594
- "edit_file",
1595
- "run_shell"
1596
- ],
1597
- "maxTurns": 22,
1598
- "expectedFileChanges": [
1599
- "server.js",
1600
- "test.js"
1601
- ]
1602
- },
1603
- "sourceSessionId": "8a31c591-bbaf-403f-b82e-3ec093618584",
1604
- "classifiedType": "coding:generation",
1605
- "expectedDiff": null,
1606
- "complexityIndicator": 2
1607
- },
1608
- {
1609
- "id": "agent-session-02a0e2b9",
1610
- "prompt": "I'll help you rename the 'getData' function to 'fetchRecords' across the project. Let me start by exploring the project structure and finding all occurrences.",
1611
- "taskType": "coding-agent",
1612
- "difficulty": "hard",
1613
- "expectedTraits": [
1614
- "reads before writing",
1615
- "uses edit over write"
1616
- ],
1617
- "agentExpectations": {
1618
- "expectedToolCalls": [
1619
- "list_directory",
1620
- "read_file",
1621
- "grep_files",
1622
- "glob",
1623
- "ask_user",
1624
- "edit_file",
1625
- "run_shell"
1626
- ],
1627
- "maxTurns": 50,
1628
- "expectedFileChanges": [
1629
- "server.js",
1630
- "test.js"
1631
- ]
1632
- },
1633
- "sourceSessionId": "07b948cd-2324-442b-849b-ff88de26783d",
1634
- "classifiedType": "coding:refactoring",
1635
- "expectedDiff": null,
1636
- "complexityIndicator": 2
1637
- },
1638
- {
1639
- "id": "agent-session-a78f1dc7",
1640
- "prompt": "I'll help you find and fix the SQL injection vulnerability. Let me start by exploring the project structure.",
1641
- "taskType": "coding-agent",
1642
- "difficulty": "hard",
1643
- "expectedTraits": [
1644
- "reads before writing",
1645
- "uses edit over write"
1646
- ],
1647
- "agentExpectations": {
1648
- "expectedToolCalls": [
1649
- "list_directory",
1650
- "read_file",
1651
- "grep_files",
1652
- "edit_file",
1653
- "run_shell"
1654
- ],
1655
- "maxTurns": 30,
1656
- "expectedFileChanges": [
1657
- "server.js",
1658
- "test.js"
1659
- ]
1660
- },
1661
- "sourceSessionId": "dcce8928-0376-4058-b5fd-321f50c39783",
1662
- "classifiedType": "coding:debugging",
1663
- "expectedDiff": null,
1664
- "complexityIndicator": 2
1665
- },
1666
- {
1667
- "id": "agent-session-1348957e",
1668
- "prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure to understand what we're working with.",
1669
- "taskType": "coding-agent",
1670
- "difficulty": "medium",
1671
- "expectedTraits": [
1672
- "reads before writing",
1673
- "uses edit over write"
1674
- ],
1675
- "agentExpectations": {
1676
- "expectedToolCalls": [
1677
- "list_directory",
1678
- "read_file",
1679
- "run_shell",
1680
- "ask_user",
1681
- "edit_file"
1682
- ],
1683
- "maxTurns": 20,
1684
- "expectedFileChanges": [
1685
- "server.js",
1686
- "test.js"
1687
- ]
1688
- },
1689
- "sourceSessionId": "31c2980b-95ce-4685-bdd5-46c92fe9aa36",
1690
- "classifiedType": "coding:testing",
1691
- "expectedDiff": null,
1692
- "complexityIndicator": 2
1693
- },
1694
- {
1695
- "id": "agent-session-f7edd420",
1696
- "prompt": "I'll start by exploring the project structure to understand the CLI tool setup.",
1697
- "taskType": "coding-agent",
1698
- "difficulty": "hard",
1699
- "expectedTraits": [
1700
- "reads before writing",
1701
- "uses edit over write"
1702
- ],
1703
- "agentExpectations": {
1704
- "expectedToolCalls": [
1705
- "list_directory",
1706
- "read_file",
1707
- "edit_file",
1708
- "run_shell"
1709
- ],
1710
- "maxTurns": 22,
1711
- "expectedFileChanges": [
1712
- "index.js",
1713
- "test.js"
1714
- ]
1715
- },
1716
- "sourceSessionId": "5719d4dd-fcdb-47b6-9874-60f4b9fbfb71",
1717
- "classifiedType": "coding:generation",
1718
- "expectedDiff": null,
1719
- "complexityIndicator": 2
1720
- },
1721
- {
1722
- "id": "agent-session-b36c5122",
1723
- "prompt": "I'll start by exploring the project structure to understand what we're working with.",
1724
- "taskType": "coding-agent",
1725
- "difficulty": "hard",
1726
- "expectedTraits": [
1727
- "reads before writing",
1728
- "uses edit over write",
1729
- "plans before executing"
1730
- ],
1731
- "agentExpectations": {
1732
- "expectedToolCalls": [
1733
- "list_directory",
1734
- "read_file",
1735
- "run_shell",
1736
- "update_todos",
1737
- "edit_file"
1738
- ],
1739
- "maxTurns": 36,
1740
- "expectedFileChanges": [
1741
- "server/db.js",
1742
- "server/index.js"
1743
- ]
1744
- },
1745
- "sourceSessionId": "f78fa371-7e29-4a52-8f4f-b992fdaccc0d",
1746
- "classifiedType": "coding:generation",
1747
- "expectedDiff": null,
1748
- "complexityIndicator": 2
1749
- },
1750
- {
1751
- "id": "agent-session-29b64889",
1752
- "prompt": "I'll start by exploring the project structure and understanding what tests exist.",
1753
- "taskType": "coding-agent",
1754
- "difficulty": "hard",
1755
- "expectedTraits": [
1756
- "reads before writing",
1757
- "uses edit over write"
1758
- ],
1759
- "agentExpectations": {
1760
- "expectedToolCalls": [
1761
- "list_directory",
1762
- "read_file",
1763
- "run_shell",
1764
- "grep_files",
1765
- "edit_file"
1766
- ],
1767
- "maxTurns": 50,
1768
- "expectedFileChanges": [
1769
- "server/test.js",
1770
- "server/db.js"
1771
- ]
1772
- },
1773
- "sourceSessionId": "25da8640-eaba-42a6-9eb2-a5831c2162fd",
1774
- "classifiedType": "coding:testing",
1775
- "expectedDiff": null,
1776
- "complexityIndicator": 2
1777
- },
1778
- {
1779
- "id": "agent-session-2494dd1d",
1780
- "prompt": "I'll help you extract inline SQL queries from route handlers into a separate db.js module. Let me start by exploring the project structure.",
1781
- "taskType": "coding-agent",
1782
- "difficulty": "hard",
1783
- "expectedTraits": [
1784
- "reads before writing",
1785
- "uses edit over write",
1786
- "plans before executing"
1787
- ],
1788
- "agentExpectations": {
1789
- "expectedToolCalls": [
1790
- "list_directory",
1791
- "read_file",
1792
- "run_shell",
1793
- "update_todos",
1794
- "lsp_references",
1795
- "grep_files",
1796
- "edit_file"
1797
- ],
1798
- "maxTurns": 50,
1799
- "expectedFileChanges": [
1800
- "server/db.js",
1801
- "server/routes/auth.js",
1802
- "server/routes/users.js",
1803
- "server/routes/items.js",
1804
- "server/routes/health.js",
1805
- "server/middleware/auth.js"
1806
- ]
1807
- },
1808
- "sourceSessionId": "08d10d2e-3e63-4354-bf05-8786388b2420",
1809
- "classifiedType": "coding:refactoring",
1810
- "expectedDiff": null,
1811
- "complexityIndicator": 6
1812
- },
1813
- {
1814
- "id": "agent-session-5fcb2df4",
1815
- "prompt": "I'll start by exploring the project structure to understand the fullstack app, then create the CI/CD pipeline and containerization files.",
1816
- "taskType": "coding-agent",
1817
- "difficulty": "hard",
1818
- "expectedTraits": [
1819
- "reads before writing"
1820
- ],
1821
- "agentExpectations": {
1822
- "expectedToolCalls": [
1823
- "list_directory",
1824
- "read_file",
1825
- "glob",
1826
- "write_file",
1827
- "edit_file",
1828
- "run_shell"
1829
- ],
1830
- "maxTurns": 50,
1831
- "expectedFileChanges": [
1832
- ".github/workflows/ci.yml",
1833
- "Dockerfile",
1834
- "docker-compose.yml",
1835
- ".dockerignore",
1836
- "config/default.js",
1837
- "DOCKER.md",
1838
- "CI_CD_SETUP.md",
1839
- "QUICKSTART.md",
1840
- "IMPLEMENTATION_SUMMARY.md",
1841
- "DEPLOYMENT_CHECKLIST.md",
1842
- "CHANGES.md",
1843
- "README_CI_CD.md"
1844
- ]
1845
- },
1846
- "sourceSessionId": "9055411d-427f-498b-b501-b7f695b22214",
1847
- "classifiedType": "coding:generation",
1848
- "expectedDiff": null,
1849
- "complexityIndicator": 12
1850
- },
1851
- {
1852
- "id": "agent-session-1c9213cc",
1853
- "prompt": "I'll start by exploring the project structure to understand the current implementation.",
1854
- "taskType": "coding-agent",
1855
- "difficulty": "hard",
1856
- "expectedTraits": [
1857
- "reads before writing",
1858
- "uses edit over write"
1859
- ],
1860
- "agentExpectations": {
1861
- "expectedToolCalls": [
1862
- "list_directory",
1863
- "read_file",
1864
- "edit_file",
1865
- "run_shell"
1866
- ],
1867
- "maxTurns": 50,
1868
- "expectedFileChanges": [
1869
- "server/routes/items.js",
1870
- "public/js/items.js",
1871
- "public/js/app.js",
1872
- "server/test.js"
1873
- ]
1874
- },
1875
- "sourceSessionId": "8a0a1609-9031-4bcc-86ff-e690fef3b290",
1876
- "classifiedType": "coding:generation",
1877
- "expectedDiff": null,
1878
- "complexityIndicator": 4
1879
- },
1880
- {
1881
- "id": "agent-session-63ada37e",
1882
- "prompt": "I'll help you rename the function 'processData' to 'transformPayload' across the entire project. Let me start by exploring the project structure and finding all usages.",
1883
- "taskType": "coding-agent",
1884
- "difficulty": "hard",
1885
- "expectedTraits": [
1886
- "reads before writing",
1887
- "uses edit over write"
1888
- ],
1889
- "agentExpectations": {
1890
- "expectedToolCalls": [
1891
- "list_directory",
1892
- "read_file",
1893
- "grep_files",
1894
- "run_shell",
1895
- "ask_user",
1896
- "edit_file",
1897
- "lsp_references"
1898
- ],
1899
- "maxTurns": 50,
1900
- "expectedFileChanges": [
1901
- "server.js",
1902
- "test.js"
1903
- ]
1904
- },
1905
- "sourceSessionId": "a7b388ad-58e9-48ea-9278-b817ba300726",
1906
- "classifiedType": "coding:refactoring",
1907
- "expectedDiff": null,
1908
- "complexityIndicator": 2
1909
- },
1910
- {
1911
- "id": "agent-session-3a3b9214",
1912
- "prompt": "I'll start by exploring the project structure to understand the Express app and then implement rate limiting and CSRF protection.",
1913
- "taskType": "coding-agent",
1914
- "difficulty": "hard",
1915
- "expectedTraits": [
1916
- "reads before writing",
1917
- "uses edit over write"
1918
- ],
1919
- "agentExpectations": {
1920
- "expectedToolCalls": [
1921
- "list_directory",
1922
- "read_file",
1923
- "edit_file",
1924
- "run_shell"
1925
- ],
1926
- "maxTurns": 36,
1927
- "expectedFileChanges": [
1928
- "package.json",
1929
- "server.js",
1930
- "test.js"
1931
- ]
1932
- },
1933
- "sourceSessionId": "4a7b82b6-88ce-4bb3-a41a-d5ba58f2c83b",
1934
- "classifiedType": "coding:generation",
1935
- "expectedDiff": null,
1936
- "complexityIndicator": 3
1937
- },
1938
- {
1939
- "id": "agent-session-c28e561a",
1940
- "prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure to understand the codebase.",
1941
- "taskType": "coding-agent",
1942
- "difficulty": "medium",
1943
- "expectedTraits": [
1944
- "reads before writing",
1945
- "uses edit over write"
1946
- ],
1947
- "agentExpectations": {
1948
- "expectedToolCalls": [
1949
- "list_directory",
1950
- "read_file",
1951
- "edit_file",
1952
- "run_shell"
1953
- ],
1954
- "maxTurns": 14,
1955
- "expectedFileChanges": [
1956
- "server.js",
1957
- "test.js"
1958
- ]
1959
- },
1960
- "sourceSessionId": "91ccf531-0e27-45be-880b-9e37fd8b8349",
1961
- "classifiedType": "coding:testing",
1962
- "expectedDiff": null,
1963
- "complexityIndicator": 2
1964
- },
1965
- {
1966
- "id": "agent-session-ecc4ff3f",
1967
- "prompt": "I'll start by exploring the project structure to understand the codebase and locate the relevant files.",
1968
- "taskType": "coding-agent",
1969
- "difficulty": "hard",
1970
- "expectedTraits": [
1971
- "reads before writing",
1972
- "uses edit over write"
1973
- ],
1974
- "agentExpectations": {
1975
- "expectedToolCalls": [
1976
- "list_directory",
1977
- "read_file",
1978
- "run_shell",
1979
- "edit_file"
1980
- ],
1981
- "maxTurns": 48,
1982
- "expectedFileChanges": [
1983
- "brain.js",
1984
- "test.js"
1985
- ]
1986
- },
1987
- "sourceSessionId": "aeae6b54-c335-4fe4-a903-3e6b440fbe66",
1988
- "classifiedType": "coding:generation",
1989
- "expectedDiff": null,
1990
- "complexityIndicator": 2
1991
- },
1992
- {
1993
- "id": "agent-session-a29571f9",
1994
- "prompt": "I'll help you add tests for both ascending and descending sort. Let me start by exploring the project structure.",
1995
- "taskType": "coding-agent",
1996
- "difficulty": "medium",
1997
- "expectedTraits": [
1998
- "reads before writing",
1999
- "uses edit over write"
2000
- ],
2001
- "agentExpectations": {
2002
- "expectedToolCalls": [
2003
- "list_directory",
2004
- "read_file",
2005
- "edit_file",
2006
- "run_shell"
2007
- ],
2008
- "maxTurns": 14,
2009
- "expectedFileChanges": [
2010
- "server.js",
2011
- "test.js"
2012
- ]
2013
- },
2014
- "sourceSessionId": "bdc47279-f629-465b-bc8e-eaa22cb65266",
2015
- "classifiedType": "coding:testing",
2016
- "expectedDiff": null,
2017
- "complexityIndicator": 2
2018
- },
2019
- {
2020
- "id": "agent-session-8abb12e4",
2021
- "prompt": "I'll start by exploring the project structure to understand the codebase.",
2022
- "taskType": "coding-agent",
2023
- "difficulty": "hard",
2024
- "expectedTraits": [
2025
- "reads before writing",
2026
- "uses edit over write"
2027
- ],
2028
- "agentExpectations": {
2029
- "expectedToolCalls": [
2030
- "list_directory",
2031
- "read_file",
2032
- "edit_file",
2033
- "run_shell"
2034
- ],
2035
- "maxTurns": 50,
2036
- "expectedFileChanges": [
2037
- "server/routes/items.js",
2038
- "public/js/items.js",
2039
- "public/js/app.js",
2040
- "public/css/style.css",
2041
- "server/test.js",
2042
- "server/index.js"
2043
- ]
2044
- },
2045
- "sourceSessionId": "d25075ab-ec95-4e1d-813d-79c5dbf5bd68",
2046
- "classifiedType": "coding:generation",
2047
- "expectedDiff": null,
2048
- "complexityIndicator": 6
2049
- },
2050
- {
2051
- "id": "agent-session-cfd12d6c",
2052
- "prompt": "I'll help you convert this monorepo from CommonJS to ES Modules. Let me start by exploring the project structure.",
2053
- "taskType": "coding-agent",
2054
- "difficulty": "hard",
2055
- "expectedTraits": [
2056
- "reads before writing",
2057
- "plans before executing"
2058
- ],
2059
- "agentExpectations": {
2060
- "expectedToolCalls": [
2061
- "list_directory",
2062
- "read_file",
2063
- "run_shell",
2064
- "update_todos",
2065
- "edit_file",
2066
- "write_file"
2067
- ],
2068
- "maxTurns": 50,
2069
- "expectedFileChanges": [
2070
- "package.json",
2071
- "packages/shared/package.json",
2072
- "packages/api/package.json",
2073
- "packages/cli/package.json",
2074
- "packages/shared/formatters.js",
2075
- "packages/shared/validators.js",
2076
- "packages/shared/index.js",
2077
- "packages/shared/test.js",
2078
- "packages/api/data.js",
2079
- "packages/api/middleware.js",
2080
- "packages/api/routes.js",
2081
- "packages/api/server.js",
2082
- "packages/api/test.js",
2083
- "packages/cli/output.js",
2084
- "packages/cli/commands.js",
2085
- "packages/cli/index.js",
2086
- "packages/cli/test.js",
2087
- "test.js"
2088
- ]
2089
- },
2090
- "sourceSessionId": "ab4c19bb-d588-43dd-994b-fdf05431e525",
2091
- "classifiedType": "coding:generation",
2092
- "expectedDiff": null,
2093
- "complexityIndicator": 18
2094
- },
2095
- {
2096
- "id": "agent-session-f21db69e",
2097
- "prompt": "I'll start by exploring the project structure to understand the Flask app setup.",
2098
- "taskType": "coding-agent",
2099
- "difficulty": "hard",
2100
- "expectedTraits": [
2101
- "reads before writing",
2102
- "uses edit over write"
2103
- ],
2104
- "agentExpectations": {
2105
- "expectedToolCalls": [
2106
- "list_directory",
2107
- "read_file",
2108
- "edit_file",
2109
- "run_shell"
2110
- ],
2111
- "maxTurns": 30,
2112
- "expectedFileChanges": [
2113
- "app.py",
2114
- "test_app.py"
2115
- ]
2116
- },
2117
- "sourceSessionId": "d44e8d0e-ef75-4f49-8340-2c4628858fe9",
2118
- "classifiedType": "coding:generation",
2119
- "expectedDiff": null,
2120
- "complexityIndicator": 2
2121
- },
2122
- {
2123
- "id": "agent-session-e8eb3472",
2124
- "prompt": "I'll help you add a test for pagination with 25 users. Let me start by exploring the project structure.",
2125
- "taskType": "coding-agent",
2126
- "difficulty": "medium",
2127
- "expectedTraits": [
2128
- "reads before writing",
2129
- "uses edit over write"
2130
- ],
2131
- "agentExpectations": {
2132
- "expectedToolCalls": [
2133
- "list_directory",
2134
- "read_file",
2135
- "edit_file",
2136
- "run_shell"
2137
- ],
2138
- "maxTurns": 14,
2139
- "expectedFileChanges": [
2140
- "server.js",
2141
- "test.js"
2142
- ]
2143
- },
2144
- "sourceSessionId": "991abf0f-4bb1-48fa-8aa8-ddd97d45d6f8",
2145
- "classifiedType": "coding:testing",
2146
- "expectedDiff": null,
2147
- "complexityIndicator": 2
2148
- },
2149
- {
2150
- "id": "agent-session-bd5f99de",
2151
- "prompt": "I'll start by exploring the project structure and understanding the codebase, then run diagnostics to find type-related issues.",
2152
- "taskType": "coding-agent",
2153
- "difficulty": "hard",
2154
- "expectedTraits": [
2155
- "reads before writing",
2156
- "uses edit over write"
2157
- ],
2158
- "agentExpectations": {
2159
- "expectedToolCalls": [
2160
- "list_directory",
2161
- "read_file",
2162
- "lsp_diagnostics",
2163
- "run_shell",
2164
- "edit_file"
2165
- ],
2166
- "maxTurns": 32,
2167
- "expectedFileChanges": [
2168
- "src/handlers.js",
2169
- "src/utils.js"
2170
- ]
2171
- },
2172
- "sourceSessionId": "abeceb68-cda7-4791-a201-b0d8e00bbb90",
2173
- "classifiedType": "coding:generation",
2174
- "expectedDiff": null,
2175
- "complexityIndicator": 2
2176
1580
  }
2177
1581
  ]