agent-harness 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11c53accd50a5842f5f67a3ef1adb97e2d25539966e08cf5ed1a2659b047ce0b
4
- data.tar.gz: 562f0baad4bdc24dda2dcb7d65ef93d7657be1e64e2460400afc28e7ac419afa
3
+ metadata.gz: 0a5be3b23b73351341808a0f62fb6e060f226c309ceb508c378ad3d549273e61
4
+ data.tar.gz: aa6f754664cd08deeb36f8a54ec3531249a10c80bbe6561706a59db1191afe4f
5
5
  SHA512:
6
- metadata.gz: c9e7fb58eb6298e79f193b9de47c5fff995f92be7a9aff396277cf64489022d92030464b5cc08da506cc4496a4872dfc1fea1ca880f3d4cb82ad5133eaa65622
7
- data.tar.gz: 40ca98102aedafdefb12d00794b721b0eb04f376eb7695034ca840ce17582abfb0741cd689f5c1a27b516acffa85399f9b58550fb516fe8e9e5652c3bb01d8d0
6
+ metadata.gz: d28ce3a2ecc67df9f125c1bb82f51192811fd5cd8394252f9450ec746876d125d2487a65eef6bbf925bdb4438bd74e554213d9ac5aacbbca398357ba32ae1570
7
+ data.tar.gz: 63f514add56b1975e1d1a70f1962f8512a9af6eb4427ce893d4fd1c364bc054824fd5208198d61a6ecab0162cd4d382351bb09bcec7c4c0c7f63c5804ac007cd
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.6.0"
2
+ ".": "0.7.0"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,158 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.7.0](https://github.com/viamin/agent-harness/compare/agent-harness/v0.6.0...agent-harness/v0.7.0) (2026-04-13)
4
+
5
+
6
+ ### Features
7
+
8
+ * **copilot:** add JSON output parsing and token extraction ([4f5fc5a](https://github.com/viamin/agent-harness/commit/4f5fc5acd8d45ac8563998a132a0c4878f3b9e0a))
9
+ * **kilocode:** extract token usage from Kilo CLI structured JSON output ([b5384f8](https://github.com/viamin/agent-harness/commit/b5384f8be52431f95d8aa3524a33ceed6bf094eb)), closes [#97](https://github.com/viamin/agent-harness/issues/97)
10
+
11
+
12
+ ### Bug Fixes
13
+
14
+ * **copilot:** add nil guard for stdout and improve error string construction ([6a30ce3](https://github.com/viamin/agent-harness/commit/6a30ce342100b27c0b16fc8c2abdce48bbf10ef7))
15
+ * **copilot:** align error ordering with base parser ([0a02d34](https://github.com/viamin/agent-harness/commit/0a02d34cbf07e4f4ecb4d3efc6d69b5b072c6114))
16
+ * **copilot:** align metadata and reply parsing ([e5c3387](https://github.com/viamin/agent-harness/commit/e5c338743dbb5ec8eea5b1a8de5a515f1df7e141))
17
+ * **copilot:** avoid double-counting token aliases ([40e78f3](https://github.com/viamin/agent-harness/commit/40e78f34a6304a5ae21e26c6c291eed773618bea))
18
+ * **copilot:** avoid mixing shutdown token totals ([c4bdfb8](https://github.com/viamin/agent-harness/commit/c4bdfb8fd4c20781ef4621cf421947b14514cb45))
19
+ * **copilot:** drop superseded delta chunks ([769acd6](https://github.com/viamin/agent-harness/commit/769acd6a45f1037f11fc83ea23f5ddeda9aadd17))
20
+ * **copilot:** fall back across malformed token aliases ([9c9f5f8](https://github.com/viamin/agent-harness/commit/9c9f5f8048f63407b6ffc14fb339c26158f74dab))
21
+ * **copilot:** fall back from empty nested message content ([ecd9f49](https://github.com/viamin/agent-harness/commit/ecd9f497e0ef9812f2df363b02679b5842cf668c))
22
+ * **copilot:** fall back from empty shutdown metrics ([0397f1e](https://github.com/viamin/agent-harness/commit/0397f1e5f9442d0d6489c9fe7744d31a0ff48965))
23
+ * **copilot:** fall back from malformed nested message content ([a313487](https://github.com/viamin/agent-harness/commit/a313487f2c1bb9fbf5e7a60e5dc08e7a7079447c))
24
+ * **copilot:** fall back from malformed usage payloads ([733599c](https://github.com/viamin/agent-harness/commit/733599c599cda90873cec823c245b13c81f74ee6))
25
+ * **copilot:** gate json output by cli version ([528d03b](https://github.com/viamin/agent-harness/commit/528d03bed7506996cf9cfd6c4cf54807da254260))
26
+ * **copilot:** guard scalar json events ([13a4131](https://github.com/viamin/agent-harness/commit/13a413157cc159cfc4fd6b7e7ab7fdbb948d07b6))
27
+ * **copilot:** handle JSON event envelopes and camelCase token fields ([e0ee83e](https://github.com/viamin/agent-harness/commit/e0ee83ed73d715d9c67806b5253abab33cce9e19))
28
+ * **copilot:** hash unresolved probe path keys ([ea9aca2](https://github.com/viamin/agent-harness/commit/ea9aca215b000833be18bf035cba6c0bf029615d))
29
+ * **copilot:** hide structured control events from output ([81c108d](https://github.com/viamin/agent-harness/commit/81c108d521e8f72522dc1dc604cccdf46f0d01d4))
30
+ * **copilot:** ignore delta chunks after final reply ([6aef1ca](https://github.com/viamin/agent-harness/commit/6aef1ca4459a5588fc53b70692f95c6eff3b9d88))
31
+ * **copilot:** ignore empty delta chunks ([dca6395](https://github.com/viamin/agent-harness/commit/dca63951866a4dc65b2adca3b834d05a6716f298))
32
+ * **copilot:** ignore failed version probes ([fa6ba35](https://github.com/viamin/agent-harness/commit/fa6ba35e1e36f153f47bccf9c423cea7927176e7))
33
+ * **copilot:** ignore malformed delta content fallback ([0c9211b](https://github.com/viamin/agent-harness/commit/0c9211bb16e713f4b7185c1b9ccda5d065b9d405))
34
+ * **copilot:** ignore malformed token payloads ([0f2f06b](https://github.com/viamin/agent-harness/commit/0f2f06b9a8c0b4244c7b8040e5590cc6e4710143))
35
+ * **copilot:** ignore malformed typed json fallbacks ([0cd5535](https://github.com/viamin/agent-harness/commit/0cd553594aff277ce07abd3d151028ad22b3f597))
36
+ * **copilot:** ignore nested non-assistant fallback text ([799f976](https://github.com/viamin/agent-harness/commit/799f976ef19618acaae1a6e0ecf7faa76b9ff37f))
37
+ * **copilot:** ignore non-assistant top-level messages ([119c854](https://github.com/viamin/agent-harness/commit/119c8540e7db9f1827eac75e73403b638cf8db23))
38
+ * **copilot:** ignore non-assistant top-level token payloads ([23c05b9](https://github.com/viamin/agent-harness/commit/23c05b9dbc6231755783f4a8eefd5099067f9389))
39
+ * **copilot:** ignore partial invalid token aliases ([3344c07](https://github.com/viamin/agent-harness/commit/3344c078d291277b8831ab69fa5d2a40ca95135b))
40
+ * **copilot:** isolate probe cache for PATH overrides ([5fa79a9](https://github.com/viamin/agent-harness/commit/5fa79a91fd90dd33a15d5c6a0c25c2619b8f0ac3))
41
+ * **copilot:** keep delta output on empty final reply ([64fbf68](https://github.com/viamin/agent-harness/commit/64fbf68ffb66b58d7546bc1841c78fec86201acb))
42
+ * **copilot:** keep preflight errors inside base handler ([49c9075](https://github.com/viamin/agent-harness/commit/49c9075d4a1fb9ff42d633723cb5f375e8c2721c))
43
+ * **copilot:** merge partial shutdown token totals ([49d6b2b](https://github.com/viamin/agent-harness/commit/49d6b2b6727297a58e9aa265347c781405012aa0))
44
+ * **copilot:** merge top-level token fallbacks ([826c1f9](https://github.com/viamin/agent-harness/commit/826c1f9fd427b10c40fb34c5b47f4c9fb06f1e64))
45
+ * **copilot:** parse session shutdown token totals ([0148afd](https://github.com/viamin/agent-harness/commit/0148afd2f95312eae687799541545dd8ece185f3))
46
+ * **copilot:** parse streamed delta reply events ([55f553f](https://github.com/viamin/agent-harness/commit/55f553f39dbe729588641b86168f8c36500ec872))
47
+ * **copilot:** prefer final replies and trim probe cache keys ([38dc20e](https://github.com/viamin/agent-harness/commit/38dc20edac2326265c8f9d149394650b70c43e90))
48
+ * **copilot:** prefer final reply over delta chunks ([955654d](https://github.com/viamin/agent-harness/commit/955654db325cc47c8cf0ac992de55e574a45c641))
49
+ * **copilot:** prefer nested assistant message fallback ([15aa6db](https://github.com/viamin/agent-harness/commit/15aa6dbf4bb171eaaa5b85c552008953141ba622))
50
+ * **copilot:** prefer per-turn usage over shutdown totals ([38bd47c](https://github.com/viamin/agent-harness/commit/38bd47c00e5d74ef365a74dc61df9e2fdc5b9c04))
51
+ * **copilot:** prefer populated top-level usage payloads ([8b3aac0](https://github.com/viamin/agent-harness/commit/8b3aac029004f65efe26e8b3fc27f62ac2008dca))
52
+ * **copilot:** preserve blank mixed output lines ([e9830fc](https://github.com/viamin/agent-harness/commit/e9830fcfcceeb7fc67caac658ed16867e1f303d0))
53
+ * **copilot:** preserve empty nested message payloads ([edf27bc](https://github.com/viamin/agent-harness/commit/edf27bcafbfb29dc0f5baf54fcedb8e959e20bba))
54
+ * **copilot:** preserve empty top-level fallback payloads ([1863854](https://github.com/viamin/agent-harness/commit/1863854af8545573a6c9a80cc47297487ee6d2ac))
55
+ * **copilot:** preserve legitimate exit codes in responses ([d1a3cc0](https://github.com/viamin/agent-harness/commit/d1a3cc0f21ea1a45fee0dbf181462efc8b414fc8))
56
+ * **copilot:** preserve literal json stdout ([7d7862c](https://github.com/viamin/agent-harness/commit/7d7862c4da83ccf9bafbbca19b595911923edeed))
57
+ * **copilot:** preserve malformed top-level json output ([f7c5bec](https://github.com/viamin/agent-harness/commit/f7c5becaa01584c53f1327be0afbd2fcebe7f3e8))
58
+ * **copilot:** preserve malformed usage hashes ([0eef69b](https://github.com/viamin/agent-harness/commit/0eef69b470927412c405a774125ca0aa9a58e302))
59
+ * **copilot:** preserve mixed json and text output ([95de0f7](https://github.com/viamin/agent-harness/commit/95de0f72a2bd8a86d33ab0806fb1baf481e52d03))
60
+ * **copilot:** preserve mixed output line boundaries ([b285526](https://github.com/viamin/agent-harness/commit/b28552627192aa613c73c7bf2c8c8afed6811c36))
61
+ * **copilot:** preserve mixed plain-text output ([d277f3a](https://github.com/viamin/agent-harness/commit/d277f3a4ed2799182a2083989a0bdaf9960df16b))
62
+ * **copilot:** preserve non-event typed json output ([27d01c6](https://github.com/viamin/agent-harness/commit/27d01c649cab65ba2da206636f45170af5abbcc9))
63
+ * **copilot:** preserve scalar json stdout ([7c5e74c](https://github.com/viamin/agent-harness/commit/7c5e74cd9aed9de17f572b98faac9c09b8cd9707))
64
+ * **copilot:** preserve unknown typed json output ([942edac](https://github.com/viamin/agent-harness/commit/942edac7bc36d9e26a7c02945f15503ce7faeb73))
65
+ * **copilot:** preserve zero token aliases ([853d251](https://github.com/viamin/agent-harness/commit/853d251869adb3cc36bebc5b6d750a58c70843f7))
66
+ * **copilot:** probe json support per request env ([da94082](https://github.com/viamin/agent-harness/commit/da94082a79a044eb89b470b26eeda29196e7ac95))
67
+ * **copilot:** reject invalid token counts ([106c386](https://github.com/viamin/agent-harness/commit/106c3867d024751e77aa0fc215c651de87230a13))
68
+ * **copilot:** restore reply token fallback ([c9d7182](https://github.com/viamin/agent-harness/commit/c9d71820b0becc80e44a77169c2990be20469be2))
69
+ * **copilot:** restrict token accumulation to usage event types only ([80c979b](https://github.com/viamin/agent-harness/commit/80c979b9952defa05923920a737bb74e1c8885e4))
70
+ * **copilot:** skip blank assistant boundaries ([b9dec9a](https://github.com/viamin/agent-harness/commit/b9dec9af0c6084e1809dc1fb1c50535b987037a1))
71
+ * **copilot:** skip json parsing in legacy mode ([af9ac12](https://github.com/viamin/agent-harness/commit/af9ac1253aa4246c92727bc04f23d6704ec8926e))
72
+ * **copilot:** store probe env per thread ([301f2aa](https://github.com/viamin/agent-harness/commit/301f2aa4c43b183dea8450d21eb7fec799e92ec6))
73
+ * **copilot:** stub json support in parser specs ([96a945e](https://github.com/viamin/agent-harness/commit/96a945ec1572e10231b696008c7514d1acb92c11))
74
+ * **copilot:** sum reply token fallback ([82c5097](https://github.com/viamin/agent-harness/commit/82c5097b73f17e64cd3015c34aa92bfd713c0428))
75
+ * **copilot:** support snake_case delta chunks ([d33ecbc](https://github.com/viamin/agent-harness/commit/d33ecbcc446d895d35f03a7513ad7b16f0f57b1d))
76
+ * **copilot:** support snake_case shutdown metrics ([8c6e93e](https://github.com/viamin/agent-harness/commit/8c6e93e70e0927c95c63811b247291faaceacab0))
77
+ * **copilot:** suppress additional control events ([b14b8bb](https://github.com/viamin/agent-harness/commit/b14b8bbebb2fcc0c975788c1168965bee3281a79))
78
+ * **copilot:** suppress control event namespaces ([7b31bb2](https://github.com/viamin/agent-harness/commit/7b31bb293fb858808e493a23e268d89a590741a0))
79
+ * **copilot:** suppress root control events ([5d4d3ec](https://github.com/viamin/agent-harness/commit/5d4d3ecb80864ad4d20e0da1083ac02d90773fbf))
80
+ * **copilot:** update output_format metadata and add missing parse_response tests ([7fdedde](https://github.com/viamin/agent-harness/commit/7fdeddee3bbe5eca96f29f5613a5c36189695ec4))
81
+ * **kilocode:** aggregate token counts across multiple step_finish events ([23c8c55](https://github.com/viamin/agent-harness/commit/23c8c55def0452de1e0b25765c4f6d1fcd3474d4))
82
+ * **kilocode:** avoid raw ndjson in structured failures ([932e56e](https://github.com/viamin/agent-harness/commit/932e56e06567095750a7bcf15bf5adea065eab44))
83
+ * **kilocode:** clear stale extra usage categories ([2ff7079](https://github.com/viamin/agent-harness/commit/2ff70795a9e3397d5844de92c47a59b49a599426))
84
+ * **kilocode:** count extra result usage tokens ([f65dd3d](https://github.com/viamin/agent-harness/commit/f65dd3d8e4f54174b18233ddd55dba2d3c3fd1f3))
85
+ * **kilocode:** count reasoning and cache step tokens ([c46d089](https://github.com/viamin/agent-harness/commit/c46d089d4d0492534d98736dcdcb1ff82a67d0c1))
86
+ * **kilocode:** fail on structured error events ([48f5378](https://github.com/viamin/agent-harness/commit/48f5378f37cf64af28238c67314a01a3758aab05))
87
+ * **kilocode:** fall back to result text after blank chunks ([7508c58](https://github.com/viamin/agent-harness/commit/7508c58a84f365a680578891dd582a6f13484f1e))
88
+ * **kilocode:** fall back to step token totals when usage is incomplete ([8534691](https://github.com/viamin/agent-harness/commit/8534691ff74ba4efa1ae5d50de329bc53e863bcf))
89
+ * **kilocode:** fall through blank part message aliases ([85db058](https://github.com/viamin/agent-harness/commit/85db05824684cde93755c4d87323bce79e0a1dd7))
90
+ * **kilocode:** fall through blank part text chunks ([417a4c8](https://github.com/viamin/agent-harness/commit/417a4c8005f23c28f5a68a834890614ca49c5dca))
91
+ * **kilocode:** fall through blank text aliases ([63ed6a1](https://github.com/viamin/agent-harness/commit/63ed6a1bb002329ab823a8844e0ac81ef065938b))
92
+ * **kilocode:** guard malformed structured output payloads ([30a59a2](https://github.com/viamin/agent-harness/commit/30a59a21d3b4012cec4dbfa27b5471b9dee329bd))
93
+ * **kilocode:** guard scalar structured error payloads ([26843a9](https://github.com/viamin/agent-harness/commit/26843a90019d3f29f15eb363d098f0bba0f00582))
94
+ * **kilocode:** guard step_finish part.tokens against non-Hash values ([ff2d841](https://github.com/viamin/agent-harness/commit/ff2d8414eb928bd679d9754a71d0849963298a4b))
95
+ * **kilocode:** honor explicit usage totals ([d1e5275](https://github.com/viamin/agent-harness/commit/d1e5275f3515ae40db4b523e0efe2ce8a3d169a1))
96
+ * **kilocode:** honor later explicit total alias updates ([496df42](https://github.com/viamin/agent-harness/commit/496df425a8a5e76e97134dea8b1f74a0067aad1b))
97
+ * **kilocode:** honor synthesized result usage totals ([b0779a7](https://github.com/viamin/agent-harness/commit/b0779a78400c1a3b6aa38610582c5cf96446b28e))
98
+ * **kilocode:** honor valid total fallback aliases ([d0ae122](https://github.com/viamin/agent-harness/commit/d0ae12245be3e606d0919bf78dcc858525c64036))
99
+ * **kilocode:** ignore blank terminal result strings ([ebb34b6](https://github.com/viamin/agent-harness/commit/ebb34b663c7c0a933aedc17efba444263a891b80))
100
+ * **kilocode:** ignore negative token counts ([aee8d1e](https://github.com/viamin/agent-harness/commit/aee8d1e35e438ae25bf3a37571d273d891b89b24))
101
+ * **kilocode:** ignore non-string text payloads ([4a35a6f](https://github.com/viamin/agent-harness/commit/4a35a6fb1555923b3d1555bc895051bbaa7db26c))
102
+ * **kilocode:** ignore scalar json fallback noise ([1f62463](https://github.com/viamin/agent-harness/commit/1f624631477557ae533d605ecf37a5fb40c39544))
103
+ * **kilocode:** ignore usage on non-usage events ([b23ac10](https://github.com/viamin/agent-harness/commit/b23ac10c0d4107e05a50a2dc83204c0088bddbbb))
104
+ * **kilocode:** ignore whitespace-only text alias placeholders ([62bb641](https://github.com/viamin/agent-harness/commit/62bb641a7b7ac5434b2e905c6b297b2abf989020))
105
+ * **kilocode:** ignore whitespace-only text chunks ([699cccc](https://github.com/viamin/agent-harness/commit/699cccc1025b83a3850b7ed2670ff60de39e6adf))
106
+ * **kilocode:** keep last usable structured usage payload ([1b9d9bd](https://github.com/viamin/agent-harness/commit/1b9d9bd7ebbaa0ae9ed6dd7379f9b8c67de19025))
107
+ * **kilocode:** keep stdout diagnostics with structured errors ([76446fc](https://github.com/viamin/agent-harness/commit/76446fc8a95c89c9944b47db794ddb776e29686f))
108
+ * **kilocode:** merge partial structured usage events ([2de37e2](https://github.com/viamin/agent-harness/commit/2de37e246317650f60a0399c2f731f03ecf28ec9))
109
+ * **kilocode:** normalize malformed token counts ([ef4c3dc](https://github.com/viamin/agent-harness/commit/ef4c3dce783e71aae1b60c543da59f64b4efdb6c))
110
+ * **kilocode:** parse hash-shaped structured error aliases ([431f8c4](https://github.com/viamin/agent-harness/commit/431f8c438d7b13bf45d3416e947d3ad34b8b4eca))
111
+ * **kilocode:** parse NDJSON event stream instead of single JSON object ([4e1252f](https://github.com/viamin/agent-harness/commit/4e1252fc384c51d118b43647a7026281927b6794))
112
+ * **kilocode:** parse nested part error messages ([8d49794](https://github.com/viamin/agent-harness/commit/8d49794f79bbe553cfe8f569867508bb80a86805))
113
+ * **kilocode:** parse nested structured error messages ([b9fee6a](https://github.com/viamin/agent-harness/commit/b9fee6a8814045f0c5329bac23b0f0b424b17566))
114
+ * **kilocode:** parse token usage from step_finish.part.tokens ([bbf5a58](https://github.com/viamin/agent-harness/commit/bbf5a58fc6bc631a483a3e71bc0d8b99744e9f7b))
115
+ * **kilocode:** pass legitimate_exit_codes in Response metadata ([dac77c2](https://github.com/viamin/agent-harness/commit/dac77c24711bf717b7d98d847a21d3922889026b))
116
+ * **kilocode:** preserve base error stream ordering ([5281d78](https://github.com/viamin/agent-harness/commit/5281d7875c54cd010364b251069d921c4c5e4838))
117
+ * **kilocode:** preserve extra usage totals without io counts ([b4fb265](https://github.com/viamin/agent-harness/commit/b4fb2657aa56ad42e514a42dd0176742a48fdbc4))
118
+ * **kilocode:** preserve json array fallback output ([16cb566](https://github.com/viamin/agent-harness/commit/16cb5668e1442a9232ca5fcc38d0b196eb673956))
119
+ * **kilocode:** preserve mixed structured failure diagnostics ([5ec3be7](https://github.com/viamin/agent-harness/commit/5ec3be7ff143803c8e9602a1d76b6c7c4c0beb12))
120
+ * **kilocode:** preserve mixed structured success output ([ea64e2e](https://github.com/viamin/agent-harness/commit/ea64e2ea7154c15193ce35c115874bb898dcdc84))
121
+ * **kilocode:** preserve provider token totals ([b62d749](https://github.com/viamin/agent-harness/commit/b62d7499070aa5233c56b7207b6a2aede97a7dfc))
122
+ * **kilocode:** preserve raw mixed stdout spacing ([881c51e](https://github.com/viamin/agent-harness/commit/881c51e9a88705e0d8e28e06cb7fd7381f55feed))
123
+ * **kilocode:** preserve raw output for non-event json ([ce9be0f](https://github.com/viamin/agent-harness/commit/ce9be0f5f8d807dda995202fbe2dc26cddee32b2))
124
+ * **kilocode:** preserve step extras with partial usage ([14eeddb](https://github.com/viamin/agent-harness/commit/14eeddb6ec9e056c8b20ee9251e42a26de260079))
125
+ * **kilocode:** preserve step token totals for partial usage ([5efe2f4](https://github.com/viamin/agent-harness/commit/5efe2f4d3804f52cfae1b6ee63da87d3459a8c0f))
126
+ * **kilocode:** preserve terminal result payload spacing ([977faa7](https://github.com/viamin/agent-harness/commit/977faa7eed454c6070503589cc995f3021d19476))
127
+ * **kilocode:** preserve terminal result text ([282ae35](https://github.com/viamin/agent-harness/commit/282ae35051b715f0210534f31adb87152c75b6b7))
128
+ * **kilocode:** preserve terminal result text across result events ([5652453](https://github.com/viamin/agent-harness/commit/56524531434d9d7c71f7af3acd17d25dbf9656ca))
129
+ * **kilocode:** preserve unreconstructable step totals ([a906ee9](https://github.com/viamin/agent-harness/commit/a906ee951d6dfa5aa74e149f2e41fa214ffad2cf))
130
+ * **kilocode:** preserve unreconstructable totals with result extras ([7585e29](https://github.com/viamin/agent-harness/commit/7585e29d0eb6668f48b6db3ab033272b74d4831f))
131
+ * **kilocode:** preserve whitespace in text alias chunks ([a6592c1](https://github.com/viamin/agent-harness/commit/a6592c12e753a91cdd8f6c0041100c2a0c01d89a))
132
+ * **kilocode:** read terminal result text from hash payloads ([2ac7012](https://github.com/viamin/agent-harness/commit/2ac701278133e2f0fee64a7cb33f854ecff0754b))
133
+ * **kilocode:** recompute totals from updated usage ([aafabfd](https://github.com/viamin/agent-harness/commit/aafabfd972c56e35bcaa36eb92b1658cb6579a6c))
134
+ * **kilocode:** reject fractional token counts ([935a41d](https://github.com/viamin/agent-harness/commit/935a41d327f7d9433368db40da93ab1983e14cbc))
135
+ * **kilocode:** reject non-decimal string token counts ([0be6ddf](https://github.com/viamin/agent-harness/commit/0be6ddf31a306face9b61498d1a77d08a94a02a5))
136
+ * **kilocode:** remove stray binstubs and add missing test coverage ([25287f7](https://github.com/viamin/agent-harness/commit/25287f736c8bd81820553fc5ca2517e1c64f261c))
137
+ * **kilocode:** replace stale partial extra usage fields ([4b89027](https://github.com/viamin/agent-harness/commit/4b8902768620ffb5787e194753b14f6eb7c496d7))
138
+ * **kilocode:** skip scalar JSON lines before reading event fields ([b8fce27](https://github.com/viamin/agent-harness/commit/b8fce27bceb998a6a4b2a3de7ed95f96a2794c3d))
139
+ * **kilocode:** support hash-shaped part text aliases ([778e25c](https://github.com/viamin/agent-harness/commit/778e25c62cf32bafd1c0fb4bae32b2ea13cf1df9))
140
+ * **kilocode:** support nested result message aliases ([e8a5988](https://github.com/viamin/agent-harness/commit/e8a5988e34dfb1afd70eccb551985fa7e273192e))
141
+ * **kilocode:** support result text aliases ([63ccff9](https://github.com/viamin/agent-harness/commit/63ccff9be0c32ffcfd04e3745a441d6d2370eeac))
142
+ * **kilocode:** support scalar part structured errors ([2dd4740](https://github.com/viamin/agent-harness/commit/2dd47408f39340b075815ce64ac02c588fc445de))
143
+ * **kilocode:** support scalar text part payloads ([b9e9013](https://github.com/viamin/agent-harness/commit/b9e90137e6b43fb2eb789857da319221203a7f76))
144
+ * **kilocode:** support text event alias payloads ([4c425ff](https://github.com/viamin/agent-harness/commit/4c425fff93dcd872e2640743cb21da9668f952b7))
145
+ * **kilocode:** support top-level result text aliases ([9ce7c02](https://github.com/viamin/agent-harness/commit/9ce7c0249acaf3b44731271fc8b50d8fbb14cedc))
146
+ * **kilocode:** support top-level structured error text ([cad1d31](https://github.com/viamin/agent-harness/commit/cad1d31007d79b75daecb61b4f1ed8573f9e70a0))
147
+ * **kilocode:** suppress raw ndjson output for structured events ([d7a07d7](https://github.com/viamin/agent-harness/commit/d7a07d70812b40a75f6a09924d1fd609e99a61df))
148
+ * **kilocode:** treat missing usage tokens as unknown ([5e6335f](https://github.com/viamin/agent-harness/commit/5e6335f901e8f197de9c8f4df5727f90fc06ee43))
149
+ * **kilocode:** whitelist structured event types ([d6d8a7d](https://github.com/viamin/agent-harness/commit/d6d8a7d5bf6268050bf2083494360debeec80fab))
150
+
151
+
152
+ ### Improvements
153
+
154
+ * **kilocode:** remove unreachable structured error branch ([3d7ec3d](https://github.com/viamin/agent-harness/commit/3d7ec3d94e7be6b6f2c2c92d0daf0ce0590c4067))
155
+
3
156
  ## [0.6.0](https://github.com/viamin/agent-harness/compare/agent-harness/v0.5.9...agent-harness/v0.6.0) (2026-04-12)
4
157
 
5
158
 
@@ -1,11 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "digest"
4
+ require "json"
5
+
3
6
  module AgentHarness
4
7
  module Providers
5
8
  # GitHub Copilot CLI provider
6
9
  #
7
10
  # Provides integration with the GitHub Copilot CLI tool.
8
11
  class GithubCopilot < Base
12
+ MIN_JSON_OUTPUT_VERSION = Gem::Version.new("0.0.422").freeze
13
+ REQUEST_PROBE_ENV_STACK_KEY = :agent_harness_github_copilot_request_probe_env_stack
14
+
9
15
  # Model name pattern for GitHub Copilot (uses OpenAI models)
10
16
  MODEL_PATTERN = /^gpt-[\d.o-]+(?:-turbo)?(?:-mini)?$/i
11
17
 
@@ -144,10 +150,16 @@ module AgentHarness
144
150
  :oauth
145
151
  end
146
152
 
153
+ def send_message(prompt:, **options)
154
+ with_request_probe_env(request_probe_env_from_raw_runtime(options[:provider_runtime])) do
155
+ super(prompt: prompt, **options)
156
+ end
157
+ end
158
+
147
159
  def execution_semantics
148
160
  {
149
161
  prompt_delivery: :arg,
150
- output_format: :text,
162
+ output_format: copilot_cli_supports_json_output? ? :json : :text,
151
163
  sandbox_aware: false,
152
164
  uses_subcommand: true,
153
165
  non_interactive_flag: nil,
@@ -186,6 +198,7 @@ module AgentHarness
186
198
 
187
199
  def build_command(prompt, options)
188
200
  cmd = [self.class.binary_name, "what-the-shell", prompt]
201
+ cmd += ["--output-format", "json"] if copilot_cli_supports_json_output?
189
202
 
190
203
  # Opt in to unrestricted tool access explicitly to preserve a safe default.
191
204
  if supports_dangerous_mode? && options[:dangerous_mode]
@@ -203,6 +216,586 @@ module AgentHarness
203
216
  def default_timeout
204
217
  300
205
218
  end
219
+
220
+ def parse_response(result, duration:)
221
+ return super unless copilot_cli_supports_json_output?
222
+
223
+ output = result.stdout.to_s
224
+ error = nil
225
+
226
+ legitimate = execution_semantics[:legitimate_exit_codes] || [0]
227
+ unless legitimate.include?(result.exit_code)
228
+ combined = [result.stderr.to_s, output].map(&:strip).reject(&:empty?).join("\n")
229
+ error = combined unless combined.empty?
230
+ end
231
+
232
+ structured_json_seen = false
233
+ shutdown_tokens = empty_token_totals
234
+ usage_tokens = empty_token_totals
235
+ fallback_tokens = empty_token_totals
236
+ output_segments = []
237
+ authoritative_reply_seen = false
238
+ output.lines.each do |line|
239
+ stripped_line = line.strip
240
+ if stripped_line.empty?
241
+ output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
242
+ next
243
+ end
244
+ begin
245
+ obj = JSON.parse(stripped_line)
246
+ rescue JSON::ParserError
247
+ output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
248
+ next
249
+ end
250
+
251
+ structured_json_seen ||= obj.is_a?(Hash)
252
+
253
+ text, text_kind = extract_event_text(obj)
254
+ if text
255
+ if text_kind == :assistant_delta
256
+ next if authoritative_reply_seen
257
+
258
+ append_delta_segment!(output_segments, text, terminated: line.end_with?("\n"))
259
+ elsif !text.empty?
260
+ replace_assistant_segments!(output_segments, text, terminated: line.end_with?("\n"))
261
+ authoritative_reply_seen = true
262
+ end
263
+ elsif preserve_raw_json_line?(obj) || !obj.is_a?(Hash)
264
+ output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
265
+ end
266
+
267
+ token_usage = extract_token_usage(obj)
268
+ next unless token_usage
269
+
270
+ if token_usage[:source] == :shutdown
271
+ accumulate_token_totals!(shutdown_tokens, token_usage)
272
+ elsif token_usage[:source] == :usage
273
+ accumulate_token_totals!(usage_tokens, token_usage)
274
+ else
275
+ accumulate_token_totals!(fallback_tokens, token_usage)
276
+ end
277
+ end
278
+ tokens = build_tokens(shutdown_tokens: shutdown_tokens, usage_tokens: usage_tokens, fallback_tokens: fallback_tokens)
279
+ final_output = structured_json_seen ? render_output_segments(output_segments) : output
280
+
281
+ Response.new(
282
+ output: final_output,
283
+ exit_code: result.exit_code,
284
+ duration: duration,
285
+ provider: self.class.provider_name,
286
+ model: @config.model,
287
+ tokens: tokens,
288
+ error: error,
289
+ metadata: {
290
+ legitimate_exit_codes: legitimate
291
+ }
292
+ )
293
+ end
294
+
295
+ ASSISTANT_OUTPUT_EVENT_TYPES = %w[assistant assistant.message assistant.message_delta].freeze
296
+ ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES = %w[assistant assistant.message].freeze
297
+ SESSION_SHUTDOWN_EVENT_TYPES = ["session.shutdown"].freeze
298
+ USAGE_EVENT_TYPES = %w[usage assistant.usage].freeze
299
+ COPILOT_EVENT_TYPE_PREFIXES = %w[
300
+ assistant.
301
+ user.
302
+ user_input.
303
+ system.
304
+ session.
305
+ tool.
306
+ permission.
307
+ elicitation.
308
+ exit_plan_mode.
309
+ skill.
310
+ subagent.
311
+ external_tool.
312
+ command.
313
+ ].freeze
314
+ COPILOT_EVENT_TYPES = %w[
315
+ abort
316
+ command
317
+ elicitation
318
+ exit_plan_mode
319
+ external_tool
320
+ permission
321
+ session
322
+ skill
323
+ subagent
324
+ system
325
+ tool
326
+ user
327
+ user_input
328
+ ].freeze
329
+
330
+ def extract_event_text(obj)
331
+ return [nil, nil] unless obj.is_a?(Hash)
332
+
333
+ if obj.key?("type")
334
+ return [nil, nil] unless obj["data"].is_a?(Hash)
335
+ return [nil, nil] unless ASSISTANT_OUTPUT_EVENT_TYPES.include?(obj["type"])
336
+
337
+ data = obj["data"]
338
+ if obj["type"] == "assistant.message_delta"
339
+ delta_content = string_content(data["deltaContent"])
340
+ delta_content = string_content(data["delta_content"]) if delta_content.nil? || delta_content.empty?
341
+ return [delta_content, :assistant_delta] if delta_content && !delta_content.empty?
342
+
343
+ return [nil, nil]
344
+ end
345
+
346
+ return [string_content(data["content"]), :assistant] if data.key?("content")
347
+
348
+ return [nil, nil]
349
+ end
350
+
351
+ return [nil, nil] if obj.key?("role") && !assistant_role?(obj["role"])
352
+ return [nil, nil] if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
353
+ !assistant_role?(obj["message"]["role"])
354
+
355
+ if obj["message"].is_a?(Hash) && obj["message"].key?("content")
356
+ nested_content = string_content(obj["message"]["content"])
357
+ return [nested_content, :assistant] if nested_content && !nested_content.empty?
358
+ end
359
+
360
+ output = string_content(obj["output"])
361
+ return [output, :assistant] if output && !output.empty?
362
+
363
+ content = string_content(obj["content"])
364
+ return [content, :assistant] if content && !content.empty?
365
+
366
+ [nil, nil]
367
+ end
368
+
369
+ def string_content(value)
370
+ return value if value.is_a?(String)
371
+
372
+ nil
373
+ end
374
+
375
+ def preserve_raw_json_line?(obj)
376
+ return false unless obj.is_a?(Hash)
377
+ return false if obj.key?("type") && copilot_event_type?(obj["type"])
378
+ return true if obj.key?("type")
379
+ return false if obj.key?("role") && !assistant_role?(obj["role"])
380
+ return false if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
381
+ !assistant_role?(obj["message"]["role"])
382
+ return false if extract_token_usage(obj)
383
+ return false if (output = string_content(obj["output"])) && !output.empty?
384
+ return false if (content = string_content(obj["content"])) && !content.empty?
385
+ return false if obj["message"].is_a?(Hash) &&
386
+ (message_content = string_content(obj["message"]["content"])) &&
387
+ !message_content.empty?
388
+
389
+ true
390
+ end
391
+
392
+ def assistant_role?(role)
393
+ role == "assistant"
394
+ end
395
+
396
+ def copilot_event_type?(event_type)
397
+ return true if ASSISTANT_OUTPUT_EVENT_TYPES.include?(event_type)
398
+ return true if ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES.include?(event_type)
399
+ return true if SESSION_SHUTDOWN_EVENT_TYPES.include?(event_type)
400
+ return true if USAGE_EVENT_TYPES.include?(event_type)
401
+ return false unless event_type.is_a?(String)
402
+ return true if COPILOT_EVENT_TYPES.include?(event_type)
403
+
404
+ COPILOT_EVENT_TYPE_PREFIXES.any? { |prefix| event_type.start_with?(prefix) }
405
+ end
406
+
407
+ def extract_token_usage(obj)
408
+ return nil unless obj.is_a?(Hash)
409
+
410
+ if obj.key?("type")
411
+ return nil unless obj["data"].is_a?(Hash)
412
+
413
+ data = obj["data"]
414
+
415
+ if SESSION_SHUTDOWN_EVENT_TYPES.include?(obj["type"])
416
+ return extract_shutdown_token_usage(data)
417
+ end
418
+
419
+ if USAGE_EVENT_TYPES.include?(obj["type"])
420
+ return extract_payload_token_usage(
421
+ data,
422
+ source: :usage,
423
+ input_keys: ["inputTokens", "input_tokens"],
424
+ output_keys: ["outputTokens", "output_tokens"]
425
+ )
426
+ end
427
+
428
+ if ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES.include?(obj["type"])
429
+ return extract_payload_token_usage(
430
+ data,
431
+ source: :assistant,
432
+ input_keys: ["inputTokens", "input_tokens"],
433
+ output_keys: ["outputTokens", "output_tokens"]
434
+ )
435
+ end
436
+
437
+ return nil
438
+ end
439
+
440
+ extract_top_level_token_usage(obj)
441
+ end
442
+
443
+ def extract_shutdown_token_usage(data)
444
+ model_metrics = extract_shutdown_model_metrics_usage(data["modelMetrics"])
445
+ snake_case_model_metrics = extract_shutdown_model_metrics_usage(data["model_metrics"])
446
+
447
+ input, input_present = merged_token_metric(model_metrics, snake_case_model_metrics, :input)
448
+ output, output_present = merged_token_metric(model_metrics, snake_case_model_metrics, :output)
449
+ return nil unless input_present || output_present
450
+
451
+ {
452
+ source: :shutdown,
453
+ input: input,
454
+ output: output,
455
+ input_present: input_present,
456
+ output_present: output_present
457
+ }
458
+ end
459
+
460
+ def extract_shutdown_model_metrics_usage(model_metrics)
461
+ return nil unless model_metrics.is_a?(Hash)
462
+
463
+ totals = empty_token_totals
464
+
465
+ model_metrics.each_value do |metric|
466
+ next unless metric.is_a?(Hash)
467
+
468
+ usage = metric["usage"]
469
+ next unless usage.is_a?(Hash)
470
+
471
+ metric_usage = extract_payload_token_usage(
472
+ usage,
473
+ source: :shutdown,
474
+ input_keys: ["inputTokens", "input_tokens", "input"],
475
+ output_keys: ["outputTokens", "output_tokens", "output"]
476
+ )
477
+ next unless metric_usage
478
+
479
+ accumulate_token_totals!(totals, metric_usage)
480
+ end
481
+
482
+ return nil unless totals[:input_present] || totals[:output_present]
483
+
484
+ totals
485
+ end
486
+
487
+ def extract_payload_token_usage(payload, source:, input_keys:, output_keys:)
488
+ return nil unless payload.is_a?(Hash)
489
+
490
+ input, input_present = token_value(payload, *input_keys)
491
+ output, output_present = token_value(payload, *output_keys)
492
+ return nil unless input_present || output_present
493
+
494
+ {
495
+ source: source,
496
+ input: input,
497
+ output: output,
498
+ input_present: input_present,
499
+ output_present: output_present
500
+ }
501
+ end
502
+
503
+ def extract_top_level_token_usage(obj)
504
+ return nil if obj.key?("role") && !assistant_role?(obj["role"])
505
+ return nil if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
506
+ !assistant_role?(obj["message"]["role"])
507
+
508
+ usage = extract_payload_token_usage(
509
+ obj["usage"],
510
+ source: :usage,
511
+ input_keys: ["input_tokens", "inputTokens", "input"],
512
+ output_keys: ["output_tokens", "outputTokens", "output"]
513
+ )
514
+ tokens = extract_payload_token_usage(
515
+ obj["tokens"],
516
+ source: :usage,
517
+ input_keys: ["input_tokens", "inputTokens", "input"],
518
+ output_keys: ["output_tokens", "outputTokens", "output"]
519
+ )
520
+ return nil unless usage || tokens
521
+
522
+ input, input_present = merged_token_metric(usage, tokens, :input)
523
+ output, output_present = merged_token_metric(usage, tokens, :output)
524
+ return nil unless input_present || output_present
525
+
526
+ {
527
+ source: :usage,
528
+ input: input,
529
+ output: output,
530
+ input_present: input_present,
531
+ output_present: output_present
532
+ }
533
+ end
534
+
535
+ def merged_token_metric(primary, fallback, metric)
536
+ present_key = :"#{metric}_present"
537
+ return [primary[metric], true] if primary&.[](present_key)
538
+ return [fallback[metric], true] if fallback&.[](present_key)
539
+
540
+ [0, false]
541
+ end
542
+
543
+ def empty_token_totals
544
+ {
545
+ input: 0,
546
+ output: 0,
547
+ input_present: false,
548
+ output_present: false
549
+ }
550
+ end
551
+
552
+ def accumulate_token_totals!(totals, token_usage)
553
+ if token_usage[:input_present]
554
+ totals[:input_present] = true
555
+ totals[:input] += token_usage[:input]
556
+ end
557
+
558
+ return unless token_usage[:output_present]
559
+
560
+ totals[:output_present] = true
561
+ totals[:output] += token_usage[:output]
562
+ end
563
+
564
+ def token_value(obj, *keys)
565
+ keys.each do |candidate|
566
+ next unless obj.key?(candidate)
567
+
568
+ value, valid = coerce_token_value(obj[candidate])
569
+ return [value, true] if valid
570
+ end
571
+
572
+ [0, false]
573
+ end
574
+
575
+ def build_tokens(shutdown_tokens:, usage_tokens:, fallback_tokens:)
576
+ input, input_present = first_present_token_metric(usage_tokens, fallback_tokens, :input)
577
+ output, output_present = first_present_token_metric(usage_tokens, fallback_tokens, :output)
578
+ return token_hash(input, output, input_present, output_present) if input_present || output_present
579
+
580
+ input, input_present = first_present_token_metric(shutdown_tokens, :input)
581
+ output, output_present = first_present_token_metric(shutdown_tokens, :output)
582
+ token_hash(input, output, input_present, output_present)
583
+ end
584
+
585
+ def token_hash(input, output, input_present, output_present)
586
+ return nil unless input_present || output_present
587
+
588
+ {input: input, output: output, total: input + output}
589
+ end
590
+
591
+ def first_present_token_metric(*sources, metric)
592
+ present_key = :"#{metric}_present"
593
+
594
+ sources.each do |source|
595
+ next unless source[present_key]
596
+
597
+ return [source[metric], true]
598
+ end
599
+
600
+ [0, false]
601
+ end
602
+
603
+ def render_output_segments(segments)
604
+ rendered = +""
605
+ previous_kind = nil
606
+ previous_terminated = false
607
+
608
+ segments.each do |segment|
609
+ if previous_terminated && previous_kind == :assistant &&
610
+ segment[:kind] != :assistant &&
611
+ !rendered.empty? &&
612
+ !rendered.end_with?("\n")
613
+ rendered << "\n"
614
+ end
615
+
616
+ rendered << segment[:content]
617
+ previous_kind = segment[:kind]
618
+ previous_terminated = segment[:terminated]
619
+ end
620
+
621
+ rendered
622
+ end
623
+
624
+ def append_delta_segment!(segments, text, terminated:)
625
+ previous_segment = segments.last
626
+ if previous_segment&.[](:provisional) && previous_segment[:kind] == :assistant
627
+ previous_segment[:content] << text
628
+ previous_segment[:terminated] = terminated
629
+ return
630
+ end
631
+
632
+ segments << {
633
+ kind: :assistant,
634
+ content: +text,
635
+ terminated: terminated,
636
+ provisional: true
637
+ }
638
+ end
639
+
640
+ def replace_assistant_segments!(segments, text, terminated:)
641
+ drop_assistant_segments!(segments)
642
+ segments << {kind: :assistant, content: text, terminated: terminated}
643
+ end
644
+
645
+ def drop_assistant_segments!(segments)
646
+ segments.reject! { |segment| segment[:kind] == :assistant }
647
+ end
648
+
649
+ def with_request_probe_env(env)
650
+ stack = writable_request_probe_env_stack
651
+ stack << env
652
+ yield
653
+ ensure
654
+ stack&.pop
655
+ clear_request_probe_env_stack! if stack&.empty?
656
+ end
657
+
658
+ def current_probe_env
659
+ stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
660
+ stack = stacks && stacks[object_id]
661
+ stack&.last || {}
662
+ end
663
+
664
+ def version_probe_env_cache_key(env)
665
+ resolved_binary_path_for_env(env) ||
666
+ if env.key?("PATH")
667
+ [:path_override, cacheable_path_override(env["PATH"])]
668
+ else
669
+ self.class.binary_name
670
+ end
671
+ end
672
+
673
+ def cacheable_path_override(path)
674
+ return nil unless path.is_a?(String)
675
+
676
+ Digest::SHA256.hexdigest(path)
677
+ end
678
+
679
+ def resolved_binary_path_for_env(env)
680
+ path = if env.key?("PATH")
681
+ env["PATH"]
682
+ else
683
+ ENV["PATH"]
684
+ end
685
+ return nil unless path.is_a?(String) && !path.empty?
686
+
687
+ path.split(File::PATH_SEPARATOR).each do |entry|
688
+ full_path = File.join(entry, self.class.binary_name)
689
+ return full_path if File.executable?(full_path)
690
+ end
691
+
692
+ nil
693
+ end
694
+
695
+ def request_probe_env_from_raw_runtime(runtime)
696
+ case runtime
697
+ when nil
698
+ {}
699
+ when ProviderRuntime
700
+ runtime.env.merge(runtime.unset_env.to_h { |key| [key, nil] })
701
+ when Hash
702
+ request_probe_env_from_raw_hash(runtime)
703
+ else
704
+ {}
705
+ end
706
+ end
707
+
708
+ def request_probe_env_from_raw_hash(runtime_hash)
709
+ env = stringify_probe_env(runtime_hash[:env] || runtime_hash["env"])
710
+ unset_env = stringify_probe_unset_env(runtime_hash[:unset_env] || runtime_hash["unset_env"])
711
+ return {} unless env && unset_env
712
+
713
+ env.merge(unset_env.to_h { |key| [key, nil] })
714
+ end
715
+
716
+ def stringify_probe_env(raw_env)
717
+ return {} if raw_env.nil?
718
+ return nil unless raw_env.is_a?(Hash)
719
+
720
+ raw_env.each_with_object({}) do |(key, value), env|
721
+ return nil unless value.is_a?(String)
722
+
723
+ env[key.to_s] = value
724
+ end
725
+ end
726
+
727
+ def stringify_probe_unset_env(raw_unset_env)
728
+ return [] if raw_unset_env.nil?
729
+ return nil unless raw_unset_env.is_a?(Array)
730
+
731
+ raw_unset_env.map(&:to_s)
732
+ rescue NoMethodError
733
+ nil
734
+ end
735
+
736
+ def writable_request_probe_env_stack
737
+ stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
738
+ unless stacks
739
+ stacks = {}
740
+ Thread.current.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, stacks)
741
+ end
742
+
743
+ stacks[object_id] ||= []
744
+ end
745
+
746
+ def clear_request_probe_env_stack!
747
+ stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
748
+ return unless stacks
749
+
750
+ stacks.delete(object_id)
751
+ Thread.current.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, nil) if stacks.empty?
752
+ end
753
+
754
+ def copilot_cli_supports_json_output?(env: current_probe_env)
755
+ @copilot_cli_supports_json_output ||= {}
756
+ cache_key = version_probe_env_cache_key(env)
757
+ return @copilot_cli_supports_json_output[cache_key] if @copilot_cli_supports_json_output.key?(cache_key)
758
+
759
+ version = copilot_cli_version(env: env)
760
+ @copilot_cli_supports_json_output[cache_key] = !version.nil? && version >= MIN_JSON_OUTPUT_VERSION
761
+ rescue
762
+ @copilot_cli_supports_json_output[cache_key] = false
763
+ end
764
+
765
+ def copilot_cli_version(env: current_probe_env)
766
+ @copilot_cli_version ||= {}
767
+ cache_key = version_probe_env_cache_key(env)
768
+ return @copilot_cli_version[cache_key] if @copilot_cli_version.key?(cache_key)
769
+
770
+ result = @executor.execute([self.class.binary_name, "--version"], timeout: 5, env: env)
771
+ return @copilot_cli_version[cache_key] = nil unless result.exit_code.zero?
772
+
773
+ @copilot_cli_version[cache_key] = parse_copilot_cli_version(result.stdout) || parse_copilot_cli_version(result.stderr)
774
+ rescue
775
+ @copilot_cli_version[cache_key] = nil
776
+ end
777
+
778
+ def parse_copilot_cli_version(output)
779
+ match = output.to_s.match(/(\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?)/)
780
+ return nil unless match
781
+
782
+ Gem::Version.new(match[1])
783
+ rescue ArgumentError
784
+ nil
785
+ end
786
+
787
+ def coerce_token_value(value)
788
+ case value
789
+ when Integer
790
+ return [value, true] if value >= 0
791
+ when Float
792
+ return [value.to_i, true] if value.finite? && value >= 0 && value == value.to_i
793
+ when String
794
+ return [value.to_i, true] if /\A\+?\d+\z/.match?(value)
795
+ end
796
+
797
+ [0, false]
798
+ end
206
799
  end
207
800
  end
208
801
  end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+
3
5
  module AgentHarness
4
6
  module Providers
5
7
  # Kilocode CLI provider
@@ -9,6 +11,18 @@ module AgentHarness
9
11
  PACKAGE_NAME = "@kilocode/cli"
10
12
  DEFAULT_VERSION = "7.1.3"
11
13
  SUPPORTED_VERSION_REQUIREMENT = "= #{DEFAULT_VERSION}"
14
+ STRUCTURED_EVENT_TYPES = %w[text error step_finish result usage].freeze
15
+ USAGE_EVENT_TYPES = %w[result usage].freeze
16
+ TOKEN_USAGE_KEYS = %w[
17
+ input_tokens
18
+ output_tokens
19
+ total_tokens
20
+ total
21
+ reasoning_tokens
22
+ cache_creation_input_tokens
23
+ cache_read_input_tokens
24
+ cache_write_input_tokens
25
+ ].freeze
12
26
 
13
27
  class << self
14
28
  def provider_name
@@ -118,7 +132,7 @@ module AgentHarness
118
132
  def execution_semantics
119
133
  {
120
134
  prompt_delivery: :arg,
121
- output_format: :text,
135
+ output_format: :json,
122
136
  sandbox_aware: false,
123
137
  uses_subcommand: true,
124
138
  non_interactive_flag: nil,
@@ -131,14 +145,495 @@ module AgentHarness
131
145
  protected
132
146
 
133
147
  def build_command(prompt, options)
134
- cmd = [self.class.binary_name, "run"]
148
+ cmd = [self.class.binary_name, "run", "--format", "json"]
135
149
  cmd << prompt
136
150
  cmd
137
151
  end
138
152
 
153
+ def parse_response(result, duration:)
154
+ output = result.stdout
155
+ tokens = nil
156
+ structured_errors = []
157
+ error = nil
158
+ unstructured_output = nil
159
+
160
+ if result.failed?
161
+ combined = [result.stderr, result.stdout]
162
+ .map { |s| s.to_s.strip }
163
+ .reject(&:empty?)
164
+ .join("\n")
165
+ error = combined unless combined.empty?
166
+ end
167
+
168
+ text_parts = []
169
+ accumulated_input = 0
170
+ accumulated_output = 0
171
+ accumulated_total = 0
172
+ accumulated_extra_total = 0
173
+ has_step_tokens = false
174
+ result_usage = nil
175
+ result_text = nil
176
+ saw_structured_event = false
177
+
178
+ each_json_event(output) do |event|
179
+ next unless structured_event?(event)
180
+
181
+ saw_structured_event = true
182
+ part = event["part"]
183
+
184
+ if event["type"] == "text"
185
+ text = extract_text_chunk(event, part)
186
+ text_parts << text if text.is_a?(String)
187
+ end
188
+
189
+ if event["type"] == "result"
190
+ extracted_result_text = extract_terminal_result_text(event["result"]) ||
191
+ extract_terminal_result_text(part) ||
192
+ extract_terminal_result_text(event["text"]) ||
193
+ extract_terminal_result_text(event["message"])
194
+ result_text = extracted_result_text if extracted_result_text
195
+ end
196
+
197
+ if event["type"] == "error"
198
+ structured_error = extract_error_message(event)
199
+ structured_errors << structured_error if structured_error
200
+ end
201
+
202
+ if event["type"] == "step_finish"
203
+ part_tokens = part["tokens"] if part.is_a?(Hash)
204
+ if part_tokens.is_a?(Hash)
205
+ step_total = coerce_step_total_token_count(part_tokens)
206
+ step_token_counts = build_token_counts({
207
+ "input_tokens" => part_tokens["input"],
208
+ "output_tokens" => part_tokens["output"],
209
+ "total_tokens" => step_total
210
+ })
211
+
212
+ if step_token_counts
213
+ accumulated_input += step_token_counts[:input]
214
+ accumulated_output += step_token_counts[:output]
215
+ accumulated_total += step_token_counts[:total]
216
+ accumulated_extra_total += portable_step_extra_total(part_tokens, step_token_counts[:total])
217
+ has_step_tokens = true
218
+ end
219
+ end
220
+ end
221
+
222
+ usage = event["usage"]
223
+ if USAGE_EVENT_TYPES.include?(event["type"]) && usage.is_a?(Hash) && usage_has_token_data?(usage)
224
+ result_usage = merge_usage_data(result_usage, usage)
225
+ end
226
+ end
227
+
228
+ if saw_structured_event
229
+ unstructured_output = extract_unstructured_output(result.stdout)
230
+ joined_text = text_parts.join if text_parts.any?
231
+ output = if joined_text && !joined_text.strip.empty?
232
+ joined_text
233
+ else
234
+ result_text || unstructured_output
235
+ end
236
+ if result.failed? || structured_errors.any?
237
+ error = build_structured_error(
238
+ result,
239
+ structured_errors,
240
+ unstructured_output:
241
+ )
242
+ end
243
+ end
244
+ step_tokens = nil
245
+ if has_step_tokens
246
+ step_tokens = build_token_counts({
247
+ "input_tokens" => accumulated_input,
248
+ "output_tokens" => accumulated_output,
249
+ "total_tokens" => accumulated_total
250
+ })
251
+ end
252
+ fallback_total_remainder = [accumulated_total - accumulated_input - accumulated_output - accumulated_extra_total, 0].max
253
+ if result_usage
254
+ tokens = resolve_token_counts(
255
+ result_usage,
256
+ fallback: step_tokens,
257
+ fallback_extra_total: accumulated_extra_total,
258
+ fallback_total_remainder:
259
+ )
260
+ end
261
+ tokens ||= step_tokens
262
+
263
+ Response.new(
264
+ output: output,
265
+ exit_code: result.exit_code,
266
+ duration: duration,
267
+ provider: self.class.provider_name,
268
+ model: @config.model,
269
+ tokens: tokens,
270
+ error: error,
271
+ metadata: {
272
+ legitimate_exit_codes: execution_semantics[:legitimate_exit_codes]
273
+ }
274
+ )
275
+ end
276
+
139
277
  def default_timeout
140
278
  300
141
279
  end
280
+
281
+ private
282
+
283
+ def each_json_event(output)
284
+ return if output.nil? || output.empty?
285
+
286
+ output.each_line do |line|
287
+ line = line.strip
288
+ next if line.empty?
289
+
290
+ event = JSON.parse(line)
291
+ next unless event.is_a?(Hash)
292
+
293
+ yield event
294
+ rescue JSON::ParserError
295
+ next
296
+ end
297
+ end
298
+
299
+ def build_token_counts(usage)
300
+ input = coerce_token_count(usage["input_tokens"])
301
+ output = coerce_token_count(usage["output_tokens"])
302
+ total = coerce_total_token_count(usage, input:, output:)
303
+ return nil unless input || output || total
304
+
305
+ input ||= 0
306
+ output ||= 0
307
+
308
+ {input: input, output: output, total: total}
309
+ end
310
+
311
+ def resolve_token_counts(usage, fallback: nil, fallback_extra_total: 0, fallback_total_remainder: 0)
312
+ input = coerce_token_count(usage["input_tokens"])
313
+ output = coerce_token_count(usage["output_tokens"])
314
+ explicit_total = extract_explicit_total_token_count(usage)
315
+ usage_extra_total = usage_extra_token_total(usage)
316
+
317
+ input_from_fallback = input.nil? && fallback && !fallback[:input].nil?
318
+ output_from_fallback = output.nil? && fallback && !fallback[:output].nil?
319
+ fallback_total = fallback[:total] if fallback
320
+ input = fallback[:input] if input_from_fallback
321
+ output = fallback[:output] if output_from_fallback
322
+ return nil unless input || output || explicit_total || usage_extra_total || fallback_total
323
+
324
+ input ||= 0
325
+ output ||= 0
326
+
327
+ total = if explicit_total
328
+ explicit_total
329
+ elsif usage_extra_total
330
+ resolved_total = input + output + usage_extra_total
331
+ if (input_from_fallback || output_from_fallback) && fallback_total_remainder.positive?
332
+ [resolved_total, fallback_total].compact.max
333
+ else
334
+ resolved_total
335
+ end
336
+ elsif input_from_fallback || output_from_fallback
337
+ resolved_total = input + output + fallback_extra_total
338
+ fallback_total_remainder.positive? ? [resolved_total, fallback_total].compact.max : resolved_total
339
+ else
340
+ input + output
341
+ end
342
+
343
+ {input: input, output: output, total: total}
344
+ end
345
+
346
+ def usage_has_token_data?(usage)
347
+ input = coerce_token_count(usage["input_tokens"])
348
+ output = coerce_token_count(usage["output_tokens"])
349
+ explicit_total = extract_explicit_total_token_count(usage)
350
+ usage_extra_total = usage_extra_token_total(usage)
351
+
352
+ input || output || explicit_total || usage_extra_total
353
+ end
354
+
355
+ def merge_usage_data(previous_usage, current_usage)
356
+ return current_usage if previous_usage.nil?
357
+
358
+ merged_usage = previous_usage.slice(*TOKEN_USAGE_KEYS)
359
+ if usage_updates_explicit_total?(current_usage)
360
+ merged_usage.delete("total_tokens")
361
+ merged_usage.delete("total")
362
+ end
363
+
364
+ if usage_replaces_extra_fields?(current_usage)
365
+ merged_usage.delete("reasoning_tokens")
366
+ merged_usage.delete("cache_creation_input_tokens")
367
+ merged_usage.delete("cache_read_input_tokens")
368
+ merged_usage.delete("cache_write_input_tokens")
369
+ end
370
+
371
+ merged_usage.merge!(
372
+ current_usage.slice(*TOKEN_USAGE_KEYS).select { |key, value| usable_usage_token_field?(key, value) }
373
+ )
374
+
375
+ if usage_updates_non_total_fields?(current_usage) && !usage_updates_explicit_total?(current_usage)
376
+ merged_usage.delete("total_tokens")
377
+ merged_usage.delete("total")
378
+ end
379
+
380
+ merged_usage
381
+ end
382
+
383
+ def usable_usage_token_field?(key, value)
384
+ case key
385
+ when "input_tokens", "output_tokens", "total_tokens", "total", "reasoning_tokens",
386
+ "cache_creation_input_tokens", "cache_read_input_tokens", "cache_write_input_tokens"
387
+ !coerce_token_count(value).nil?
388
+ else
389
+ false
390
+ end
391
+ end
392
+
393
+ def usage_updates_non_total_fields?(usage)
394
+ %w[
395
+ input_tokens
396
+ output_tokens
397
+ reasoning_tokens
398
+ cache_creation_input_tokens
399
+ cache_read_input_tokens
400
+ cache_write_input_tokens
401
+ ].any? { |key| usable_usage_token_field?(key, usage[key]) }
402
+ end
403
+
404
+ def usage_updates_explicit_total?(usage)
405
+ %w[total_tokens total].any? { |key| usable_usage_token_field?(key, usage[key]) }
406
+ end
407
+
408
+ def usage_replaces_extra_fields?(usage)
409
+ usable_usage_token_field?("input_tokens", usage["input_tokens"]) &&
410
+ usable_usage_token_field?("output_tokens", usage["output_tokens"])
411
+ end
412
+
413
+ def extract_error_message(event)
414
+ error_payload = event["error"]
415
+ part = event["part"]
416
+ part_error_payload = part["error"] if part.is_a?(Hash)
417
+ candidates = [
418
+ extract_result_text(event["message"]),
419
+ extract_result_text(event["text"]),
420
+ extract_result_text(error_payload),
421
+ extract_result_text(error_payload.is_a?(Hash) ? error_payload["message"] : nil),
422
+ extract_result_text(error_payload.is_a?(Hash) ? error_payload["data"] : nil),
423
+ extract_result_text(part_error_payload),
424
+ extract_result_text(part_error_payload.is_a?(Hash) ? part_error_payload["message"] : nil),
425
+ extract_result_text(part_error_payload.is_a?(Hash) ? part_error_payload["data"] : nil),
426
+ extract_result_text(part.is_a?(Hash) ? nil : part),
427
+ extract_result_text(part.is_a?(Hash) ? part["text"] : nil),
428
+ extract_result_text(part.is_a?(Hash) ? part["message"] : nil)
429
+ ]
430
+
431
+ message = candidates.find { |value| value }
432
+ return message if message
433
+
434
+ JSON.generate(event)
435
+ end
436
+
437
+ def extract_result_text(payload)
438
+ case payload
439
+ when String
440
+ return if payload.strip.empty?
441
+
442
+ payload.strip
443
+ when Hash
444
+ extract_result_text(payload["text"]) || extract_result_text(payload["message"])
445
+ end
446
+ end
447
+
448
+ def extract_terminal_result_text(payload)
449
+ if payload.is_a?(String)
450
+ return if payload.strip.empty?
451
+
452
+ return payload
453
+ end
454
+
455
+ return unless payload.is_a?(Hash)
456
+
457
+ text = extract_terminal_result_text(payload["text"])
458
+ return text if text.is_a?(String) && !text.strip.empty?
459
+
460
+ extract_terminal_result_text(payload["message"]) || text
461
+ end
462
+
463
+ def extract_text_chunk(event, part)
464
+ scalar_part_chunk = extract_text_alias_chunk(part.is_a?(String) ? part : nil)
465
+ return scalar_part_chunk if scalar_part_chunk.is_a?(String) && !scalar_part_chunk.strip.empty?
466
+
467
+ part_text_chunk = extract_text_alias_chunk(part.is_a?(Hash) ? part["text"] : nil)
468
+ return part_text_chunk if part_text_chunk.is_a?(String) && !part_text_chunk.strip.empty?
469
+
470
+ part_message_chunk = extract_text_alias_chunk(part.is_a?(Hash) ? part["message"] : nil)
471
+ return part_message_chunk if part_message_chunk.is_a?(String) && !part_message_chunk.strip.empty?
472
+
473
+ text_chunk = extract_text_alias_chunk(event["text"])
474
+ return text_chunk if text_chunk.is_a?(String) && !text_chunk.strip.empty?
475
+
476
+ message_chunk = extract_text_alias_chunk(event["message"])
477
+ return message_chunk if message_chunk.is_a?(String) && !message_chunk.strip.empty?
478
+
479
+ scalar_part_chunk || part_text_chunk || part_message_chunk || text_chunk || message_chunk
480
+ end
481
+
482
+ def extract_text_alias_chunk(payload)
483
+ if payload.is_a?(String)
484
+ return if payload.empty?
485
+
486
+ return payload
487
+ end
488
+
489
+ return unless payload.is_a?(Hash)
490
+
491
+ text_chunk = extract_text_alias_chunk(payload["text"])
492
+ return text_chunk if text_chunk.is_a?(String) && !text_chunk.strip.empty?
493
+
494
+ extract_text_alias_chunk(payload["message"]) || text_chunk
495
+ end
496
+
497
+ def build_structured_error(result, structured_errors, unstructured_output:)
498
+ stderr = result.stderr.to_s.strip
499
+ error_lines = [stderr, *structured_errors, unstructured_output].compact.reject(&:empty?).uniq
500
+ return error_lines.join("\n") if error_lines.any?
501
+
502
+ return "Kilocode exited with code #{result.exit_code}" if result.failed?
503
+
504
+ nil
505
+ end
506
+
507
+ def extract_unstructured_output(output)
508
+ return if output.nil? || output.empty?
509
+
510
+ lines = output.each_line.filter_map do |line|
511
+ stripped_line = line.strip
512
+ next if stripped_line.empty?
513
+
514
+ parsed_line = JSON.parse(stripped_line)
515
+ next if parsed_structured_event?(parsed_line)
516
+ next if parsed_json_scalar?(parsed_line)
517
+
518
+ line.chomp
519
+ rescue JSON::ParserError
520
+ line.chomp
521
+ end
522
+
523
+ lines.empty? ? nil : lines.join("\n")
524
+ end
525
+
526
+ def structured_event?(event)
527
+ STRUCTURED_EVENT_TYPES.include?(event["type"])
528
+ end
529
+
530
+ def parsed_structured_event?(parsed_line)
531
+ parsed_line.is_a?(Hash) && structured_event?(parsed_line)
532
+ end
533
+
534
+ def parsed_json_scalar?(parsed_line)
535
+ !parsed_line.is_a?(Hash) && !parsed_line.is_a?(Array)
536
+ end
537
+
538
+ def coerce_token_count(value)
539
+ if value.is_a?(Integer)
540
+ return value if value >= 0
541
+
542
+ return nil
543
+ end
544
+
545
+ if value.is_a?(Float) && value.finite?
546
+ return nil unless value == value.to_i
547
+
548
+ coerced = value.to_i
549
+ return coerced if coerced >= 0
550
+
551
+ return nil
552
+ end
553
+
554
+ return if value.nil?
555
+
556
+ if value.is_a?(String)
557
+ return nil unless value.match?(/\A\d+\z/)
558
+
559
+ return value.to_i
560
+ end
561
+
562
+ nil
563
+ end
564
+
565
+ def coerce_total_token_count(usage, input:, output:)
566
+ explicit_total = extract_explicit_total_token_count(usage)
567
+ return explicit_total if explicit_total
568
+ return nil if input.nil? && output.nil?
569
+
570
+ (input || 0) + (output || 0)
571
+ end
572
+
573
+ def coerce_step_total_token_count(tokens)
574
+ explicit_total = extract_explicit_total_token_count(tokens)
575
+ return explicit_total if explicit_total
576
+
577
+ counts = [
578
+ coerce_token_count(tokens["input"]),
579
+ coerce_token_count(tokens["output"]),
580
+ coerce_token_count(tokens["reasoning"])
581
+ ]
582
+
583
+ cache = tokens["cache"]
584
+ if cache.is_a?(Hash)
585
+ counts << coerce_token_count(cache["read"])
586
+ counts << coerce_token_count(cache["write"])
587
+ end
588
+
589
+ counts.compact!
590
+ return nil if counts.empty?
591
+
592
+ counts.sum
593
+ end
594
+
595
+ def portable_step_extra_total(tokens, total)
596
+ return 0 unless step_component_tokens_present?(tokens)
597
+
598
+ input = coerce_token_count(tokens["input"]) || 0
599
+ output = coerce_token_count(tokens["output"]) || 0
600
+
601
+ [total - input - output, 0].max
602
+ end
603
+
604
+ def step_component_tokens_present?(tokens)
605
+ counts = [
606
+ coerce_token_count(tokens["input"]),
607
+ coerce_token_count(tokens["output"]),
608
+ coerce_token_count(tokens["reasoning"])
609
+ ]
610
+
611
+ cache = tokens["cache"]
612
+ if cache.is_a?(Hash)
613
+ counts << coerce_token_count(cache["read"])
614
+ counts << coerce_token_count(cache["write"])
615
+ end
616
+
617
+ counts.any?
618
+ end
619
+
620
+ def extract_explicit_total_token_count(usage)
621
+ coerce_token_count(usage["total_tokens"]) || coerce_token_count(usage["total"])
622
+ end
623
+
624
+ def usage_extra_token_total(usage)
625
+ counts = [
626
+ coerce_token_count(usage["reasoning_tokens"]),
627
+ coerce_token_count(usage["cache_creation_input_tokens"]),
628
+ coerce_token_count(usage["cache_read_input_tokens"]),
629
+ coerce_token_count(usage["cache_write_input_tokens"])
630
+ ]
631
+
632
+ counts.compact!
633
+ return nil if counts.empty?
634
+
635
+ counts.sum
636
+ end
142
637
  end
143
638
  end
144
639
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan