agent-harness 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 972e7e3144da1a59c0a25dbf4668766d07693870dde63c74dece4842c960dfe5
|
|
4
|
+
data.tar.gz: 3988ed1d19d61ce9144224302c0246c2c32442abdfa1d1c9bc4616abcb4e11c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 97d598d30445ef7617c172b692d43f3c8d8c896b9c12f28f7f0e56f822128e298312129bdae67adbbfc81eb407686587db677787bda70a2292c3a1f07aea9a60
|
|
7
|
+
data.tar.gz: 863739d9ace22d47b37799e44b36c19cacf221a4042947a627525f542838ccaafb933e03c3e0c10141c0d30971283046791b44d4ca8cc7c84c4a0e50184010a8
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.14.1](https://github.com/viamin/agent-harness/compare/agent-harness/v0.14.0...agent-harness/v0.14.1) (2026-05-03)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Bug Fixes
|
|
7
|
+
|
|
8
|
+
* **kilocode:** test_command_overrides never wired into smoke test — kilo hangs without --auto ([#191](https://github.com/viamin/agent-harness/issues/191)) ([7c01d49](https://github.com/viamin/agent-harness/commit/7c01d49713cbedb6fb93758be95b7d92aa4599d3))
|
|
9
|
+
|
|
3
10
|
## [0.14.0](https://github.com/viamin/agent-harness/compare/agent-harness/v0.13.1...agent-harness/v0.14.0) (2026-05-03)
|
|
4
11
|
|
|
5
12
|
|
|
@@ -765,6 +765,11 @@ module AgentHarness
|
|
|
765
765
|
# For providers that delegate to Providers::Base#send_message, a plain Hash
|
|
766
766
|
# is automatically coerced into a ProviderRuntime. Providers that override
|
|
767
767
|
# #send_message directly are responsible for handling this option.
|
|
768
|
+
# @option options [Boolean] :smoke_test when +true+, signals that this
|
|
769
|
+
# invocation is a lightweight connectivity/health check issued by
|
|
770
|
+
# {#smoke_test}. Providers may use this flag to adjust command-line
|
|
771
|
+
# arguments (e.g. Kilocode appends +--auto --print-logs+) or skip
|
|
772
|
+
# interactive features that would cause the process to hang.
|
|
768
773
|
# @return [Response] response object with output and metadata
|
|
769
774
|
def send_message(prompt:, **options)
|
|
770
775
|
raise NotImplementedError, "#{self.class} must implement #send_message"
|
|
@@ -1061,7 +1066,8 @@ module AgentHarness
|
|
|
1061
1066
|
response = send_message(
|
|
1062
1067
|
prompt: prompt,
|
|
1063
1068
|
timeout: timeout || contract[:timeout],
|
|
1064
|
-
provider_runtime: provider_runtime
|
|
1069
|
+
provider_runtime: provider_runtime,
|
|
1070
|
+
smoke_test: true
|
|
1065
1071
|
)
|
|
1066
1072
|
|
|
1067
1073
|
output = response.output.to_s.strip
|
|
@@ -42,6 +42,62 @@ module AgentHarness
|
|
|
42
42
|
/failed to refresh token\b.*service(?:\s+(?:is|was))?\s+(?:temporarily\s+)?unavailable/im
|
|
43
43
|
].freeze
|
|
44
44
|
|
|
45
|
+
SHARED_OUTPUT_ERROR_PATTERNS = {
|
|
46
|
+
quota_exceeded: [
|
|
47
|
+
/free tier limit reached/i,
|
|
48
|
+
/please upgrade to a paid plan/i,
|
|
49
|
+
/quota.*exceeded/i,
|
|
50
|
+
/insufficient.*quota/i,
|
|
51
|
+
/billing/i
|
|
52
|
+
],
|
|
53
|
+
rate_limited: [
|
|
54
|
+
/rate.?limit/i,
|
|
55
|
+
/too.?many.?requests/i,
|
|
56
|
+
/\b429\b/
|
|
57
|
+
],
|
|
58
|
+
auth_expired: [
|
|
59
|
+
/authentication_error/i,
|
|
60
|
+
/invalid_grant/i,
|
|
61
|
+
/Token is expired or invalid/i,
|
|
62
|
+
/unauthorized/i
|
|
63
|
+
],
|
|
64
|
+
sandbox_failure: [
|
|
65
|
+
/bwrap.*no permissions/i,
|
|
66
|
+
/no permissions to create a new namespace/i,
|
|
67
|
+
/unprivileged.*namespace/i
|
|
68
|
+
],
|
|
69
|
+
transient_error: [
|
|
70
|
+
/timeout/i,
|
|
71
|
+
/connection.*error/i,
|
|
72
|
+
/service.*unavailable/i,
|
|
73
|
+
/\b503\b/,
|
|
74
|
+
/\b502\b/,
|
|
75
|
+
/connection.*reset/i
|
|
76
|
+
]
|
|
77
|
+
}.tap { |h| h.each_value(&:freeze) }.freeze
|
|
78
|
+
|
|
79
|
+
STDOUT_ERROR_PATTERNS = SHARED_OUTPUT_ERROR_PATTERNS.merge(
|
|
80
|
+
auth_expired: [
|
|
81
|
+
/authentication_error/i,
|
|
82
|
+
/invalid_grant/i,
|
|
83
|
+
/Token is expired or invalid/i,
|
|
84
|
+
/unauthorized/i
|
|
85
|
+
]
|
|
86
|
+
).tap { |h| h.each_value(&:freeze) }.freeze
|
|
87
|
+
|
|
88
|
+
STDERR_ERROR_PATTERNS = SHARED_OUTPUT_ERROR_PATTERNS.merge(
|
|
89
|
+
auth_expired: OAUTH_REFRESH_FAILURE_PATTERNS + [
|
|
90
|
+
/invalid.*api.*key/i,
|
|
91
|
+
/unauthorized/i,
|
|
92
|
+
/authentication_error/i,
|
|
93
|
+
/invalid_grant/i,
|
|
94
|
+
/Token is expired or invalid/i,
|
|
95
|
+
/\b401\b/,
|
|
96
|
+
/incorrect.*api.*key/i
|
|
97
|
+
],
|
|
98
|
+
transient_error: OAUTH_REFRESH_TRANSIENT_PATTERNS + SHARED_OUTPUT_ERROR_PATTERNS[:transient_error]
|
|
99
|
+
).tap { |h| h.each_value(&:freeze) }.freeze
|
|
100
|
+
|
|
45
101
|
class << self
|
|
46
102
|
def provider_name
|
|
47
103
|
:codex
|
|
@@ -51,6 +107,34 @@ module AgentHarness
|
|
|
51
107
|
"codex"
|
|
52
108
|
end
|
|
53
109
|
|
|
110
|
+
# Classify a chunk of output text from the provider CLI in real-time
|
|
111
|
+
#
|
|
112
|
+
# Can be called during streaming to classify both stdout and stderr
|
|
113
|
+
# chunks as they arrive. For stdout, attempts to parse JSONL events
|
|
114
|
+
# and extract error information from structured output.
|
|
115
|
+
#
|
|
116
|
+
# Because CommandExecutor reads arbitrary 4096-byte chunks, a single
|
|
117
|
+
# JSONL event may be split across consecutive calls. Pass a String
|
|
118
|
+
# buffer via +stdout_buffer+ that persists across calls so incomplete
|
|
119
|
+
# trailing lines are re-assembled before parsing.
|
|
120
|
+
#
|
|
121
|
+
# @param text [String] the output chunk to classify
|
|
122
|
+
# @param stream [:stdout, :stderr] which stream the text came from
|
|
123
|
+
# @param stdout_buffer [String, nil] mutable String accumulator for
|
|
124
|
+
# incomplete stdout lines across calls (ignored for stderr)
|
|
125
|
+
# @return [nil, Hash] nil if no error detected, or a Hash with
|
|
126
|
+
# :reason (Symbol)
|
|
127
|
+
def classify_output_chunk(text, stream:, stdout_buffer: nil)
|
|
128
|
+
return nil if text.nil? || text.strip.empty?
|
|
129
|
+
|
|
130
|
+
case normalize_output_stream(stream)
|
|
131
|
+
when :stdout
|
|
132
|
+
classify_stdout_chunk(text, stdout_buffer)
|
|
133
|
+
when :stderr
|
|
134
|
+
classify_stderr_chunk(text)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
54
138
|
def available?
|
|
55
139
|
executor = AgentHarness.configuration.command_executor
|
|
56
140
|
!!executor.which(binary_name)
|
|
@@ -168,10 +252,129 @@ module AgentHarness
|
|
|
168
252
|
|
|
169
253
|
private
|
|
170
254
|
|
|
255
|
+
def classify_stdout_chunk(text, buffer)
|
|
256
|
+
# Prepend any leftover data from a previous partial chunk.
|
|
257
|
+
data = buffer ? (buffer.slice!(0..-1) + text) : text
|
|
258
|
+
|
|
259
|
+
lines = data.split("\n", -1)
|
|
260
|
+
|
|
261
|
+
# If the chunk does not end with a newline the last element is an
|
|
262
|
+
# incomplete line — stash it in the buffer for the next call.
|
|
263
|
+
if buffer && !data.end_with?("\n")
|
|
264
|
+
buffer.replace(lines.pop.to_s)
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
lines.each do |line|
|
|
268
|
+
stripped = line.strip
|
|
269
|
+
next if stripped.empty?
|
|
270
|
+
|
|
271
|
+
event = parse_stdout_jsonl_event(stripped)
|
|
272
|
+
next unless event
|
|
273
|
+
|
|
274
|
+
result = classify_jsonl_event(event)
|
|
275
|
+
return result if result
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
nil
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def classify_stderr_chunk(text)
|
|
282
|
+
match_patterns(text, STDERR_ERROR_PATTERNS)
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def normalize_output_stream(stream)
|
|
286
|
+
normalized_stream = case stream
|
|
287
|
+
when Symbol
|
|
288
|
+
stream
|
|
289
|
+
when String
|
|
290
|
+
stream.strip.to_sym
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
return normalized_stream if %i[stdout stderr].include?(normalized_stream)
|
|
294
|
+
|
|
295
|
+
raise ArgumentError, "Unknown stream: #{stream.inspect}"
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def parse_stdout_jsonl_event(text)
|
|
299
|
+
escaped_newline_trimmed = text.sub(/(?:\\r)?\\n\z/, "")
|
|
300
|
+
candidates = if escaped_newline_trimmed == text
|
|
301
|
+
[text]
|
|
302
|
+
else
|
|
303
|
+
[text, escaped_newline_trimmed]
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
candidates.each do |candidate|
|
|
307
|
+
return JSON.parse(candidate)
|
|
308
|
+
rescue JSON::ParserError
|
|
309
|
+
next
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
# Non-JSON stdout line — skip, only classify explicit error events
|
|
313
|
+
nil
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
def classify_jsonl_event(event)
|
|
317
|
+
return nil unless event.is_a?(Hash)
|
|
318
|
+
|
|
319
|
+
payload = unwrap_classification_event(event)
|
|
320
|
+
event = payload if payload.is_a?(Hash)
|
|
321
|
+
|
|
322
|
+
# Only classify events with explicit error payloads — not normal
|
|
323
|
+
# assistant messages whose text happens to contain error-ish words.
|
|
324
|
+
error_text = extract_jsonl_error_text(event)
|
|
325
|
+
return nil unless error_text
|
|
326
|
+
|
|
327
|
+
match_patterns(error_text, STDOUT_ERROR_PATTERNS)
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
def extract_jsonl_error_text(event)
|
|
331
|
+
# Direct error field (top-level "error" key)
|
|
332
|
+
error = event["error"]
|
|
333
|
+
return error if error.is_a?(String) && !error.empty?
|
|
334
|
+
|
|
335
|
+
if error.is_a?(Hash)
|
|
336
|
+
msg = error["message"]
|
|
337
|
+
return msg if msg.is_a?(String) && !msg.empty?
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
return nil unless explicit_jsonl_error_event?(event["type"])
|
|
341
|
+
|
|
342
|
+
# "message" appears on both error events and normal assistant output.
|
|
343
|
+
# Restricting message-based extraction to explicit error event types
|
|
344
|
+
# avoids false positives from user-facing assistant content.
|
|
345
|
+
message = event["message"]
|
|
346
|
+
return message if message.is_a?(String) && !message.empty?
|
|
347
|
+
|
|
348
|
+
nil
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
def match_patterns(text, pattern_groups)
|
|
352
|
+
pattern_groups.each do |category, patterns|
|
|
353
|
+
if patterns.any? { |p| text.match?(p) }
|
|
354
|
+
return {reason: category}
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
|
|
358
|
+
nil
|
|
359
|
+
end
|
|
360
|
+
|
|
171
361
|
def parser_instance
|
|
172
362
|
@parser_instance ||= allocate.freeze
|
|
173
363
|
end
|
|
174
364
|
|
|
365
|
+
def unwrap_classification_event(event)
|
|
366
|
+
case event["type"]
|
|
367
|
+
when "event_msg", "response_item"
|
|
368
|
+
event["payload"]
|
|
369
|
+
else
|
|
370
|
+
event
|
|
371
|
+
end
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
def explicit_jsonl_error_event?(event_type)
|
|
375
|
+
%w[error turn.failed].include?(event_type)
|
|
376
|
+
end
|
|
377
|
+
|
|
175
378
|
def tail_nonempty_lines(text, limit:)
|
|
176
379
|
return [] if limit <= 0
|
|
177
380
|
|
|
@@ -317,7 +520,10 @@ module AgentHarness
|
|
|
317
520
|
],
|
|
318
521
|
abort: [
|
|
319
522
|
/free tier limit reached/i,
|
|
320
|
-
/please upgrade to a paid plan/i
|
|
523
|
+
/please upgrade to a paid plan/i,
|
|
524
|
+
/bwrap.*no permissions/i,
|
|
525
|
+
/no permissions to create a new namespace/i,
|
|
526
|
+
/unprivileged.*namespace/i
|
|
321
527
|
]
|
|
322
528
|
)
|
|
323
529
|
end
|
|
@@ -1017,7 +1223,11 @@ module AgentHarness
|
|
|
1017
1223
|
total: total_tokens
|
|
1018
1224
|
} : nil
|
|
1019
1225
|
}
|
|
1020
|
-
rescue
|
|
1226
|
+
rescue JSON::ParserError => e
|
|
1227
|
+
AgentHarness.logger&.warn("[AgentHarness::Codex] JSONL parse error: #{e.message}")
|
|
1228
|
+
nil
|
|
1229
|
+
rescue => e
|
|
1230
|
+
AgentHarness.logger&.warn("[AgentHarness::Codex] Unexpected error parsing JSONL output: #{e.class}: #{e.message}")
|
|
1021
1231
|
nil
|
|
1022
1232
|
end
|
|
1023
1233
|
|