agent-harness 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+
3
5
  module AgentHarness
4
6
  module Providers
5
7
  # Kilocode CLI provider
@@ -9,6 +11,18 @@ module AgentHarness
9
11
  PACKAGE_NAME = "@kilocode/cli"
10
12
  DEFAULT_VERSION = "7.1.3"
11
13
  SUPPORTED_VERSION_REQUIREMENT = "= #{DEFAULT_VERSION}"
14
+ STRUCTURED_EVENT_TYPES = %w[text error step_finish result usage].freeze
15
+ USAGE_EVENT_TYPES = %w[result usage].freeze
16
+ TOKEN_USAGE_KEYS = %w[
17
+ input_tokens
18
+ output_tokens
19
+ total_tokens
20
+ total
21
+ reasoning_tokens
22
+ cache_creation_input_tokens
23
+ cache_read_input_tokens
24
+ cache_write_input_tokens
25
+ ].freeze
12
26
 
13
27
  class << self
14
28
  def provider_name
@@ -118,7 +132,7 @@ module AgentHarness
118
132
  def execution_semantics
119
133
  {
120
134
  prompt_delivery: :arg,
121
- output_format: :text,
135
+ output_format: :json,
122
136
  sandbox_aware: false,
123
137
  uses_subcommand: true,
124
138
  non_interactive_flag: nil,
@@ -131,14 +145,495 @@ module AgentHarness
131
145
  protected
132
146
 
133
147
  def build_command(prompt, options)
134
- cmd = [self.class.binary_name, "run"]
148
+ cmd = [self.class.binary_name, "run", "--format", "json"]
135
149
  cmd << prompt
136
150
  cmd
137
151
  end
138
152
 
153
+ def parse_response(result, duration:)
154
+ output = result.stdout
155
+ tokens = nil
156
+ structured_errors = []
157
+ error = nil
158
+ unstructured_output = nil
159
+
160
+ if result.failed?
161
+ combined = [result.stderr, result.stdout]
162
+ .map { |s| s.to_s.strip }
163
+ .reject(&:empty?)
164
+ .join("\n")
165
+ error = combined unless combined.empty?
166
+ end
167
+
168
+ text_parts = []
169
+ accumulated_input = 0
170
+ accumulated_output = 0
171
+ accumulated_total = 0
172
+ accumulated_extra_total = 0
173
+ has_step_tokens = false
174
+ result_usage = nil
175
+ result_text = nil
176
+ saw_structured_event = false
177
+
178
+ each_json_event(output) do |event|
179
+ next unless structured_event?(event)
180
+
181
+ saw_structured_event = true
182
+ part = event["part"]
183
+
184
+ if event["type"] == "text"
185
+ text = extract_text_chunk(event, part)
186
+ text_parts << text if text.is_a?(String)
187
+ end
188
+
189
+ if event["type"] == "result"
190
+ extracted_result_text = extract_terminal_result_text(event["result"]) ||
191
+ extract_terminal_result_text(part) ||
192
+ extract_terminal_result_text(event["text"]) ||
193
+ extract_terminal_result_text(event["message"])
194
+ result_text = extracted_result_text if extracted_result_text
195
+ end
196
+
197
+ if event["type"] == "error"
198
+ structured_error = extract_error_message(event)
199
+ structured_errors << structured_error if structured_error
200
+ end
201
+
202
+ if event["type"] == "step_finish"
203
+ part_tokens = part["tokens"] if part.is_a?(Hash)
204
+ if part_tokens.is_a?(Hash)
205
+ step_total = coerce_step_total_token_count(part_tokens)
206
+ step_token_counts = build_token_counts({
207
+ "input_tokens" => part_tokens["input"],
208
+ "output_tokens" => part_tokens["output"],
209
+ "total_tokens" => step_total
210
+ })
211
+
212
+ if step_token_counts
213
+ accumulated_input += step_token_counts[:input]
214
+ accumulated_output += step_token_counts[:output]
215
+ accumulated_total += step_token_counts[:total]
216
+ accumulated_extra_total += portable_step_extra_total(part_tokens, step_token_counts[:total])
217
+ has_step_tokens = true
218
+ end
219
+ end
220
+ end
221
+
222
+ usage = event["usage"]
223
+ if USAGE_EVENT_TYPES.include?(event["type"]) && usage.is_a?(Hash) && usage_has_token_data?(usage)
224
+ result_usage = merge_usage_data(result_usage, usage)
225
+ end
226
+ end
227
+
228
+ if saw_structured_event
229
+ unstructured_output = extract_unstructured_output(result.stdout)
230
+ joined_text = text_parts.join if text_parts.any?
231
+ output = if joined_text && !joined_text.strip.empty?
232
+ joined_text
233
+ else
234
+ result_text || unstructured_output
235
+ end
236
+ if result.failed? || structured_errors.any?
237
+ error = build_structured_error(
238
+ result,
239
+ structured_errors,
240
+ unstructured_output:
241
+ )
242
+ end
243
+ end
244
+ step_tokens = nil
245
+ if has_step_tokens
246
+ step_tokens = build_token_counts({
247
+ "input_tokens" => accumulated_input,
248
+ "output_tokens" => accumulated_output,
249
+ "total_tokens" => accumulated_total
250
+ })
251
+ end
252
+ fallback_total_remainder = [accumulated_total - accumulated_input - accumulated_output - accumulated_extra_total, 0].max
253
+ if result_usage
254
+ tokens = resolve_token_counts(
255
+ result_usage,
256
+ fallback: step_tokens,
257
+ fallback_extra_total: accumulated_extra_total,
258
+ fallback_total_remainder:
259
+ )
260
+ end
261
+ tokens ||= step_tokens
262
+
263
+ Response.new(
264
+ output: output,
265
+ exit_code: result.exit_code,
266
+ duration: duration,
267
+ provider: self.class.provider_name,
268
+ model: @config.model,
269
+ tokens: tokens,
270
+ error: error,
271
+ metadata: {
272
+ legitimate_exit_codes: execution_semantics[:legitimate_exit_codes]
273
+ }
274
+ )
275
+ end
276
+
139
277
  def default_timeout
140
278
  300
141
279
  end
280
+
281
+ private
282
+
283
+ def each_json_event(output)
284
+ return if output.nil? || output.empty?
285
+
286
+ output.each_line do |line|
287
+ line = line.strip
288
+ next if line.empty?
289
+
290
+ event = JSON.parse(line)
291
+ next unless event.is_a?(Hash)
292
+
293
+ yield event
294
+ rescue JSON::ParserError
295
+ next
296
+ end
297
+ end
298
+
299
+ def build_token_counts(usage)
300
+ input = coerce_token_count(usage["input_tokens"])
301
+ output = coerce_token_count(usage["output_tokens"])
302
+ total = coerce_total_token_count(usage, input:, output:)
303
+ return nil unless input || output || total
304
+
305
+ input ||= 0
306
+ output ||= 0
307
+
308
+ {input: input, output: output, total: total}
309
+ end
310
+
311
+ def resolve_token_counts(usage, fallback: nil, fallback_extra_total: 0, fallback_total_remainder: 0)
312
+ input = coerce_token_count(usage["input_tokens"])
313
+ output = coerce_token_count(usage["output_tokens"])
314
+ explicit_total = extract_explicit_total_token_count(usage)
315
+ usage_extra_total = usage_extra_token_total(usage)
316
+
317
+ input_from_fallback = input.nil? && fallback && !fallback[:input].nil?
318
+ output_from_fallback = output.nil? && fallback && !fallback[:output].nil?
319
+ fallback_total = fallback[:total] if fallback
320
+ input = fallback[:input] if input_from_fallback
321
+ output = fallback[:output] if output_from_fallback
322
+ return nil unless input || output || explicit_total || usage_extra_total || fallback_total
323
+
324
+ input ||= 0
325
+ output ||= 0
326
+
327
+ total = if explicit_total
328
+ explicit_total
329
+ elsif usage_extra_total
330
+ resolved_total = input + output + usage_extra_total
331
+ if (input_from_fallback || output_from_fallback) && fallback_total_remainder.positive?
332
+ [resolved_total, fallback_total].compact.max
333
+ else
334
+ resolved_total
335
+ end
336
+ elsif input_from_fallback || output_from_fallback
337
+ resolved_total = input + output + fallback_extra_total
338
+ fallback_total_remainder.positive? ? [resolved_total, fallback_total].compact.max : resolved_total
339
+ else
340
+ input + output
341
+ end
342
+
343
+ {input: input, output: output, total: total}
344
+ end
345
+
346
+ def usage_has_token_data?(usage)
347
+ input = coerce_token_count(usage["input_tokens"])
348
+ output = coerce_token_count(usage["output_tokens"])
349
+ explicit_total = extract_explicit_total_token_count(usage)
350
+ usage_extra_total = usage_extra_token_total(usage)
351
+
352
+ input || output || explicit_total || usage_extra_total
353
+ end
354
+
355
+ def merge_usage_data(previous_usage, current_usage)
356
+ return current_usage if previous_usage.nil?
357
+
358
+ merged_usage = previous_usage.slice(*TOKEN_USAGE_KEYS)
359
+ if usage_updates_explicit_total?(current_usage)
360
+ merged_usage.delete("total_tokens")
361
+ merged_usage.delete("total")
362
+ end
363
+
364
+ if usage_replaces_extra_fields?(current_usage)
365
+ merged_usage.delete("reasoning_tokens")
366
+ merged_usage.delete("cache_creation_input_tokens")
367
+ merged_usage.delete("cache_read_input_tokens")
368
+ merged_usage.delete("cache_write_input_tokens")
369
+ end
370
+
371
+ merged_usage.merge!(
372
+ current_usage.slice(*TOKEN_USAGE_KEYS).select { |key, value| usable_usage_token_field?(key, value) }
373
+ )
374
+
375
+ if usage_updates_non_total_fields?(current_usage) && !usage_updates_explicit_total?(current_usage)
376
+ merged_usage.delete("total_tokens")
377
+ merged_usage.delete("total")
378
+ end
379
+
380
+ merged_usage
381
+ end
382
+
383
+ def usable_usage_token_field?(key, value)
384
+ case key
385
+ when "input_tokens", "output_tokens", "total_tokens", "total", "reasoning_tokens",
386
+ "cache_creation_input_tokens", "cache_read_input_tokens", "cache_write_input_tokens"
387
+ !coerce_token_count(value).nil?
388
+ else
389
+ false
390
+ end
391
+ end
392
+
393
+ def usage_updates_non_total_fields?(usage)
394
+ %w[
395
+ input_tokens
396
+ output_tokens
397
+ reasoning_tokens
398
+ cache_creation_input_tokens
399
+ cache_read_input_tokens
400
+ cache_write_input_tokens
401
+ ].any? { |key| usable_usage_token_field?(key, usage[key]) }
402
+ end
403
+
404
+ def usage_updates_explicit_total?(usage)
405
+ %w[total_tokens total].any? { |key| usable_usage_token_field?(key, usage[key]) }
406
+ end
407
+
408
+ def usage_replaces_extra_fields?(usage)
409
+ usable_usage_token_field?("input_tokens", usage["input_tokens"]) &&
410
+ usable_usage_token_field?("output_tokens", usage["output_tokens"])
411
+ end
412
+
413
+ def extract_error_message(event)
414
+ error_payload = event["error"]
415
+ part = event["part"]
416
+ part_error_payload = part["error"] if part.is_a?(Hash)
417
+ candidates = [
418
+ extract_result_text(event["message"]),
419
+ extract_result_text(event["text"]),
420
+ extract_result_text(error_payload),
421
+ extract_result_text(error_payload.is_a?(Hash) ? error_payload["message"] : nil),
422
+ extract_result_text(error_payload.is_a?(Hash) ? error_payload["data"] : nil),
423
+ extract_result_text(part_error_payload),
424
+ extract_result_text(part_error_payload.is_a?(Hash) ? part_error_payload["message"] : nil),
425
+ extract_result_text(part_error_payload.is_a?(Hash) ? part_error_payload["data"] : nil),
426
+ extract_result_text(part.is_a?(Hash) ? nil : part),
427
+ extract_result_text(part.is_a?(Hash) ? part["text"] : nil),
428
+ extract_result_text(part.is_a?(Hash) ? part["message"] : nil)
429
+ ]
430
+
431
+ message = candidates.find { |value| value }
432
+ return message if message
433
+
434
+ JSON.generate(event)
435
+ end
436
+
437
+ def extract_result_text(payload)
438
+ case payload
439
+ when String
440
+ return if payload.strip.empty?
441
+
442
+ payload.strip
443
+ when Hash
444
+ extract_result_text(payload["text"]) || extract_result_text(payload["message"])
445
+ end
446
+ end
447
+
448
+ def extract_terminal_result_text(payload)
449
+ if payload.is_a?(String)
450
+ return if payload.strip.empty?
451
+
452
+ return payload
453
+ end
454
+
455
+ return unless payload.is_a?(Hash)
456
+
457
+ text = extract_terminal_result_text(payload["text"])
458
+ return text if text.is_a?(String) && !text.strip.empty?
459
+
460
+ extract_terminal_result_text(payload["message"]) || text
461
+ end
462
+
463
+ def extract_text_chunk(event, part)
464
+ scalar_part_chunk = extract_text_alias_chunk(part.is_a?(String) ? part : nil)
465
+ return scalar_part_chunk if scalar_part_chunk.is_a?(String) && !scalar_part_chunk.strip.empty?
466
+
467
+ part_text_chunk = extract_text_alias_chunk(part.is_a?(Hash) ? part["text"] : nil)
468
+ return part_text_chunk if part_text_chunk.is_a?(String) && !part_text_chunk.strip.empty?
469
+
470
+ part_message_chunk = extract_text_alias_chunk(part.is_a?(Hash) ? part["message"] : nil)
471
+ return part_message_chunk if part_message_chunk.is_a?(String) && !part_message_chunk.strip.empty?
472
+
473
+ text_chunk = extract_text_alias_chunk(event["text"])
474
+ return text_chunk if text_chunk.is_a?(String) && !text_chunk.strip.empty?
475
+
476
+ message_chunk = extract_text_alias_chunk(event["message"])
477
+ return message_chunk if message_chunk.is_a?(String) && !message_chunk.strip.empty?
478
+
479
+ scalar_part_chunk || part_text_chunk || part_message_chunk || text_chunk || message_chunk
480
+ end
481
+
482
+ def extract_text_alias_chunk(payload)
483
+ if payload.is_a?(String)
484
+ return if payload.empty?
485
+
486
+ return payload
487
+ end
488
+
489
+ return unless payload.is_a?(Hash)
490
+
491
+ text_chunk = extract_text_alias_chunk(payload["text"])
492
+ return text_chunk if text_chunk.is_a?(String) && !text_chunk.strip.empty?
493
+
494
+ extract_text_alias_chunk(payload["message"]) || text_chunk
495
+ end
496
+
497
+ def build_structured_error(result, structured_errors, unstructured_output:)
498
+ stderr = result.stderr.to_s.strip
499
+ error_lines = [stderr, *structured_errors, unstructured_output].compact.reject(&:empty?).uniq
500
+ return error_lines.join("\n") if error_lines.any?
501
+
502
+ return "Kilocode exited with code #{result.exit_code}" if result.failed?
503
+
504
+ nil
505
+ end
506
+
507
+ def extract_unstructured_output(output)
508
+ return if output.nil? || output.empty?
509
+
510
+ lines = output.each_line.filter_map do |line|
511
+ stripped_line = line.strip
512
+ next if stripped_line.empty?
513
+
514
+ parsed_line = JSON.parse(stripped_line)
515
+ next if parsed_structured_event?(parsed_line)
516
+ next if parsed_json_scalar?(parsed_line)
517
+
518
+ line.chomp
519
+ rescue JSON::ParserError
520
+ line.chomp
521
+ end
522
+
523
+ lines.empty? ? nil : lines.join("\n")
524
+ end
525
+
526
+ def structured_event?(event)
527
+ STRUCTURED_EVENT_TYPES.include?(event["type"])
528
+ end
529
+
530
+ def parsed_structured_event?(parsed_line)
531
+ parsed_line.is_a?(Hash) && structured_event?(parsed_line)
532
+ end
533
+
534
+ def parsed_json_scalar?(parsed_line)
535
+ !parsed_line.is_a?(Hash) && !parsed_line.is_a?(Array)
536
+ end
537
+
538
+ def coerce_token_count(value)
539
+ if value.is_a?(Integer)
540
+ return value if value >= 0
541
+
542
+ return nil
543
+ end
544
+
545
+ if value.is_a?(Float) && value.finite?
546
+ return nil unless value == value.to_i
547
+
548
+ coerced = value.to_i
549
+ return coerced if coerced >= 0
550
+
551
+ return nil
552
+ end
553
+
554
+ return if value.nil?
555
+
556
+ if value.is_a?(String)
557
+ return nil unless value.match?(/\A\d+\z/)
558
+
559
+ return value.to_i
560
+ end
561
+
562
+ nil
563
+ end
564
+
565
+ def coerce_total_token_count(usage, input:, output:)
566
+ explicit_total = extract_explicit_total_token_count(usage)
567
+ return explicit_total if explicit_total
568
+ return nil if input.nil? && output.nil?
569
+
570
+ (input || 0) + (output || 0)
571
+ end
572
+
573
+ def coerce_step_total_token_count(tokens)
574
+ explicit_total = extract_explicit_total_token_count(tokens)
575
+ return explicit_total if explicit_total
576
+
577
+ counts = [
578
+ coerce_token_count(tokens["input"]),
579
+ coerce_token_count(tokens["output"]),
580
+ coerce_token_count(tokens["reasoning"])
581
+ ]
582
+
583
+ cache = tokens["cache"]
584
+ if cache.is_a?(Hash)
585
+ counts << coerce_token_count(cache["read"])
586
+ counts << coerce_token_count(cache["write"])
587
+ end
588
+
589
+ counts.compact!
590
+ return nil if counts.empty?
591
+
592
+ counts.sum
593
+ end
594
+
595
+ def portable_step_extra_total(tokens, total)
596
+ return 0 unless step_component_tokens_present?(tokens)
597
+
598
+ input = coerce_token_count(tokens["input"]) || 0
599
+ output = coerce_token_count(tokens["output"]) || 0
600
+
601
+ [total - input - output, 0].max
602
+ end
603
+
604
+ def step_component_tokens_present?(tokens)
605
+ counts = [
606
+ coerce_token_count(tokens["input"]),
607
+ coerce_token_count(tokens["output"]),
608
+ coerce_token_count(tokens["reasoning"])
609
+ ]
610
+
611
+ cache = tokens["cache"]
612
+ if cache.is_a?(Hash)
613
+ counts << coerce_token_count(cache["read"])
614
+ counts << coerce_token_count(cache["write"])
615
+ end
616
+
617
+ counts.any?
618
+ end
619
+
620
+ def extract_explicit_total_token_count(usage)
621
+ coerce_token_count(usage["total_tokens"]) || coerce_token_count(usage["total"])
622
+ end
623
+
624
+ def usage_extra_token_total(usage)
625
+ counts = [
626
+ coerce_token_count(usage["reasoning_tokens"]),
627
+ coerce_token_count(usage["cache_creation_input_tokens"]),
628
+ coerce_token_count(usage["cache_read_input_tokens"]),
629
+ coerce_token_count(usage["cache_write_input_tokens"])
630
+ ]
631
+
632
+ counts.compact!
633
+ return nil if counts.empty?
634
+
635
+ counts.sum
636
+ end
142
637
  end
143
638
  end
144
639
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan