agent-harness 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "digest"
4
+ require "json"
5
+
3
6
  module AgentHarness
4
7
  module Providers
5
8
  # GitHub Copilot CLI provider
6
9
  #
7
10
  # Provides integration with the GitHub Copilot CLI tool.
8
11
  class GithubCopilot < Base
12
# Lowest Copilot CLI version that is treated as supporting JSON output;
# the detected CLI version is compared against it with `>=`.
MIN_JSON_OUTPUT_VERSION = Gem::Version.new("0.0.422").freeze

# Thread-variable key under which the per-provider request probe env
# stacks are stored.
REQUEST_PROBE_ENV_STACK_KEY = :agent_harness_github_copilot_request_probe_env_stack
14
+
9
15
  # Model name pattern for GitHub Copilot (uses OpenAI models)
10
16
  MODEL_PATTERN = /^gpt-[\d.o-]+(?:-turbo)?(?:-mini)?$/i
11
17
 
@@ -144,10 +150,16 @@ module AgentHarness
144
150
  :oauth
145
151
  end
146
152
 
153
# Sends a prompt through the parent implementation while the request's
# probe environment is active.
#
# The environment derived from options[:provider_runtime] is pushed for the
# duration of the call, so version probes made during the request see it.
#
# @param prompt [String] prompt forwarded unchanged to the parent
# @param options [Hash] remaining keyword options, forwarded unchanged
# @return whatever the parent send_message returns
def send_message(prompt:, **options)
  probe_env = request_probe_env_from_raw_runtime(options[:provider_runtime])

  with_request_probe_env(probe_env) { super(prompt: prompt, **options) }
end
158
+
147
159
  def execution_semantics
148
160
  {
149
161
  prompt_delivery: :arg,
150
- output_format: :text,
162
+ output_format: copilot_cli_supports_json_output? ? :json : :text,
151
163
  sandbox_aware: false,
152
164
  uses_subcommand: true,
153
165
  non_interactive_flag: nil,
@@ -186,6 +198,7 @@ module AgentHarness
186
198
 
187
199
  def build_command(prompt, options)
188
200
  cmd = [self.class.binary_name, "what-the-shell", prompt]
201
+ cmd += ["--output-format", "json"] if copilot_cli_supports_json_output?
189
202
 
190
203
  # Opt in to unrestricted tool access explicitly to preserve a safe default.
191
204
  if supports_dangerous_mode? && options[:dangerous_mode]
@@ -203,6 +216,586 @@ module AgentHarness
203
216
  def default_timeout
204
217
  300
205
218
  end
219
+
220
# Builds a Response from a finished CLI run when JSON output is in use.
#
# Falls back to the parent implementation when the CLI does not support
# JSON output. Otherwise stdout is read line by line: lines that are blank
# or not valid JSON are kept verbatim, assistant deltas are concatenated
# until a full assistant message replaces them, and token usage events are
# summed per source.
#
# @param result [#stdout, #stderr, #exit_code] finished command result
# @param duration [Object] forwarded unchanged to the Response
# @return [Response]
def parse_response(result, duration:)
  return super unless copilot_cli_supports_json_output?

  stdout_text = result.stdout.to_s
  failure = nil

  allowed_exit_codes = execution_semantics[:legitimate_exit_codes] || [0]
  unless allowed_exit_codes.include?(result.exit_code)
    details = [result.stderr.to_s, stdout_text].map(&:strip).reject(&:empty?).join("\n")
    failure = details unless details.empty?
  end

  saw_structured_json = false
  shutdown_totals = empty_token_totals
  usage_totals = empty_token_totals
  fallback_totals = empty_token_totals
  segments = []
  final_reply_seen = false

  # Keeps a stdout line exactly as it was emitted.
  keep_raw = lambda do |text_line|
    segments << {kind: :raw, content: text_line, terminated: text_line.end_with?("\n")}
  end

  stdout_text.each_line do |raw_line|
    stripped = raw_line.strip
    if stripped.empty?
      keep_raw.call(raw_line)
      next
    end

    begin
      event = JSON.parse(stripped)
    rescue JSON::ParserError
      keep_raw.call(raw_line)
      next
    end

    saw_structured_json ||= event.is_a?(Hash)

    text, text_kind = extract_event_text(event)
    if !text
      keep_raw.call(raw_line) if preserve_raw_json_line?(event) || !event.is_a?(Hash)
    elsif text_kind == :assistant_delta
      # Once a full assistant message was seen, later deltas are ignored
      # (including any token usage on the same line).
      next if final_reply_seen

      append_delta_segment!(segments, text, terminated: raw_line.end_with?("\n"))
    elsif !text.empty?
      replace_assistant_segments!(segments, text, terminated: raw_line.end_with?("\n"))
      final_reply_seen = true
    end

    usage = extract_token_usage(event)
    next unless usage

    bucket =
      case usage[:source]
      when :shutdown then shutdown_totals
      when :usage then usage_totals
      else fallback_totals
      end
    accumulate_token_totals!(bucket, usage)
  end

  tokens = build_tokens(
    shutdown_tokens: shutdown_totals,
    usage_tokens: usage_totals,
    fallback_tokens: fallback_totals
  )
  # Without any JSON object on stdout the raw output is returned untouched.
  final_output = saw_structured_json ? render_output_segments(segments) : stdout_text

  Response.new(
    output: final_output,
    exit_code: result.exit_code,
    duration: duration,
    provider: self.class.provider_name,
    model: @config.model,
    tokens: tokens,
    error: failure,
    metadata: {
      legitimate_exit_codes: allowed_exit_codes
    }
  )
end
294
+
295
# Event types whose payload may carry assistant text.
ASSISTANT_OUTPUT_EVENT_TYPES = %w[assistant assistant.message assistant.message_delta].freeze

# Assistant event types whose payload is also consulted for token counts.
ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES = %w[assistant assistant.message].freeze

# Event types treated as a session shutdown report.
SESSION_SHUTDOWN_EVENT_TYPES = ["session.shutdown"].freeze

# Event types treated as dedicated usage reports.
USAGE_EVENT_TYPES = %w[usage assistant.usage].freeze

# An event type starting with one of these prefixes is recognised as a
# Copilot event.
COPILOT_EVENT_TYPE_PREFIXES = %w[
  assistant. user. user_input. system. session. tool. permission.
  elicitation. exit_plan_mode. skill. subagent. external_tool. command.
].freeze

# Exact event type names recognised as Copilot events.
COPILOT_EVENT_TYPES = %w[
  abort command elicitation exit_plan_mode external_tool permission
  session skill subagent system tool user user_input
].freeze
329
+
330
# Pulls assistant text out of one parsed JSON value.
#
# Objects with a "type" key are read as typed events: only assistant output
# event types with a Hash "data" payload can yield text. Objects without a
# "type" key are read loosely from "message" => "content", then "output",
# then "content", unless a role is present and is not the assistant role.
#
# @param obj [Object] value produced by JSON.parse
# @return [Array] [text, kind]; kind is :assistant_delta or :assistant, and
#   both elements are nil when no text applies. For a typed non-delta event
#   with a "content" key the text may be nil or empty.
def extract_event_text(obj)
  return [nil, nil] unless obj.is_a?(Hash)

  if obj.key?("type")
    event_type = obj["type"]
    payload = obj["data"]
    return [nil, nil] unless payload.is_a?(Hash)
    return [nil, nil] unless ASSISTANT_OUTPUT_EVENT_TYPES.include?(event_type)

    if event_type == "assistant.message_delta"
      # camelCase key first, snake_case only when that one is unusable.
      delta = string_content(payload["deltaContent"])
      delta = string_content(payload["delta_content"]) unless delta && !delta.empty?
      return delta && !delta.empty? ? [delta, :assistant_delta] : [nil, nil]
    end

    return payload.key?("content") ? [string_content(payload["content"]), :assistant] : [nil, nil]
  end

  return [nil, nil] if obj.key?("role") && !assistant_role?(obj["role"])

  message = obj["message"]
  if message.is_a?(Hash)
    return [nil, nil] if message.key?("role") && !assistant_role?(message["role"])

    if message.key?("content")
      nested = string_content(message["content"])
      return [nested, :assistant] if nested && !nested.empty?
    end
  end

  %w[output content].each do |field|
    candidate = string_content(obj[field])
    return [candidate, :assistant] if candidate && !candidate.empty?
  end

  [nil, nil]
end
368
+
369
# Passes Strings through and maps every other value to nil.
#
# @param value [Object]
# @return [String, nil] the same String object, or nil
def string_content(value)
  value.is_a?(String) ? value : nil
end
374
+
375
# Decides whether a JSON line that yielded no text should be kept verbatim
# in the output.
#
# Typed objects are kept only when their type is not a recognised Copilot
# event type. Untyped objects are dropped when they carry a non-assistant
# role, token usage, or non-empty text in "output", "content" or
# "message" => "content"; anything else is kept.
#
# @param obj [Object] value produced by JSON.parse
# @return [Boolean] always false for non-Hash values
def preserve_raw_json_line?(obj)
  return false unless obj.is_a?(Hash)
  return !copilot_event_type?(obj["type"]) if obj.key?("type")
  return false if obj.key?("role") && !assistant_role?(obj["role"])

  message = obj["message"].is_a?(Hash) ? obj["message"] : nil
  return false if message && message.key?("role") && !assistant_role?(message["role"])
  return false if extract_token_usage(obj)

  candidates = [obj["output"], obj["content"], message && message["content"]]
  candidates.none? do |candidate|
    text = string_content(candidate)
    text && !text.empty?
  end
end
391
+
392
# Tells whether a role value is the assistant role.
#
# @param role [Object] role value taken from a parsed JSON object
# @return [Boolean] true only when role equals the string "assistant"
def assistant_role?(role)
  role == "assistant"
end
395
+
396
# Tells whether an event type is recognised as a Copilot event type.
#
# A type is recognised when it appears in one of the explicit type lists,
# or when it is a String that is listed in COPILOT_EVENT_TYPES or starts
# with one of COPILOT_EVENT_TYPE_PREFIXES.
#
# @param event_type [Object] value of an event's "type" key
# @return [Boolean]
def copilot_event_type?(event_type)
  explicit_lists = [
    ASSISTANT_OUTPUT_EVENT_TYPES,
    ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES,
    SESSION_SHUTDOWN_EVENT_TYPES,
    USAGE_EVENT_TYPES
  ]
  return true if explicit_lists.any? { |types| types.include?(event_type) }
  return false unless event_type.is_a?(String)

  COPILOT_EVENT_TYPES.include?(event_type) ||
    COPILOT_EVENT_TYPE_PREFIXES.any? { |prefix| event_type.start_with?(prefix) }
end
406
+
407
# Extracts token usage from one parsed JSON value.
#
# Untyped objects are handed to extract_top_level_token_usage. Typed events
# need a Hash "data" payload: shutdown events are read through
# extract_shutdown_token_usage, usage events are tagged :usage and
# assistant message events :assistant. Every other type yields nil.
#
# @param obj [Object] value produced by JSON.parse
# @return [Hash, nil] usage hash with :source, :input, :output,
#   :input_present and :output_present, or nil
def extract_token_usage(obj)
  return nil unless obj.is_a?(Hash)
  return extract_top_level_token_usage(obj) unless obj.key?("type")

  payload = obj["data"]
  return nil unless payload.is_a?(Hash)

  event_type = obj["type"]
  return extract_shutdown_token_usage(payload) if SESSION_SHUTDOWN_EVENT_TYPES.include?(event_type)

  source =
    if USAGE_EVENT_TYPES.include?(event_type)
      :usage
    elsif ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES.include?(event_type)
      :assistant
    end
  return nil unless source

  extract_payload_token_usage(
    payload,
    source: source,
    input_keys: ["inputTokens", "input_tokens"],
    output_keys: ["outputTokens", "output_tokens"]
  )
end
442
+
443
# Reads token usage from a session shutdown payload.
#
# Per-model metrics are looked up under "modelMetrics" and "model_metrics";
# for each of input and output the camelCase totals win when present.
#
# @param data [Hash] the event's "data" payload
# @return [Hash, nil] usage hash tagged source: :shutdown, or nil when
#   neither an input nor an output count was found
def extract_shutdown_token_usage(data)
  camel_totals, snake_totals = %w[modelMetrics model_metrics].map do |field|
    extract_shutdown_model_metrics_usage(data[field])
  end

  input_count, has_input = merged_token_metric(camel_totals, snake_totals, :input)
  output_count, has_output = merged_token_metric(camel_totals, snake_totals, :output)
  return nil unless has_input || has_output

  {
    source: :shutdown,
    input: input_count,
    output: output_count,
    input_present: has_input,
    output_present: has_output
  }
end
459
+
460
# Sums the token usage of every entry in a model metrics Hash.
#
# Entries that are not Hashes, have no Hash under "usage", or carry no
# usable token counts are skipped.
#
# @param model_metrics [Object] value found under the metrics key
# @return [Hash, nil] running totals (see empty_token_totals), or nil when
#   the argument is not a Hash or no entry contributed a count
def extract_shutdown_model_metrics_usage(model_metrics)
  return nil unless model_metrics.is_a?(Hash)

  totals = empty_token_totals

  model_metrics.each_value do |entry|
    usage_payload = entry.is_a?(Hash) ? entry["usage"] : nil
    next unless usage_payload.is_a?(Hash)

    entry_usage = extract_payload_token_usage(
      usage_payload,
      source: :shutdown,
      input_keys: ["inputTokens", "input_tokens", "input"],
      output_keys: ["outputTokens", "output_tokens", "output"]
    )
    accumulate_token_totals!(totals, entry_usage) if entry_usage
  end

  (totals[:input_present] || totals[:output_present]) ? totals : nil
end
486
+
487
# Reads input/output token counts from a payload Hash.
#
# @param payload [Object] candidate payload; non-Hash values yield nil
# @param source [Symbol] tag stored under :source in the result
# @param input_keys [Array<String>] keys tried in order for the input count
# @param output_keys [Array<String>] keys tried in order for the output count
# @return [Hash, nil] usage hash, or nil when neither count is present
def extract_payload_token_usage(payload, source:, input_keys:, output_keys:)
  return nil unless payload.is_a?(Hash)

  input_count, has_input = token_value(payload, *input_keys)
  output_count, has_output = token_value(payload, *output_keys)
  return nil unless has_input || has_output

  {
    source: source,
    input: input_count,
    output: output_count,
    input_present: has_input,
    output_present: has_output
  }
end
502
+
503
# Reads token usage from an untyped JSON object.
#
# Objects whose "role" (top-level or inside "message") is present and is
# not the assistant role are ignored. Counts are looked up under "usage"
# and "tokens"; for each of input and output the "usage" value wins when
# present.
#
# @param obj [Hash] parsed JSON object without a "type" key
# @return [Hash, nil] usage hash tagged source: :usage, or nil
def extract_top_level_token_usage(obj)
  return nil if obj.key?("role") && !assistant_role?(obj["role"])

  message = obj["message"]
  return nil if message.is_a?(Hash) && message.key?("role") && !assistant_role?(message["role"])

  from_usage, from_tokens = %w[usage tokens].map do |field|
    extract_payload_token_usage(
      obj[field],
      source: :usage,
      input_keys: ["input_tokens", "inputTokens", "input"],
      output_keys: ["output_tokens", "outputTokens", "output"]
    )
  end
  return nil unless from_usage || from_tokens

  input_count, has_input = merged_token_metric(from_usage, from_tokens, :input)
  output_count, has_output = merged_token_metric(from_usage, from_tokens, :output)
  return nil unless has_input || has_output

  {
    source: :usage,
    input: input_count,
    output: output_count,
    input_present: has_input,
    output_present: has_output
  }
end
534
+
535
# Picks one metric from the first of two usage hashes that has it.
#
# @param primary [Hash, nil] preferred usage hash
# @param fallback [Hash, nil] usage hash consulted second
# @param metric [Symbol] :input or :output
# @return [Array] [value, true] from the first hash whose
#   :"<metric>_present" flag is truthy, otherwise [0, false]
def merged_token_metric(primary, fallback, metric)
  flag = :"#{metric}_present"

  [primary, fallback].each do |candidate|
    return [candidate[metric], true] if candidate&.[](flag)
  end

  [0, false]
end
542
+
543
# Builds a fresh running-totals Hash with zero counts and both presence
# flags cleared.
#
# @return [Hash] a new Hash on every call
def empty_token_totals
  {input: 0, output: 0, input_present: false, output_present: false}
end
551
+
552
# Adds one usage hash onto running totals, in place.
#
# Only metrics whose presence flag is set in token_usage are added; the
# matching flag in totals is raised at the same time.
#
# @param totals [Hash] running totals, mutated
# @param token_usage [Hash] usage hash to add
# @return [Integer, nil] the new output total, or nil when token_usage has
#   no output count
def accumulate_token_totals!(totals, token_usage)
  if token_usage[:input_present]
    totals[:input_present] = true
    totals[:input] += token_usage[:input]
  end

  if token_usage[:output_present]
    totals[:output_present] = true
    totals[:output] += token_usage[:output]
  end
end
563
+
564
# Finds the first usable token count among several candidate keys.
#
# Keys are tried in the given order; a key that is missing, or whose value
# cannot be coerced to a token count, is skipped.
#
# @param obj [Hash] payload to read from
# @param keys [Array<String>] candidate keys
# @return [Array] [count, true] for the first usable value, otherwise
#   [0, false]
def token_value(obj, *keys)
  keys.each do |name|
    next unless obj.key?(name)

    count, usable = coerce_token_value(obj[name])
    return [count, true] if usable
  end

  [0, false]
end
574
+
575
# Chooses the final token counts from the three running totals.
#
# Usage totals are preferred over assistant fallback totals per metric. The
# shutdown totals are consulted only when neither of those provides an
# input or an output count.
#
# @param shutdown_tokens [Hash] totals from shutdown events
# @param usage_tokens [Hash] totals from usage events
# @param fallback_tokens [Hash] totals from other sources
# @return [Hash, nil] {input:, output:, total:} or nil when nothing is present
def build_tokens(shutdown_tokens:, usage_tokens:, fallback_tokens:)
  preferred = [usage_tokens, fallback_tokens]
  input, input_present = first_present_token_metric(*preferred, :input)
  output, output_present = first_present_token_metric(*preferred, :output)

  unless input_present || output_present
    input, input_present = first_present_token_metric(shutdown_tokens, :input)
    output, output_present = first_present_token_metric(shutdown_tokens, :output)
  end

  token_hash(input, output, input_present, output_present)
end
584
+
585
# Builds the public token summary.
#
# @param input [Integer] input token count
# @param output [Integer] output token count
# @param input_present [Boolean] whether an input count was found
# @param output_present [Boolean] whether an output count was found
# @return [Hash, nil] {input:, output:, total:}, or nil when neither count
#   was found
def token_hash(input, output, input_present, output_present)
  {input: input, output: output, total: input + output} if input_present || output_present
end
590
+
591
# Returns one metric from the first totals Hash that has it.
#
# @param sources [Array<Hash>] totals hashes, in priority order
# @param metric [Symbol] :input or :output (always the last argument)
# @return [Array] [value, true] from the first source whose
#   :"<metric>_present" flag is truthy, otherwise [0, false]
def first_present_token_metric(*sources, metric)
  flag = :"#{metric}_present"
  winner = sources.find { |source| source[flag] }

  winner ? [winner[metric], true] : [0, false]
end
602
+
603
# Joins output segments into the final output string.
#
# Segment contents are concatenated in order. A newline is inserted before
# a non-assistant segment when the preceding segment was an assistant
# segment marked as terminated and the text so far does not already end
# with a newline.
#
# @param segments [Array<Hash>] hashes with :kind, :content and :terminated
# @return [String] a new mutable String
def render_output_segments(segments)
  rendered = +""
  previous = nil

  segments.each do |segment|
    needs_break = previous &&
      previous[:terminated] &&
      previous[:kind] == :assistant &&
      segment[:kind] != :assistant &&
      !rendered.empty? &&
      !rendered.end_with?("\n")
    rendered << "\n" if needs_break

    rendered << segment[:content]
    previous = segment
  end

  rendered
end
623
+
624
# Adds streamed assistant text to the segment list.
#
# When the last segment is a provisional assistant segment the text is
# appended to it; otherwise a new provisional assistant segment is pushed.
#
# @param segments [Array<Hash>] segment list, mutated
# @param text [String] delta text
# @param terminated [Boolean] whether the source line ended with a newline
# @return [Array, nil] segments when a new segment was pushed, nil when an
#   existing one was extended
def append_delta_segment!(segments, text, terminated:)
  tail = segments.last

  if tail && tail[:provisional] && tail[:kind] == :assistant
    tail[:content] << text
    tail[:terminated] = terminated
    nil
  else
    # Unary plus makes sure the stored content can be appended to later.
    segments << {kind: :assistant, content: +text, terminated: terminated, provisional: true}
  end
end
639
+
640
# Replaces every assistant segment with one final assistant segment.
#
# Existing assistant segments are removed and a single segment holding
# text is pushed at the end of the list.
#
# @param segments [Array<Hash>] segment list, mutated
# @param text [String] complete assistant text
# @param terminated [Boolean] whether the source line ended with a newline
# @return [Array] segments
def replace_assistant_segments!(segments, text, terminated:)
  drop_assistant_segments!(segments)

  final_segment = {kind: :assistant, content: text, terminated: terminated}
  segments << final_segment
end
644
+
645
# Removes every assistant segment from the list, in place.
#
# @param segments [Array<Hash>] segment list, mutated
# @return [Array, nil] segments, or nil when nothing was removed
def drop_assistant_segments!(segments)
  segments.reject! do |entry|
    entry[:kind] == :assistant
  end
end
648
+
649
# Runs the block with env pushed onto this provider's probe env stack.
#
# The entry is popped again when the block finishes or raises, and the
# stack bookkeeping is cleared once the stack is empty.
#
# @param env [Hash] probe environment for the duration of the block
# @yield runs with env as the current probe environment
# @return the block's result
def with_request_probe_env(env)
  frames = writable_request_probe_env_stack
  frames.push(env)
  yield
ensure
  # frames is nil when fetching the stack itself raised.
  if frames
    frames.pop
    clear_request_probe_env_stack! if frames.empty?
  end
end
657
+
658
# Returns the probe environment currently active for this provider on the
# current thread.
#
# @return [Hash] the top of this provider's stack, or an empty Hash when
#   nothing is active
def current_probe_env
  registry = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
  frames = registry && registry[object_id]

  (frames && frames.last) || {}
end
663
+
664
# Computes the key under which version probe results are cached for env.
#
# @param env [Hash] probe environment
# @return [String, Array] the resolved binary path when one is found;
#   otherwise [:path_override, digest] when env has a "PATH" key;
#   otherwise the class's binary name
def version_probe_env_cache_key(env)
  resolved = resolved_binary_path_for_env(env)
  return resolved if resolved
  return [:path_override, cacheable_path_override(env["PATH"])] if env.key?("PATH")

  self.class.binary_name
end
672
+
673
# Reduces a PATH override to its SHA-256 hex digest.
#
# @param path [Object] PATH value from a probe environment
# @return [String, nil] hex digest of the String, nil for non-Strings
def cacheable_path_override(path)
  path.is_a?(String) ? Digest::SHA256.hexdigest(path) : nil
end
678
+
679
# Locates the provider binary on the PATH that applies to env.
#
# The PATH comes from env when it has a "PATH" key (even if the value is
# nil), otherwise from the process environment. Entries are searched in
# order.
#
# Two kinds of entries are deliberately not matched:
# * directories named like the binary - File.executable? alone is also
#   true for searchable directories, so a regular file is required;
# * empty PATH entries - File.join("", name) would point at the filesystem
#   root instead of a PATH directory.
#
# @param env [Hash] probe environment
# @return [String, nil] path of the first matching executable file, or nil
def resolved_binary_path_for_env(env)
  path = env.key?("PATH") ? env["PATH"] : ENV["PATH"]
  return nil unless path.is_a?(String) && !path.empty?

  binary = self.class.binary_name
  path.split(File::PATH_SEPARATOR).each do |entry|
    next if entry.empty?

    candidate = File.join(entry, binary)
    return candidate if File.file?(candidate) && File.executable?(candidate)
  end

  nil
end
694
+
695
# Derives the probe environment from a request's provider runtime.
#
# A ProviderRuntime contributes its env with every unset_env name mapped to
# nil. A Hash is normalised through request_probe_env_from_raw_hash. nil
# and every other value yield an empty Hash.
#
# @param runtime [ProviderRuntime, Hash, nil, Object]
# @return [Hash] probe environment
def request_probe_env_from_raw_runtime(runtime)
  return {} if runtime.nil?

  if runtime.is_a?(ProviderRuntime)
    assignments = runtime.env
    return assignments.merge(runtime.unset_env.to_h { |name| [name, nil] })
  end

  runtime.is_a?(Hash) ? request_probe_env_from_raw_hash(runtime) : {}
end
707
+
708
# Derives the probe environment from a runtime given as a plain Hash.
#
# "env" and "unset_env" are read under Symbol keys first, then String keys.
# When either fails normalisation the result is an empty Hash.
#
# @param runtime_hash [Hash] raw runtime options
# @return [Hash] env with every unset_env name mapped to nil
def request_probe_env_from_raw_hash(runtime_hash)
  read = ->(name) { runtime_hash[name] || runtime_hash[name.to_s] }

  assignments = stringify_probe_env(read.call(:env))
  removals = stringify_probe_unset_env(read.call(:unset_env))
  return {} unless assignments && removals

  assignments.merge(removals.to_h { |name| [name, nil] })
end
715
+
716
# Normalises a raw env Hash to String keys.
#
# @param raw_env [Hash, nil, Object]
# @return [Hash, nil] an empty Hash for nil; a copy with keys converted via
#   to_s when every value is a String; nil when the argument is not a Hash
#   or any value is not a String
def stringify_probe_env(raw_env)
  return {} if raw_env.nil?
  return nil unless raw_env.is_a?(Hash)
  return nil unless raw_env.each_value.all? { |value| value.is_a?(String) }

  raw_env.each_with_object({}) { |(name, value), normalized| normalized[name.to_s] = value }
end
726
+
727
# Normalises a raw list of variable names to Strings.
#
# @param raw_unset_env [Array, nil, Object]
# @return [Array<String>, nil] an empty Array for nil; the names converted
#   via to_s for an Array; nil for any other value or when an element does
#   not respond to to_s
def stringify_probe_unset_env(raw_unset_env)
  case raw_unset_env
  when nil then []
  when Array then raw_unset_env.map { |name| name.to_s }
  end
rescue NoMethodError
  nil
end
735
+
736
# Returns this provider's probe env stack for the current thread, creating
# the thread-level registry and the stack on first use.
#
# @return [Array] the stack, keyed by this object's object_id
def writable_request_probe_env_stack
  thread = Thread.current
  registry = thread.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)

  unless registry
    registry = {}
    thread.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, registry)
  end

  registry[object_id] ||= []
end
745
+
746
# Removes this provider's stack from the current thread's registry and
# resets the thread variable to nil once no stacks remain.
#
# @return [Object, nil] nil when no registry exists on the thread
def clear_request_probe_env_stack!
  thread = Thread.current
  registry = thread.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
  return unless registry

  registry.delete(object_id)
  thread.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, nil) if registry.empty?
end
753
+
754
# Tells whether the Copilot CLI reachable through env supports JSON output.
#
# The answer is memoised per cache key (see version_probe_env_cache_key).
# Any error raised in the method body is recorded as false.
#
# @param env [Hash] probe environment; defaults to the current one
# @return [Boolean] true when the detected version is at least
#   MIN_JSON_OUTPUT_VERSION
def copilot_cli_supports_json_output?(env: current_probe_env)
  @copilot_cli_supports_json_output ||= {}
  cache_key = version_probe_env_cache_key(env)

  memo = @copilot_cli_supports_json_output
  return memo[cache_key] if memo.key?(cache_key)

  detected = copilot_cli_version(env: env)
  memo[cache_key] = detected.nil? ? false : detected >= MIN_JSON_OUTPUT_VERSION
rescue
  @copilot_cli_supports_json_output[cache_key] = false
end
764
+
765
# Detects the Copilot CLI version by running the binary with --version.
#
# The result is memoised per cache key, including failures: a non-zero exit
# code, unparseable output, or any raised error is stored as nil.
#
# @param env [Hash] probe environment; defaults to the current one
# @return [Gem::Version, nil]
def copilot_cli_version(env: current_probe_env)
  @copilot_cli_version ||= {}
  cache_key = version_probe_env_cache_key(env)

  memo = @copilot_cli_version
  return memo[cache_key] if memo.key?(cache_key)

  probe = @executor.execute([self.class.binary_name, "--version"], timeout: 5, env: env)
  memo[cache_key] =
    if probe.exit_code.zero?
      # stdout is tried first, stderr only when stdout has no version.
      parse_copilot_cli_version(probe.stdout) || parse_copilot_cli_version(probe.stderr)
    end
rescue
  @copilot_cli_version[cache_key] = nil
end
777
+
778
# Extracts the first version number from CLI output.
#
# Matches MAJOR.MINOR.PATCH with an optional "-prerelease" suffix. Build
# metadata ("+build.5") is left out of the capture on purpose:
# Gem::Version rejects "+", so capturing it turned a valid version such as
# "1.2.3+sha" into nil.
#
# @param output [Object] raw command output; converted with to_s
# @return [Gem::Version, nil] nil when no version is found or the matched
#   text is not a valid Gem::Version string
def parse_copilot_cli_version(output)
  match = output.to_s.match(/(\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?)/)
  return nil unless match

  Gem::Version.new(match[1])
rescue ArgumentError
  nil
end
786
+
787
# Converts a raw JSON value into a token count.
#
# Accepted values: non-negative Integers, finite non-negative Floats with
# no fractional part, and Strings made only of digits with an optional
# leading "+".
#
# @param value [Object] raw value from a usage payload
# @return [Array] [count, true] for an accepted value, otherwise [0, false]
def coerce_token_value(value)
  accepted =
    case value
    when Integer then value >= 0
    when Float then value.finite? && value >= 0 && value == value.to_i
    when String then /\A\+?\d+\z/.match?(value)
    else false
    end

  accepted ? [value.to_i, true] : [0, false]
end
206
799
  end
207
800
  end
208
801
  end