agent-harness 0.17.2 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
4
3
  require "json"
5
4
  require "pathname"
5
+ require "securerandom"
6
+ require "tmpdir"
6
7
 
7
8
  module AgentHarness
8
9
  module Providers
9
10
  class GithubCopilot < Base
11
+ include McpConfigFileSupport
10
12
  include TokenUsageParsing
11
13
 
14
+ CLI_PACKAGE = "@github/copilot"
15
+ INSTALL_COMMAND_PREFIX = ["npm", "install", "-g"].freeze
16
+ DEFAULT_MAX_AUTOPILOT_CONTINUES = 50
17
+ LEGACY_BINARY_NAME = "github-copilot-cli"
12
18
  MODEL_PATTERN = /^gpt-[\d.o-]+(?:-turbo)?(?:-mini)?$/i
13
- JSON_OUTPUT_MIN_VERSION = Gem::Version.new("0.0.422").freeze
14
- SUBCOMMAND_CLI_MIN_VERSION = Gem::Version.new("0.1.0").freeze
15
- UNSUPPORTED_SUBCOMMAND_CLI_MESSAGE =
16
- "github-copilot-cli 0.1.x does not expose a non-interactive send interface; " \
17
- "the what-the-shell subcommand is interactive and cannot be used by AgentHarness."
19
+
20
+ GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
21
+ CHAT_DEFAULT_MODEL = "gpt-4o"
22
+ CHAT_MODELS = %w[gpt-4o gpt-4o-mini gpt-4-turbo].freeze
18
23
 
19
24
  SMOKE_TEST_CONTRACT = {
20
25
  prompt: "Reply with exactly OK.",
@@ -30,27 +35,41 @@ module AgentHarness
30
35
  end
31
36
 
32
37
  def binary_name
33
- "github-copilot-cli"
38
+ "copilot"
34
39
  end
35
40
 
36
41
  def available?
37
42
  executor = AgentHarness.configuration.command_executor
38
43
  return false unless executor.which(binary_name)
39
44
 
40
- !subcommand_cli_version?(copilot_cli_version(executor: executor))
45
+ true
41
46
  rescue
42
47
  false
43
48
  end
44
49
 
45
50
  def installation_contract(version: nil)
46
- # The published @githubnext/github-copilot-cli package only has
47
- # 0.1.x releases, and those expose an interactive subcommand instead
48
- # of the non-interactive -p prompt path AgentHarness uses.
49
- nil
51
+ normalized_version = normalize_install_version(version)
52
+ package = normalized_version ? "#{CLI_PACKAGE}@#{normalized_version}" : CLI_PACKAGE
53
+ install_command = (INSTALL_COMMAND_PREFIX + [package]).freeze
54
+
55
+ contract = {
56
+ source: :npm,
57
+ package: package,
58
+ package_name: CLI_PACKAGE,
59
+ version: normalized_version,
60
+ binary_name: binary_name,
61
+ install_command_prefix: INSTALL_COMMAND_PREFIX,
62
+ install_command: install_command
63
+ }
64
+
65
+ contract.each_value do |value|
66
+ value.freeze if value.is_a?(String)
67
+ end
68
+ contract.freeze
50
69
  end
51
70
 
52
71
  def install_command(version: nil)
53
- installation_contract(version: version)&.fetch(:install_command)
72
+ installation_contract(version: version)[:install_command]
54
73
  end
55
74
 
56
75
  def provider_metadata_overrides
@@ -120,26 +139,14 @@ module AgentHarness
120
139
 
121
140
  private
122
141
 
123
- def copilot_cli_version(executor:)
124
- result = executor.execute([binary_name, "--version"], timeout: 5, env: {})
125
- extract_version(result)
126
- rescue
127
- nil
128
- end
142
+ def normalize_install_version(version)
143
+ return nil if version.nil?
129
144
 
130
- def subcommand_cli_version?(version)
131
- !version.nil? && version >= SUBCOMMAND_CLI_MIN_VERSION
132
- end
133
-
134
- def extract_version(result)
135
- return nil unless result.success?
136
-
137
- version_string = [result.stdout, result.stderr].compact.join("\n")[/\d+\.\d+\.\d+(?:[-+][A-Za-z0-9.-]+)?/]
138
- return nil if version_string.nil? || version_string.empty?
145
+ unless version.is_a?(String) && !version.strip.empty?
146
+ raise ArgumentError, "Unsupported GitHub Copilot CLI version #{version.inspect}"
147
+ end
139
148
 
140
- Gem::Version.new(version_string)
141
- rescue ArgumentError
142
- nil
149
+ version.strip
143
150
  end
144
151
  end
145
152
 
@@ -174,35 +181,12 @@ module AgentHarness
174
181
  file_upload: false,
175
182
  vision: false,
176
183
  tool_use: true,
177
- json_mode: false,
178
- mcp: false,
184
+ json_mode: true,
185
+ mcp: true,
179
186
  dangerous_mode: true
180
187
  }
181
188
  end
182
189
 
183
- def dangerous_mode_flags(probe_timeout: nil, env: {}, version: nil)
184
- version ||= copilot_cli_version(probe_timeout: probe_timeout, env: env)
185
- return [] if subcommand_cli_version?(version)
186
- return [] unless supports_json_output_format?(version: version)
187
-
188
- ["--allow-all"]
189
- end
190
-
191
- def supports_sessions?(probe_timeout: nil, env: {}, version: :not_provided)
192
- legacy_prompt_cli?(version: version, probe_timeout: probe_timeout, env: env)
193
- end
194
-
195
- def session_flags(session_id, version: :not_provided, probe_timeout: nil, env: {})
196
- return [] unless session_id && !session_id.empty?
197
- return [] unless legacy_prompt_cli?(version: version, probe_timeout: probe_timeout, env: env)
198
-
199
- ["--resume", session_id]
200
- end
201
-
202
- GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
203
- CHAT_DEFAULT_MODEL = "gpt-4o"
204
- CHAT_MODELS = %w[gpt-4o gpt-4o-mini gpt-4-turbo].freeze
205
-
206
190
  def supports_chat?
207
191
  true
208
192
  end
@@ -233,19 +217,51 @@ module AgentHarness
233
217
  :openai_compatible
234
218
  end
235
219
 
220
+ def api_key_env_var_names
221
+ ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]
222
+ end
223
+
224
+ def api_key_unset_vars
225
+ ["COPILOT_PROVIDER_API_KEY", "COPILOT_PROVIDER_BASE_URL"]
226
+ end
227
+
228
+ def subscription_unset_vars
229
+ api_key_env_var_names + api_key_unset_vars
230
+ end
231
+
236
232
  def auth_type
237
233
  :oauth
238
234
  end
239
235
 
236
+ def dangerous_mode_flags
237
+ ["--yolo"]
238
+ end
239
+
240
+ def supports_mcp?
241
+ true
242
+ end
243
+
244
+ def supported_mcp_transports
245
+ %w[stdio http sse]
246
+ end
247
+
248
+ def build_mcp_flags(mcp_servers, options:)
249
+ return [] if mcp_servers.empty?
250
+
251
+ ["--additional-mcp-config", "@#{mcp_config_plan(options, mcp_servers).fetch(:path)}"]
252
+ end
253
+
254
+ def supports_sessions?
255
+ false
256
+ end
257
+
240
258
  def execution_semantics
241
259
  {
242
260
  prompt_delivery: :arg,
243
- # Older Copilot CLIs fall back to plain-text prompt mode, so metadata
244
- # must not claim JSON-only output even though newer versions support it.
245
- output_format: :text,
261
+ output_format: :json,
246
262
  sandbox_aware: false,
247
263
  uses_subcommand: false,
248
- non_interactive_flag: nil,
264
+ non_interactive_flag: "--autopilot",
249
265
  legitimate_exit_codes: [0],
250
266
  stderr_is_diagnostic: true,
251
267
  parses_rate_limit_reset: false
@@ -255,208 +271,135 @@ module AgentHarness
255
271
  def error_patterns
256
272
  {
257
273
  auth_expired: [
274
+ /not.?logged.?in/i,
258
275
  /not.?authorized/i,
259
- /access.?denied/i,
260
- /permission.?denied/i,
261
- /not.?enabled/i,
262
- /subscription.?required/i
276
+ /authentication/i,
277
+ /token.*invalid/i,
278
+ /copilot requests/i
263
279
  ],
264
280
  rate_limited: [
265
- /usage.?limit/i,
266
- /rate.?limit/i
281
+ /rate.?limit/i,
282
+ /too.?many.?requests/i,
283
+ /\b429\b/
267
284
  ],
268
285
  transient: [
269
286
  /connection.?error/i,
270
287
  /timeout/i,
271
- /try.?again/i
288
+ /try.?again/i,
289
+ /\b502\b/,
290
+ /\b503\b/
272
291
  ],
273
292
  permanent: [
274
- /invalid.?command/i,
275
- /unknown.?flag/i
293
+ /unknown.?flag/i,
294
+ /invalid.?value/i,
295
+ /continuation limit/i,
296
+ /max.?autopilot.?continues/i
276
297
  ]
277
298
  }
278
299
  end
279
300
 
280
301
  def translate_error(message)
281
302
  case message
282
- when /github-copilot-cli.*not found/i then "GitHub Copilot CLI not installed."
283
- else message
303
+ when /copilot.*not found/i, /No such file or directory - copilot/i
304
+ "GitHub Copilot CLI not installed."
305
+ else
306
+ message
284
307
  end
285
308
  end
286
309
 
287
310
  def supports_token_counting?
288
- supports_json_output_format?
311
+ true
289
312
  end
290
313
 
291
314
  def send_message(prompt:, **options)
292
- log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
293
-
294
- options = normalize_provider_runtime(options)
295
- options = normalize_mcp_servers(options)
296
- validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
297
-
298
- timeout = options[:timeout] || @config.timeout || default_timeout
299
- raise TimeoutError, "Command timed out before execution started" if timeout <= 0
315
+ super
316
+ ensure
317
+ cleanup_mcp_tempfiles!
318
+ end
300
319
 
301
- env = build_env(options)
302
- options = options.merge(_version_probe_timeout: [timeout, 5].min, _command_env: env)
320
+ def build_command(prompt, options)
321
+ runtime = options[:provider_runtime]
322
+ cmd = [
323
+ self.class.binary_name,
324
+ "--autopilot",
325
+ "--max-autopilot-continues",
326
+ max_autopilot_continues(options).to_s,
327
+ "--output-format",
328
+ "json"
329
+ ]
330
+ # Smoke tests must run non-interactively; force full-permission mode
331
+ # so autopilot does not stall on permission prompts.
332
+ cmd += dangerous_mode_flags if (options[:dangerous_mode] || options[:smoke_test]) && supports_dangerous_mode?
303
333
 
304
- start_time = Time.now
305
- command = build_command(prompt, options)
306
- preparation = build_execution_preparation(options)
307
- remaining_timeout = timeout - (Time.now - start_time)
308
- raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
334
+ if options[:mcp_servers]&.any?
335
+ cmd += build_mcp_flags(options[:mcp_servers], options: options)
336
+ end
309
337
 
310
- json_output_requested = command.include?("--output-format") && command.include?("json")
338
+ cmd += @config.default_flags if @config.default_flags&.any?
311
339
 
312
- result = execute_with_timeout(
313
- command,
314
- timeout: remaining_timeout,
315
- env: env,
316
- preparation: preparation,
317
- **command_execution_options(options)
318
- )
319
- duration = Time.now - start_time
340
+ model = effective_model_name(runtime)
341
+ cmd += ["--model", model] if model
320
342
 
321
- response = parse_response(result, duration: duration, json_output_requested: json_output_requested)
322
- runtime = options[:provider_runtime]
323
- effective_runtime_model = normalized_model_name(runtime&.model)
324
- if effective_runtime_model
325
- response = Response.new(
326
- output: response.output,
327
- exit_code: response.exit_code,
328
- duration: response.duration,
329
- provider: response.provider,
330
- model: effective_runtime_model,
331
- tokens: response.tokens,
332
- metadata: response.metadata,
333
- error: response.error
334
- )
343
+ if runtime
344
+ runtime_flags = runtime.flags
345
+ cmd += runtime_flags unless runtime_flags.empty?
335
346
  end
336
347
 
337
- track_tokens(response) if response.tokens
338
-
339
- log_debug("send_message_complete", duration: duration, tokens: response.tokens)
340
-
341
- response
342
- rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
343
- raise
344
- rescue => e
345
- handle_error(e, prompt: prompt, options: options)
348
+ cmd += test_command_overrides if options[:smoke_test]
349
+ cmd += ["-p", prompt]
350
+ cmd
346
351
  end
347
352
 
348
- def plan_execution(prompt:, **options)
349
- log_debug("plan_execution_start", prompt_length: prompt.length, options: options.keys)
353
+ def build_env(options)
354
+ env = super
355
+ needs_full_permissions = options[:dangerous_mode] || options[:smoke_test]
356
+ return env unless needs_full_permissions && supports_dangerous_mode?
350
357
 
351
- options = normalize_provider_runtime(options)
352
- options = normalize_mcp_servers(options)
353
- validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
358
+ env.merge("COPILOT_ALLOW_ALL" => "true")
359
+ end
354
360
 
355
- env = build_env(options)
356
- version = planned_copilot_cli_version(env)
357
- raise unsupported_subcommand_cli_error if subcommand_cli_version?(version)
361
+ def build_execution_preparation(options)
362
+ return nil unless options[:mcp_servers]&.any?
358
363
 
359
- options = options.merge(_command_env: env, _planned_cli_version: version)
364
+ plan = mcp_config_plan(options, options[:mcp_servers])
365
+ ExecutionPreparation.new(
366
+ file_writes: [
367
+ {
368
+ path: plan.fetch(:path),
369
+ content: plan.fetch(:content),
370
+ mode: 0o600
371
+ }
372
+ ]
373
+ )
374
+ end
360
375
 
361
- {
362
- command: build_command(prompt, options),
363
- env: env,
364
- preparation: build_execution_preparation(options)
365
- }
366
- rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
367
- raise
368
- rescue => e
369
- handle_error(e, prompt: prompt, options: options)
370
- end
371
-
372
- # Parse raw container output into a Response.
373
- #
374
- # Overrides the base implementation to support the
375
- # +json_output_requested+ option, which controls whether JSONL
376
- # output is parsed for token extraction.
377
- #
378
- # @param stdout [String] captured standard output
379
- # @param stderr [String] captured standard error
380
- # @param exit_code [Integer] process exit code
381
- # @param duration [Float] execution duration in seconds
382
- # @param options [Hash] additional options
383
- # @option options [Boolean] :json_output_requested whether to parse JSONL output
384
- # @return [Response] parsed response
385
- def parse_container_output(stdout:, stderr: "", exit_code: 0, duration: 0.0, **options)
376
+ def parse_container_output(stdout:, stderr: "", exit_code: 0, duration: 0.0, **_options)
386
377
  result = CommandExecutor::Result.new(
387
378
  stdout: stdout,
388
379
  stderr: stderr,
389
380
  exit_code: exit_code,
390
381
  duration: duration
391
382
  )
392
- parse_response(
393
- result,
394
- duration: duration,
395
- json_output_requested: options.fetch(:json_output_requested, false)
396
- )
383
+ parse_response(result, duration: duration)
397
384
  end
398
385
 
399
386
  protected
400
387
 
401
- def build_command(prompt, options)
402
- env = options.fetch(:_command_env) { build_env(options) }
403
- runtime = options[:provider_runtime]
404
- version = if options.key?(:_planned_cli_version)
405
- options[:_planned_cli_version]
406
- else
407
- copilot_cli_version(
408
- probe_timeout: options[:_version_probe_timeout],
409
- env: env
410
- )
411
- end
412
-
413
- raise unsupported_subcommand_cli_error if subcommand_cli_version?(version)
414
-
415
- cmd = [self.class.binary_name, "-p", prompt]
416
-
417
- if supports_json_output_format?(version: version)
418
- cmd += ["--output-format", "json"]
419
- else
420
- # Silent mode suppresses the model/stats decoration older CLIs print in
421
- # prompt mode, which keeps smoke-test output stable on the plain-text path.
422
- cmd << "-s"
423
- end
424
-
425
- model = effective_model_name(runtime)
426
- cmd += ["--model", model] if model
427
- if options[:dangerous_mode] && supports_dangerous_mode?
428
- cmd += programmatic_tool_approval_flags
429
- cmd += dangerous_mode_flags(version: version)
430
- end
431
-
432
- if options[:session] && !options[:session].empty?
433
- cmd += session_flags(options[:session], version: version)
434
- end
435
-
436
- cmd
437
- end
438
-
439
- def parse_response(result, duration:, json_output_requested: false)
440
- response = super(result, duration: duration)
441
- output = response.output
442
- tokens = nil
443
-
444
- parsed_lines = if json_output_requested && response.error.nil?
445
- parse_jsonl_output(output)
446
- end
447
- if parsed_lines
448
- output = extract_text_from_jsonl(parsed_lines) || output
449
- tokens = extract_tokens_from_jsonl(parsed_lines)
450
- end
388
+ def parse_response(result, duration:)
389
+ response = super
390
+ parsed_lines = parse_jsonl_output(response.output)
391
+ output = extract_text_from_jsonl(parsed_lines) || response.output
392
+ tokens = extract_tokens_from_jsonl(parsed_lines)
393
+ metadata = extract_metadata_from_jsonl(parsed_lines).merge(response.metadata)
451
394
 
452
395
  Response.new(
453
396
  output: output,
454
397
  exit_code: result.exit_code,
455
398
  duration: duration,
456
399
  provider: self.class.provider_name,
457
- model: effective_model_name,
400
+ model: normalized_model_name(metadata[:model]) || effective_model_name,
458
401
  tokens: tokens,
459
- metadata: response.metadata,
402
+ metadata: metadata,
460
403
  error: response.error
461
404
  )
462
405
  end
@@ -467,219 +410,74 @@ module AgentHarness
467
410
 
468
411
  private
469
412
 
470
- def programmatic_tool_approval_flags
471
- ["--allow-all-tools"]
472
- end
473
-
474
- def supports_json_output_format?(probe_timeout: nil, env: {}, version: :not_provided)
475
- version = copilot_cli_version(probe_timeout: probe_timeout, env: env) if version == :not_provided
476
- !version.nil? && !subcommand_cli_version?(version) && version >= JSON_OUTPUT_MIN_VERSION
477
- end
478
-
479
- def legacy_prompt_cli?(probe_timeout: nil, env: {}, version: :not_provided)
480
- version = copilot_cli_version(probe_timeout: probe_timeout, env: env) if version == :not_provided
481
- !version.nil? && !subcommand_cli_version?(version)
482
- end
483
-
484
- def subcommand_cli_version?(version)
485
- self.class.send(:subcommand_cli_version?, version)
486
- end
487
-
488
- def unsupported_subcommand_cli_error
489
- ProviderError.new(UNSUPPORTED_SUBCOMMAND_CLI_MESSAGE)
490
- end
491
-
492
- def copilot_cli_version(probe_timeout: nil, env: {})
493
- return nil if env.empty? && !copilot_cli_binary_available?
494
-
495
- cache_key = version_probe_cache_key(env)
496
- @copilot_cli_versions ||= {}
497
- return @copilot_cli_versions[cache_key] if @copilot_cli_versions.key?(cache_key)
498
-
499
- result = @executor.execute([self.class.binary_name, "--version"], timeout: probe_timeout || 5, env: env)
500
- version = extract_version(result)
501
- @copilot_cli_versions[cache_key] = version
502
- version
503
- rescue => e
504
- log_debug("copilot_cli_version_check_failed", error: e.message)
505
- @copilot_cli_versions ||= {}
506
- @copilot_cli_versions[cache_key] = nil if defined?(cache_key)
507
- end
508
-
509
- def planned_copilot_cli_version(env)
510
- cache_key = version_probe_cache_key(env)
511
- @copilot_cli_versions ||= {}
512
- return @copilot_cli_versions[cache_key] if @copilot_cli_versions.key?(cache_key)
513
-
514
- # When no cached version is available (cold start), return nil so
515
- # build_command falls back to the conservative -s flag path, matching
516
- # the behavior of send_message when the version probe returns nil.
517
- nil
518
- end
519
-
520
- def version_probe_cache_key(env)
521
- [
522
- probe_env_cache_component(env, "PATH", inherited_label: :inherited_path, override_label: :path_override),
523
- probe_env_cache_component(env, "PATHEXT", inherited_label: :inherited_pathext, override_label: :pathext_override)
524
- ]
525
- end
526
-
527
- def probe_env_cache_component(env, key, inherited_label:, override_label:)
528
- label, value = if env_override_present?(env, key)
529
- [override_label, env_override_value(env, key)]
530
- else
531
- [inherited_label, ENV[key]]
532
- end
533
- return [label, :unset] if value.nil?
534
-
535
- [label, Digest::SHA256.hexdigest(value)]
536
- end
537
-
538
- def env_override_present?(env, key)
539
- env.key?(key) || env.key?(key.to_sym)
540
- end
541
-
542
- def env_override_value(env, key)
543
- return env[key] if env.key?(key)
544
-
545
- env[key.to_sym]
546
- end
547
-
548
- def copilot_cli_binary_available?
549
- @executor.which(self.class.binary_name)
550
- rescue => e
551
- log_debug("copilot_cli_binary_check_failed", error: e.message)
552
- nil
553
- end
554
-
555
- def extract_version(result)
556
- self.class.send(:extract_version, result)
413
+ def max_autopilot_continues(options)
414
+ runtime = options[:provider_runtime]
415
+ candidate = runtime&.metadata&.[](:max_autopilot_continues) ||
416
+ runtime&.metadata&.[]("max_autopilot_continues") ||
417
+ options[:max_autopilot_continues]
418
+ value = Integer(candidate, exception: false)
419
+ (value && value > 0) ? value : DEFAULT_MAX_AUTOPILOT_CONTINUES
557
420
  end
558
421
 
559
422
  def parse_jsonl_output(output)
560
- return nil if output.nil? || output.strip.empty?
423
+ return [] if output.nil? || output.strip.empty?
561
424
 
562
- parsed = output.each_line(chomp: true).filter_map do |line|
425
+ output.each_line(chomp: true).filter_map do |line|
563
426
  next if line.strip.empty?
564
427
 
565
428
  JSON.parse(line)
566
429
  rescue JSON::ParserError
567
430
  next
568
431
  end
569
-
570
- parsed.empty? ? nil : parsed
571
432
  end
572
433
 
573
- def extract_text_from_jsonl(parsed_lines)
574
- output = +""
575
- saw_text = false
576
- saw_delta = false
577
-
434
+ def extract_metadata_from_jsonl(parsed_lines)
435
+ metadata = {}
578
436
  parsed_lines.each do |obj|
579
437
  next unless obj.is_a?(Hash)
580
- next unless assistant_output_event?(obj)
581
-
582
- full_text = extract_non_delta_text(obj)
583
- if full_text
584
- output = if replace_output_with_full_text?(
585
- output,
586
- full_text,
587
- saw_delta: saw_delta,
588
- authoritative_snapshot: authoritative_full_snapshot?(obj)
589
- )
590
- full_text.dup
591
- else
592
- output + full_text
593
- end
594
- saw_text = true
595
- saw_delta = false
596
- end
597
-
598
- delta_text = extract_delta_text(obj)
599
- next unless delta_text
600
-
601
- output << delta_text
602
- saw_text = true
603
- saw_delta = true
604
- end
605
-
606
- saw_text ? output : nil
607
- end
608
438
 
609
- def replace_output_with_full_text?(existing_output, full_text, saw_delta:, authoritative_snapshot:)
610
- saw_delta ||
611
- authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot: authoritative_snapshot) ||
612
- (!existing_output.empty? && (
613
- full_text.start_with?(existing_output) ||
614
- existing_output.start_with?(full_text)
615
- ))
616
- end
617
-
618
- def authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot:)
619
- authoritative_snapshot &&
620
- !existing_output.empty? &&
621
- (
622
- existing_output.length == full_text.length ||
623
- full_text.start_with?(existing_output) ||
624
- existing_output.start_with?(full_text) ||
625
- longest_common_substring_length(existing_output, full_text) >= [[existing_output.length, full_text.length].min / 2, 1].max
439
+ model = normalized_model_name(
440
+ obj["model"] ||
441
+ nested_hash_value(obj, "message", "model") ||
442
+ nested_hash_value(obj, "data", "model") ||
443
+ nested_hash_value(obj, "data", "message", "model")
626
444
  )
445
+ metadata[:model] = model if model
446
+ end
447
+ metadata
627
448
  end
628
449
 
629
- def longest_common_substring_length(left, right)
630
- return 0 if left.empty? || right.empty?
450
+ def extract_text_from_jsonl(parsed_lines)
451
+ return nil if parsed_lines.empty?
631
452
 
632
- longest = 0
633
- row = Array.new(right.length + 1, 0)
453
+ # Track snapshots and deltas with their position so we can merge
454
+ # a final snapshot with any deltas that follow it.
455
+ last_snapshot = nil
456
+ last_snapshot_index = -1
457
+ deltas = []
634
458
 
635
- left.each_char do |left_char|
636
- previous = 0
459
+ parsed_lines.each_with_index do |obj, index|
460
+ next unless assistant_output_event?(obj)
637
461
 
638
- right.each_char.with_index(1) do |right_char, index|
639
- current = row[index]
640
- row[index] = if left_char == right_char
641
- previous + 1
642
- else
643
- 0
644
- end
645
- longest = [longest, row[index]].max
646
- previous = current
462
+ snapshot = extract_non_delta_text(obj)
463
+ if snapshot && !snapshot.empty?
464
+ last_snapshot = snapshot
465
+ last_snapshot_index = index
647
466
  end
648
- end
649
-
650
- longest
651
- end
652
-
653
- def authoritative_full_snapshot?(obj)
654
- obj["type"].to_s.match?(/\A(?:assistant\.message|turn\.)/) ||
655
- obj["message"].is_a?(Hash) ||
656
- nested_hash_value(obj, "data", "message").is_a?(Hash)
657
- end
658
467
 
659
- def assistant_output_event?(obj)
660
- type = obj["type"]
661
- return true if type.nil? && !role_key_present?(obj)
468
+ delta = extract_delta_text(obj)
469
+ deltas << [index, delta] if delta && !delta.empty?
470
+ end
662
471
 
663
- role = extract_event_role(obj)
664
- return true if role.nil? && type.to_s.match?(/\A(?:assistant\.|turn\.)/)
472
+ if last_snapshot
473
+ # Append any delta events that arrived after the last snapshot
474
+ trailing = deltas.select { |i, _| i > last_snapshot_index }.map(&:last)
475
+ return trailing.any? ? last_snapshot + trailing.join : last_snapshot
476
+ end
665
477
 
666
- role == "assistant"
667
- end
478
+ return deltas.map(&:last).join if deltas.any?
668
479
 
669
- def role_key_present?(obj)
670
- obj.key?("role") ||
671
- hash_key_present?(obj["data"], "role") ||
672
- hash_key_present?(obj["message"], "role") ||
673
- hash_key_present?(nested_hash_value(obj, "data", "message"), "role")
674
- end
675
-
676
- def extract_event_role(obj)
677
- [
678
- obj["role"],
679
- nested_hash_value(obj, "data", "role"),
680
- nested_hash_value(obj, "message", "role"),
681
- nested_hash_value(obj, "data", "message", "role")
682
- ].compact.first&.to_s
480
+ nil
683
481
  end
684
482
 
685
483
  def extract_tokens_from_jsonl(parsed_lines)
@@ -711,6 +509,39 @@ module AgentHarness
711
509
  {input: input, output: output, total: input + output}
712
510
  end
713
511
 
512
+ def find_usages(obj)
513
+ return [] unless obj.is_a?(Hash)
514
+
515
+ direct_usage = select_best_usage_payload([
516
+ obj["usage"],
517
+ obj["tokens"],
518
+ usage_payload?(obj) ? obj : nil,
519
+ usage_payload?(obj["data"]) ? obj["data"] : nil,
520
+ usage_payload?(obj["message"]) ? obj["message"] : nil,
521
+ usage_payload?(nested_hash_value(obj, "data", "message")) ? nested_hash_value(obj, "data", "message") : nil,
522
+ nested_hash_value(obj, "data", "usage"),
523
+ nested_hash_value(obj, "data", "tokens"),
524
+ nested_hash_value(obj, "message", "usage"),
525
+ nested_hash_value(obj, "message", "tokens"),
526
+ nested_hash_value(obj, "data", "message", "usage"),
527
+ nested_hash_value(obj, "data", "message", "tokens")
528
+ ])
529
+ metrics_usages =
530
+ model_metrics_usages(obj["modelMetrics"]) +
531
+ model_metrics_usages(obj["model_metrics"]) +
532
+ model_metrics_usages(nested_hash_value(obj, "data", "modelMetrics")) +
533
+ model_metrics_usages(nested_hash_value(obj, "data", "model_metrics")) +
534
+ model_metrics_usages(nested_hash_value(obj, "message", "modelMetrics")) +
535
+ model_metrics_usages(nested_hash_value(obj, "message", "model_metrics")) +
536
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "modelMetrics")) +
537
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "model_metrics"))
538
+
539
+ return metrics_usages if prefer_usage_set?(aggregate_usage_payload(metrics_usages), direct_usage)
540
+ return [direct_usage] if direct_usage
541
+
542
+ metrics_usages
543
+ end
544
+
714
545
  def aggregate_token_totals(usages)
715
546
  total_input = 0
716
547
  total_output = 0
@@ -718,11 +549,11 @@ module AgentHarness
718
549
 
719
550
  usages.each do |usage|
720
551
  input = token_count_for(usage, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
721
- output_tok = token_count_for(usage, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
722
- next if input.nil? && output_tok.nil?
552
+ output = token_count_for(usage, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
553
+ next if input.nil? && output.nil?
723
554
 
724
555
  total_input += input || 0
725
- total_output += output_tok || 0
556
+ total_output += output || 0
726
557
  found = true
727
558
  end
728
559
 
@@ -772,44 +603,10 @@ module AgentHarness
772
603
  nested_hash_value(obj, "data", "message", "model_metrics").is_a?(Hash)
773
604
  end
774
605
 
775
- def find_usages(obj)
776
- return [] unless obj.is_a?(Hash)
777
-
778
- direct_usage = select_best_usage_payload([
779
- obj["usage"],
780
- obj["tokens"],
781
- usage_payload?(obj) ? obj : nil,
782
- usage_payload?(obj["data"]) ? obj["data"] : nil,
783
- usage_payload?(obj["message"]) ? obj["message"] : nil,
784
- usage_payload?(nested_hash_value(obj, "data", "message")) ? nested_hash_value(obj, "data", "message") : nil,
785
- nested_hash_value(obj, "data", "usage"),
786
- nested_hash_value(obj, "data", "tokens"),
787
- nested_hash_value(obj, "message", "usage"),
788
- nested_hash_value(obj, "message", "tokens"),
789
- nested_hash_value(obj, "data", "message", "usage"),
790
- nested_hash_value(obj, "data", "message", "tokens")
791
- ])
792
- metrics_usages =
793
- model_metrics_usages(obj["modelMetrics"]) +
794
- model_metrics_usages(obj["model_metrics"]) +
795
- model_metrics_usages(nested_hash_value(obj, "data", "modelMetrics")) +
796
- model_metrics_usages(nested_hash_value(obj, "data", "model_metrics")) +
797
- model_metrics_usages(nested_hash_value(obj, "message", "modelMetrics")) +
798
- model_metrics_usages(nested_hash_value(obj, "message", "model_metrics")) +
799
- model_metrics_usages(nested_hash_value(obj, "data", "message", "modelMetrics")) +
800
- model_metrics_usages(nested_hash_value(obj, "data", "message", "model_metrics"))
801
-
802
- return metrics_usages if prefer_usage_set?(aggregate_usage_payload(metrics_usages), direct_usage)
803
- return [direct_usage] if direct_usage
804
-
805
- metrics_usages
806
- end
807
-
808
606
  MAX_METRICS_DEPTH = 5
809
607
 
810
608
  def model_metrics_usages(metrics, depth: 0)
811
609
  return [] unless metrics.is_a?(Hash)
812
-
813
610
  return [metrics] if usage_with_token_counts?(metrics)
814
611
 
815
612
  direct_usage = [
@@ -819,7 +616,6 @@ module AgentHarness
819
616
  metrics["aggregate"]
820
617
  ].find { |value| usage_with_token_counts?(value) }
821
618
  return [direct_usage] if direct_usage
822
-
823
619
  return [] if depth >= MAX_METRICS_DEPTH
824
620
 
825
621
  metrics.each_value.flat_map { |value| model_metrics_usages(value, depth: depth + 1) }
@@ -848,24 +644,16 @@ module AgentHarness
848
644
  ) == 1
849
645
  end
850
646
 
851
- def extract_text_value(value)
852
- case value
853
- when String
854
- value
855
- when Array
856
- parts = value.filter_map { |part| extract_text_value(part) }
857
- parts.empty? ? nil : parts.join
858
- when Hash
859
- extract_text_value(value["text"]) ||
860
- extract_text_value(value["content"]) ||
861
- extract_text_value(value["parts"]) ||
862
- extract_text_value(value["result"]) ||
863
- extract_text_value(value["deltaContent"]) ||
864
- extract_text_value(value["delta_content"]) ||
865
- extract_text_value(value["delta"]) ||
866
- extract_text_value(value["message"]) ||
867
- extract_text_value(value["data"])
868
- end
647
+ def assistant_output_event?(obj)
648
+ return false unless obj.is_a?(Hash)
649
+
650
+ type = obj["type"]
651
+ return true if type.nil? && !role_key_present?(obj)
652
+
653
+ role = extract_event_role(obj)
654
+ return true if role.nil? && type.to_s.match?(/\A(?:assistant\.|turn\.)/)
655
+
656
+ role == "assistant"
869
657
  end
870
658
 
871
659
  def extract_non_delta_text(obj)
@@ -902,20 +690,56 @@ module AgentHarness
902
690
  extract_text_value(nested_hash_value(obj, "data", "message", "delta"))
903
691
  end
904
692
 
693
+ def extract_text_value(value)
694
+ case value
695
+ when String
696
+ value
697
+ when Array
698
+ parts = value.filter_map { |part| extract_text_value(part) }
699
+ parts.empty? ? nil : parts.join
700
+ when Hash
701
+ extract_text_value(value["text"]) ||
702
+ extract_text_value(value["content"]) ||
703
+ extract_text_value(value["parts"]) ||
704
+ extract_text_value(value["result"]) ||
705
+ extract_text_value(value["deltaContent"]) ||
706
+ extract_text_value(value["delta_content"]) ||
707
+ extract_text_value(value["delta"]) ||
708
+ extract_text_value(value["message"]) ||
709
+ extract_text_value(value["data"])
710
+ end
711
+ end
712
+
905
713
  def usage_payload?(value)
906
714
  value.is_a?(Hash) && token_count_keys.any? { |key| value.key?(key) }
907
715
  end
908
716
 
717
+ def role_key_present?(obj)
718
+ obj.key?("role") ||
719
+ hash_key_present?(obj["data"], "role") ||
720
+ hash_key_present?(obj["message"], "role") ||
721
+ hash_key_present?(nested_hash_value(obj, "data", "message"), "role")
722
+ end
723
+
724
+ def extract_event_role(obj)
725
+ [
726
+ obj["role"],
727
+ nested_hash_value(obj, "data", "role"),
728
+ nested_hash_value(obj, "message", "role"),
729
+ nested_hash_value(obj, "data", "message", "role")
730
+ ].compact.first&.to_s
731
+ end
732
+
909
733
  def hash_key_present?(value, key)
910
734
  value.is_a?(Hash) && value.key?(key)
911
735
  end
912
736
 
913
737
  def resolve_chat_api_key
914
- key = ENV["GITHUB_TOKEN"] || ENV["GH_TOKEN"] || read_copilot_cli_access_token
738
+ key = ENV["COPILOT_GITHUB_TOKEN"] || ENV["GH_TOKEN"] || ENV["GITHUB_TOKEN"] || read_copilot_cli_access_token
915
739
 
916
740
  if key.nil? || key.strip.empty?
917
741
  raise AuthenticationError.new(
918
- "Chat mode requires a GitHub token. Set GITHUB_TOKEN or GH_TOKEN, or authenticate the Copilot CLI.",
742
+ "Chat mode requires a GitHub token. Set COPILOT_GITHUB_TOKEN, GH_TOKEN, or GITHUB_TOKEN, or authenticate the Copilot CLI.",
919
743
  provider: :github_copilot
920
744
  )
921
745
  end
@@ -924,6 +748,9 @@ module AgentHarness
924
748
  end
925
749
 
926
750
  def read_copilot_cli_access_token
751
+ token = read_token_from_copilot_config
752
+ return token if token
753
+
927
754
  path = Pathname.new(File.join(Dir.home, ".copilot-cli-access-token"))
928
755
  return nil unless path.file?
929
756
 
@@ -931,6 +758,38 @@ module AgentHarness
931
758
  rescue Errno::ENOENT, Errno::EACCES, IOError
932
759
  nil
933
760
  end
761
+
762
+ def read_token_from_copilot_config
763
+ config_home = ENV["COPILOT_HOME"]
764
+ base_dir = if config_home && !config_home.strip.empty?
765
+ config_home
766
+ else
767
+ File.join(Dir.home, ".copilot")
768
+ end
769
+ path = Pathname.new(File.join(base_dir, "config.json"))
770
+ return nil unless path.file?
771
+
772
+ config = JSON.parse(path.read)
773
+ normalized_model_name(
774
+ config["oauth_token"] ||
775
+ config["oauthToken"] ||
776
+ config["token"] ||
777
+ nested_hash_value(config, "auth", "token")
778
+ )
779
+ rescue JSON::ParserError, Errno::ENOENT, Errno::EACCES, IOError
780
+ nil
781
+ end
782
+
783
+ def mcp_provider_key
784
+ :github_copilot
785
+ end
786
+
787
+ def mcp_config_plan(options, mcp_servers)
788
+ options[:_github_copilot_mcp_config] ||= {
789
+ path: File.join(Dir.tmpdir, "agent_harness_copilot_mcp_#{SecureRandom.hex(8)}.json"),
790
+ content: JSON.generate(McpConfigTranslator.for_provider(mcp_provider_key, mcp_servers))
791
+ }
792
+ end
934
793
  end
935
794
  end
936
795
  end