agent-harness 0.17.3 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5eee3cd4f4e77fe648ed70a6c31c29af4959b90d7fb848534ecdf75558811d0f
4
- data.tar.gz: 8eca8a991857a8518bc9b192d78984bb459470725da9727ac830fd42eaf2a91b
3
+ metadata.gz: 00d76ecfbbb60dc859356d4ad02faa25b7f4c51884b258c20850f6ab76439543
4
+ data.tar.gz: 13a9150028e24d8c5b120b186cbae101ad83989a24bbe7d50189284e417dfe55
5
5
  SHA512:
6
- metadata.gz: b3d2b6cd29f4b9607b52b935d323db5217514c3e32b213f222c3fd2b4731e2407315854bdbbdd2e4857c84302b4ae21e844e611bff11e24f051a28f035da93db
7
- data.tar.gz: 979df7fc6616509e14593d0fee2f189ec4a5d7b3c34d071fe755fab31115bf932f90484f3e1483dbabad956383c8bc1528174ec08791bc32a186d814295d87ef
6
+ metadata.gz: 96e7f6d0058b89493f6263db4b9e5b7b7fd76138dcc70d45648bcb9f2b0c0e02cd2b7ffc84dbbf6d000e140d39331dd8ea652d1c7c3a290b65693cfaad881217
7
+ data.tar.gz: f7f6605dc5e4be95d1d00fedc1f1c99770dcad8ac7cec86d28ccd61111c4e6ed585c3759454e0e0ffb33ea0fb5399fd271e3ef4f8e8e10cacfae5f6cca2f1a0f
@@ -1,3 +1,3 @@
1
1
  {
2
- ".": "0.17.3"
2
+ ".": "0.18.0"
3
3
  }
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.18.0](https://github.com/viamin/agent-harness/compare/agent-harness/v0.17.3...agent-harness/v0.18.0) (2026-05-06)
4
+
5
+
6
+ ### Features
7
+
8
+ * **copilot:** add GitHub Copilot CLI (`copilot`) support with --autopilot mode ([#210](https://github.com/viamin/agent-harness/issues/210)) ([0138f3c](https://github.com/viamin/agent-harness/commit/0138f3c9f91e5e871383b771c287495217f084d8))
9
+
3
10
  ## [0.17.3](https://github.com/viamin/agent-harness/compare/agent-harness/v0.17.2...agent-harness/v0.17.3) (2026-05-06)
4
11
 
5
12
 
@@ -10,6 +10,8 @@ module AgentHarness
10
10
  case provider.to_sym
11
11
  when :anthropic, :claude, :claude_code
12
12
  translate_for_claude(servers)
13
+ when :github_copilot, :copilot
14
+ translate_for_copilot(servers)
13
15
  when :codex
14
16
  translate_for_codex(servers)
15
17
  when :openai
@@ -56,6 +58,31 @@ module AgentHarness
56
58
  }
57
59
  end
58
60
 
61
+ def translate_for_copilot(mcp_servers)
62
+ {
63
+ mcpServers: mcp_servers.each_with_object({}) do |server, memo|
64
+ entry = if server.stdio?
65
+ {
66
+ type: "local",
67
+ command: server.command,
68
+ args: server.args,
69
+ tools: ["*"]
70
+ }
71
+ else
72
+ {
73
+ type: server.transport,
74
+ url: server.url,
75
+ tools: ["*"]
76
+ }
77
+ end
78
+
79
+ entry[:env] = server.env unless server.env.empty?
80
+ entry[:headers] = server.headers if server.http? && !server.headers.empty?
81
+ memo[server.name] = entry
82
+ end
83
+ }
84
+ end
85
+
59
86
  def translate_for_openai(mcp_servers)
60
87
  mcp_servers.map do |server|
61
88
  unless server.http?
@@ -1,20 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
4
3
  require "json"
5
4
  require "pathname"
5
+ require "securerandom"
6
+ require "tmpdir"
6
7
 
7
8
  module AgentHarness
8
9
  module Providers
9
10
  class GithubCopilot < Base
11
+ include McpConfigFileSupport
10
12
  include TokenUsageParsing
11
13
 
14
+ CLI_PACKAGE = "@github/copilot"
15
+ INSTALL_COMMAND_PREFIX = ["npm", "install", "-g"].freeze
16
+ DEFAULT_MAX_AUTOPILOT_CONTINUES = 50
17
+ LEGACY_BINARY_NAME = "github-copilot-cli"
12
18
  MODEL_PATTERN = /^gpt-[\d.o-]+(?:-turbo)?(?:-mini)?$/i
13
- JSON_OUTPUT_MIN_VERSION = Gem::Version.new("0.0.422").freeze
14
- SUBCOMMAND_CLI_MIN_VERSION = Gem::Version.new("0.1.0").freeze
15
- UNSUPPORTED_SUBCOMMAND_CLI_MESSAGE =
16
- "github-copilot-cli 0.1.x does not expose a non-interactive send interface; " \
17
- "the what-the-shell subcommand is interactive and cannot be used by AgentHarness."
19
+
20
+ GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
21
+ CHAT_DEFAULT_MODEL = "gpt-4o"
22
+ CHAT_MODELS = %w[gpt-4o gpt-4o-mini gpt-4-turbo].freeze
18
23
 
19
24
  SMOKE_TEST_CONTRACT = {
20
25
  prompt: "Reply with exactly OK.",
@@ -30,27 +35,41 @@ module AgentHarness
30
35
  end
31
36
 
32
37
  def binary_name
33
- "github-copilot-cli"
38
+ "copilot"
34
39
  end
35
40
 
36
41
  def available?
37
42
  executor = AgentHarness.configuration.command_executor
38
43
  return false unless executor.which(binary_name)
39
44
 
40
- !subcommand_cli_version?(copilot_cli_version(executor: executor))
45
+ true
41
46
  rescue
42
47
  false
43
48
  end
44
49
 
45
50
  def installation_contract(version: nil)
46
- # The published @githubnext/github-copilot-cli package only has
47
- # 0.1.x releases, and those expose an interactive subcommand instead
48
- # of the non-interactive -p prompt path AgentHarness uses.
49
- nil
51
+ normalized_version = normalize_install_version(version)
52
+ package = normalized_version ? "#{CLI_PACKAGE}@#{normalized_version}" : CLI_PACKAGE
53
+ install_command = (INSTALL_COMMAND_PREFIX + [package]).freeze
54
+
55
+ contract = {
56
+ source: :npm,
57
+ package: package,
58
+ package_name: CLI_PACKAGE,
59
+ version: normalized_version,
60
+ binary_name: binary_name,
61
+ install_command_prefix: INSTALL_COMMAND_PREFIX,
62
+ install_command: install_command
63
+ }
64
+
65
+ contract.each_value do |value|
66
+ value.freeze if value.is_a?(String)
67
+ end
68
+ contract.freeze
50
69
  end
51
70
 
52
71
  def install_command(version: nil)
53
- installation_contract(version: version)&.fetch(:install_command)
72
+ installation_contract(version: version)[:install_command]
54
73
  end
55
74
 
56
75
  def provider_metadata_overrides
@@ -120,26 +139,14 @@ module AgentHarness
120
139
 
121
140
  private
122
141
 
123
- def copilot_cli_version(executor:)
124
- result = executor.execute([binary_name, "--version"], timeout: 5, env: {})
125
- extract_version(result)
126
- rescue
127
- nil
128
- end
129
-
130
- def subcommand_cli_version?(version)
131
- !version.nil? && version >= SUBCOMMAND_CLI_MIN_VERSION
132
- end
133
-
134
- def extract_version(result)
135
- return nil unless result.success?
142
+ def normalize_install_version(version)
143
+ return nil if version.nil?
136
144
 
137
- version_string = [result.stdout, result.stderr].compact.join("\n")[/\d+\.\d+\.\d+(?:[-+][A-Za-z0-9.-]+)?/]
138
- return nil if version_string.nil? || version_string.empty?
145
+ unless version.is_a?(String) && !version.strip.empty?
146
+ raise ArgumentError, "Unsupported GitHub Copilot CLI version #{version.inspect}"
147
+ end
139
148
 
140
- Gem::Version.new(version_string)
141
- rescue ArgumentError
142
- nil
149
+ version.strip
143
150
  end
144
151
  end
145
152
 
@@ -174,35 +181,12 @@ module AgentHarness
174
181
  file_upload: false,
175
182
  vision: false,
176
183
  tool_use: true,
177
- json_mode: false,
178
- mcp: false,
184
+ json_mode: true,
185
+ mcp: true,
179
186
  dangerous_mode: true
180
187
  }
181
188
  end
182
189
 
183
- def dangerous_mode_flags(probe_timeout: nil, env: {}, version: nil)
184
- version ||= copilot_cli_version(probe_timeout: probe_timeout, env: env)
185
- return [] if subcommand_cli_version?(version)
186
- return [] unless supports_json_output_format?(version: version)
187
-
188
- ["--allow-all"]
189
- end
190
-
191
- def supports_sessions?(probe_timeout: nil, env: {}, version: :not_provided)
192
- legacy_prompt_cli?(version: version, probe_timeout: probe_timeout, env: env)
193
- end
194
-
195
- def session_flags(session_id, version: :not_provided, probe_timeout: nil, env: {})
196
- return [] unless session_id && !session_id.empty?
197
- return [] unless legacy_prompt_cli?(version: version, probe_timeout: probe_timeout, env: env)
198
-
199
- ["--resume", session_id]
200
- end
201
-
202
- GITHUB_MODELS_BASE_URL = "https://models.inference.ai.azure.com"
203
- CHAT_DEFAULT_MODEL = "gpt-4o"
204
- CHAT_MODELS = %w[gpt-4o gpt-4o-mini gpt-4-turbo].freeze
205
-
206
190
  def supports_chat?
207
191
  true
208
192
  end
@@ -233,19 +217,51 @@ module AgentHarness
233
217
  :openai_compatible
234
218
  end
235
219
 
220
+ def api_key_env_var_names
221
+ ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]
222
+ end
223
+
224
+ def api_key_unset_vars
225
+ ["COPILOT_PROVIDER_API_KEY", "COPILOT_PROVIDER_BASE_URL"]
226
+ end
227
+
228
+ def subscription_unset_vars
229
+ api_key_env_var_names + api_key_unset_vars
230
+ end
231
+
236
232
  def auth_type
237
233
  :oauth
238
234
  end
239
235
 
236
+ def dangerous_mode_flags
237
+ ["--yolo"]
238
+ end
239
+
240
+ def supports_mcp?
241
+ true
242
+ end
243
+
244
+ def supported_mcp_transports
245
+ %w[stdio http sse]
246
+ end
247
+
248
+ def build_mcp_flags(mcp_servers, options:)
249
+ return [] if mcp_servers.empty?
250
+
251
+ ["--additional-mcp-config", "@#{mcp_config_plan(options, mcp_servers).fetch(:path)}"]
252
+ end
253
+
254
+ def supports_sessions?
255
+ false
256
+ end
257
+
240
258
  def execution_semantics
241
259
  {
242
260
  prompt_delivery: :arg,
243
- # Older Copilot CLIs fall back to plain-text prompt mode, so metadata
244
- # must not claim JSON-only output even though newer versions support it.
245
- output_format: :text,
261
+ output_format: :json,
246
262
  sandbox_aware: false,
247
263
  uses_subcommand: false,
248
- non_interactive_flag: nil,
264
+ non_interactive_flag: "--autopilot",
249
265
  legitimate_exit_codes: [0],
250
266
  stderr_is_diagnostic: true,
251
267
  parses_rate_limit_reset: false
@@ -255,214 +271,135 @@ module AgentHarness
255
271
  def error_patterns
256
272
  {
257
273
  auth_expired: [
274
+ /not.?logged.?in/i,
258
275
  /not.?authorized/i,
259
- /access.?denied/i,
260
- /permission.?denied/i,
261
- /not.?enabled/i,
262
- /subscription.?required/i
276
+ /authentication/i,
277
+ /token.*invalid/i,
278
+ /copilot requests/i
263
279
  ],
264
280
  rate_limited: [
265
- /usage.?limit/i,
266
- /rate.?limit/i
281
+ /rate.?limit/i,
282
+ /too.?many.?requests/i,
283
+ /\b429\b/
267
284
  ],
268
285
  transient: [
269
286
  /connection.?error/i,
270
287
  /timeout/i,
271
- /try.?again/i
288
+ /try.?again/i,
289
+ /\b502\b/,
290
+ /\b503\b/
272
291
  ],
273
292
  permanent: [
274
- /invalid.?command/i,
275
- /unknown.?flag/i
293
+ /unknown.?flag/i,
294
+ /invalid.?value/i,
295
+ /continuation limit/i,
296
+ /max.?autopilot.?continues/i
276
297
  ]
277
298
  }
278
299
  end
279
300
 
280
301
  def translate_error(message)
281
302
  case message
282
- when /github-copilot-cli.*not found/i then "GitHub Copilot CLI not installed."
283
- else message
303
+ when /copilot.*not found/i, /No such file or directory - copilot/i
304
+ "GitHub Copilot CLI not installed."
305
+ else
306
+ message
284
307
  end
285
308
  end
286
309
 
287
310
  def supports_token_counting?
288
- supports_json_output_format?
311
+ true
289
312
  end
290
313
 
291
314
  def send_message(prompt:, **options)
292
- log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
293
-
294
- options = normalize_provider_runtime(options)
295
- skill_context = resolve_skills(options)
296
- prompt = apply_skills_to_prompt(prompt, skill_context)
297
- options = skill_context[:options]
298
- options = normalize_mcp_servers(options)
299
- validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
300
-
301
- timeout = options[:timeout] || @config.timeout || default_timeout
302
- raise TimeoutError, "Command timed out before execution started" if timeout <= 0
303
-
304
- env = build_env(options)
305
- options = options.merge(_version_probe_timeout: [timeout, 5].min, _command_env: env)
306
-
307
- start_time = Time.now
308
- command = build_command(prompt, options)
309
- preparation = build_execution_preparation(options)
310
- remaining_timeout = timeout - (Time.now - start_time)
311
- raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
312
-
313
- json_output_requested = command.include?("--output-format") && command.include?("json")
314
-
315
- result = execute_with_timeout(
316
- command,
317
- timeout: remaining_timeout,
318
- env: env,
319
- preparation: preparation,
320
- **command_execution_options(options)
321
- )
322
- duration = Time.now - start_time
315
+ super
316
+ ensure
317
+ cleanup_mcp_tempfiles!
318
+ end
323
319
 
324
- response = parse_response(result, duration: duration, json_output_requested: json_output_requested)
320
+ def build_command(prompt, options)
325
321
  runtime = options[:provider_runtime]
326
- effective_runtime_model = normalized_model_name(runtime&.model)
327
- if effective_runtime_model
328
- response = Response.new(
329
- output: response.output,
330
- exit_code: response.exit_code,
331
- duration: response.duration,
332
- provider: response.provider,
333
- model: effective_runtime_model,
334
- tokens: response.tokens,
335
- metadata: response.metadata,
336
- error: response.error
337
- )
322
+ cmd = [
323
+ self.class.binary_name,
324
+ "--autopilot",
325
+ "--max-autopilot-continues",
326
+ max_autopilot_continues(options).to_s,
327
+ "--output-format",
328
+ "json"
329
+ ]
330
+ # Smoke tests must run non-interactively; force full-permission mode
331
+ # so autopilot does not stall on permission prompts.
332
+ cmd += dangerous_mode_flags if (options[:dangerous_mode] || options[:smoke_test]) && supports_dangerous_mode?
333
+
334
+ if options[:mcp_servers]&.any?
335
+ cmd += build_mcp_flags(options[:mcp_servers], options: options)
338
336
  end
339
337
 
340
- track_tokens(response) if response.tokens
338
+ cmd += @config.default_flags if @config.default_flags&.any?
339
+
340
+ model = effective_model_name(runtime)
341
+ cmd += ["--model", model] if model
341
342
 
342
- log_debug("send_message_complete", duration: duration, tokens: response.tokens)
343
+ if runtime
344
+ runtime_flags = runtime.flags
345
+ cmd += runtime_flags unless runtime_flags.empty?
346
+ end
343
347
 
344
- response
345
- rescue ConfigurationError, McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
346
- raise
347
- rescue => e
348
- handle_error(e, prompt: prompt, options: options)
348
+ cmd += test_command_overrides if options[:smoke_test]
349
+ cmd += ["-p", prompt]
350
+ cmd
349
351
  end
350
352
 
351
- def plan_execution(prompt:, **options)
352
- log_debug("plan_execution_start", prompt_length: prompt.length, options: options.keys)
353
+ def build_env(options)
354
+ env = super
355
+ needs_full_permissions = options[:dangerous_mode] || options[:smoke_test]
356
+ return env unless needs_full_permissions && supports_dangerous_mode?
353
357
 
354
- options = normalize_provider_runtime(options)
355
- skill_context = resolve_skills(options)
356
- prompt = apply_skills_to_prompt(prompt, skill_context)
357
- options = skill_context[:options]
358
- options = normalize_mcp_servers(options)
359
- validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
358
+ env.merge("COPILOT_ALLOW_ALL" => "true")
359
+ end
360
360
 
361
- env = build_env(options)
362
- version = planned_copilot_cli_version(env)
363
- raise unsupported_subcommand_cli_error if subcommand_cli_version?(version)
361
+ def build_execution_preparation(options)
362
+ return nil unless options[:mcp_servers]&.any?
364
363
 
365
- options = options.merge(_command_env: env, _planned_cli_version: version)
364
+ plan = mcp_config_plan(options, options[:mcp_servers])
365
+ ExecutionPreparation.new(
366
+ file_writes: [
367
+ {
368
+ path: plan.fetch(:path),
369
+ content: plan.fetch(:content),
370
+ mode: 0o600
371
+ }
372
+ ]
373
+ )
374
+ end
366
375
 
367
- {
368
- command: build_command(prompt, options),
369
- env: env,
370
- preparation: build_execution_preparation(options)
371
- }
372
- rescue ConfigurationError, McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
373
- raise
374
- rescue => e
375
- handle_error(e, prompt: prompt, options: options)
376
- end
377
-
378
- # Parse raw container output into a Response.
379
- #
380
- # Overrides the base implementation to support the
381
- # +json_output_requested+ option, which controls whether JSONL
382
- # output is parsed for token extraction.
383
- #
384
- # @param stdout [String] captured standard output
385
- # @param stderr [String] captured standard error
386
- # @param exit_code [Integer] process exit code
387
- # @param duration [Float] execution duration in seconds
388
- # @param options [Hash] additional options
389
- # @option options [Boolean] :json_output_requested whether to parse JSONL output
390
- # @return [Response] parsed response
391
- def parse_container_output(stdout:, stderr: "", exit_code: 0, duration: 0.0, **options)
376
+ def parse_container_output(stdout:, stderr: "", exit_code: 0, duration: 0.0, **_options)
392
377
  result = CommandExecutor::Result.new(
393
378
  stdout: stdout,
394
379
  stderr: stderr,
395
380
  exit_code: exit_code,
396
381
  duration: duration
397
382
  )
398
- parse_response(
399
- result,
400
- duration: duration,
401
- json_output_requested: options.fetch(:json_output_requested, false)
402
- )
383
+ parse_response(result, duration: duration)
403
384
  end
404
385
 
405
386
  protected
406
387
 
407
- def build_command(prompt, options)
408
- env = options.fetch(:_command_env) { build_env(options) }
409
- runtime = options[:provider_runtime]
410
- version = if options.key?(:_planned_cli_version)
411
- options[:_planned_cli_version]
412
- else
413
- copilot_cli_version(
414
- probe_timeout: options[:_version_probe_timeout],
415
- env: env
416
- )
417
- end
418
-
419
- raise unsupported_subcommand_cli_error if subcommand_cli_version?(version)
420
-
421
- cmd = [self.class.binary_name, "-p", prompt]
422
-
423
- if supports_json_output_format?(version: version)
424
- cmd += ["--output-format", "json"]
425
- else
426
- # Silent mode suppresses the model/stats decoration older CLIs print in
427
- # prompt mode, which keeps smoke-test output stable on the plain-text path.
428
- cmd << "-s"
429
- end
430
-
431
- model = effective_model_name(runtime)
432
- cmd += ["--model", model] if model
433
- if options[:dangerous_mode] && supports_dangerous_mode?
434
- cmd += programmatic_tool_approval_flags
435
- cmd += dangerous_mode_flags(version: version)
436
- end
437
-
438
- if options[:session] && !options[:session].empty?
439
- cmd += session_flags(options[:session], version: version)
440
- end
441
-
442
- cmd
443
- end
444
-
445
- def parse_response(result, duration:, json_output_requested: false)
446
- response = super(result, duration: duration)
447
- output = response.output
448
- tokens = nil
449
-
450
- parsed_lines = if json_output_requested && response.error.nil?
451
- parse_jsonl_output(output)
452
- end
453
- if parsed_lines
454
- output = extract_text_from_jsonl(parsed_lines) || output
455
- tokens = extract_tokens_from_jsonl(parsed_lines)
456
- end
388
+ def parse_response(result, duration:)
389
+ response = super
390
+ parsed_lines = parse_jsonl_output(response.output)
391
+ output = extract_text_from_jsonl(parsed_lines) || response.output
392
+ tokens = extract_tokens_from_jsonl(parsed_lines)
393
+ metadata = extract_metadata_from_jsonl(parsed_lines).merge(response.metadata)
457
394
 
458
395
  Response.new(
459
396
  output: output,
460
397
  exit_code: result.exit_code,
461
398
  duration: duration,
462
399
  provider: self.class.provider_name,
463
- model: effective_model_name,
400
+ model: normalized_model_name(metadata[:model]) || effective_model_name,
464
401
  tokens: tokens,
465
- metadata: response.metadata,
402
+ metadata: metadata,
466
403
  error: response.error
467
404
  )
468
405
  end
@@ -473,219 +410,74 @@ module AgentHarness
473
410
 
474
411
  private
475
412
 
476
- def programmatic_tool_approval_flags
477
- ["--allow-all-tools"]
478
- end
479
-
480
- def supports_json_output_format?(probe_timeout: nil, env: {}, version: :not_provided)
481
- version = copilot_cli_version(probe_timeout: probe_timeout, env: env) if version == :not_provided
482
- !version.nil? && !subcommand_cli_version?(version) && version >= JSON_OUTPUT_MIN_VERSION
483
- end
484
-
485
- def legacy_prompt_cli?(probe_timeout: nil, env: {}, version: :not_provided)
486
- version = copilot_cli_version(probe_timeout: probe_timeout, env: env) if version == :not_provided
487
- !version.nil? && !subcommand_cli_version?(version)
488
- end
489
-
490
- def subcommand_cli_version?(version)
491
- self.class.send(:subcommand_cli_version?, version)
492
- end
493
-
494
- def unsupported_subcommand_cli_error
495
- ProviderError.new(UNSUPPORTED_SUBCOMMAND_CLI_MESSAGE)
496
- end
497
-
498
- def copilot_cli_version(probe_timeout: nil, env: {})
499
- return nil if env.empty? && !copilot_cli_binary_available?
500
-
501
- cache_key = version_probe_cache_key(env)
502
- @copilot_cli_versions ||= {}
503
- return @copilot_cli_versions[cache_key] if @copilot_cli_versions.key?(cache_key)
504
-
505
- result = @executor.execute([self.class.binary_name, "--version"], timeout: probe_timeout || 5, env: env)
506
- version = extract_version(result)
507
- @copilot_cli_versions[cache_key] = version
508
- version
509
- rescue => e
510
- log_debug("copilot_cli_version_check_failed", error: e.message)
511
- @copilot_cli_versions ||= {}
512
- @copilot_cli_versions[cache_key] = nil if defined?(cache_key)
513
- end
514
-
515
- def planned_copilot_cli_version(env)
516
- cache_key = version_probe_cache_key(env)
517
- @copilot_cli_versions ||= {}
518
- return @copilot_cli_versions[cache_key] if @copilot_cli_versions.key?(cache_key)
519
-
520
- # When no cached version is available (cold start), return nil so
521
- # build_command falls back to the conservative -s flag path, matching
522
- # the behavior of send_message when the version probe returns nil.
523
- nil
524
- end
525
-
526
- def version_probe_cache_key(env)
527
- [
528
- probe_env_cache_component(env, "PATH", inherited_label: :inherited_path, override_label: :path_override),
529
- probe_env_cache_component(env, "PATHEXT", inherited_label: :inherited_pathext, override_label: :pathext_override)
530
- ]
531
- end
532
-
533
- def probe_env_cache_component(env, key, inherited_label:, override_label:)
534
- label, value = if env_override_present?(env, key)
535
- [override_label, env_override_value(env, key)]
536
- else
537
- [inherited_label, ENV[key]]
538
- end
539
- return [label, :unset] if value.nil?
540
-
541
- [label, Digest::SHA256.hexdigest(value)]
542
- end
543
-
544
- def env_override_present?(env, key)
545
- env.key?(key) || env.key?(key.to_sym)
546
- end
547
-
548
- def env_override_value(env, key)
549
- return env[key] if env.key?(key)
550
-
551
- env[key.to_sym]
552
- end
553
-
554
- def copilot_cli_binary_available?
555
- @executor.which(self.class.binary_name)
556
- rescue => e
557
- log_debug("copilot_cli_binary_check_failed", error: e.message)
558
- nil
559
- end
560
-
561
- def extract_version(result)
562
- self.class.send(:extract_version, result)
413
+ def max_autopilot_continues(options)
414
+ runtime = options[:provider_runtime]
415
+ candidate = runtime&.metadata&.[](:max_autopilot_continues) ||
416
+ runtime&.metadata&.[]("max_autopilot_continues") ||
417
+ options[:max_autopilot_continues]
418
+ value = Integer(candidate, exception: false)
419
+ (value && value > 0) ? value : DEFAULT_MAX_AUTOPILOT_CONTINUES
563
420
  end
564
421
 
565
422
  def parse_jsonl_output(output)
566
- return nil if output.nil? || output.strip.empty?
423
+ return [] if output.nil? || output.strip.empty?
567
424
 
568
- parsed = output.each_line(chomp: true).filter_map do |line|
425
+ output.each_line(chomp: true).filter_map do |line|
569
426
  next if line.strip.empty?
570
427
 
571
428
  JSON.parse(line)
572
429
  rescue JSON::ParserError
573
430
  next
574
431
  end
575
-
576
- parsed.empty? ? nil : parsed
577
432
  end
578
433
 
579
- def extract_text_from_jsonl(parsed_lines)
580
- output = +""
581
- saw_text = false
582
- saw_delta = false
583
-
434
+ def extract_metadata_from_jsonl(parsed_lines)
435
+ metadata = {}
584
436
  parsed_lines.each do |obj|
585
437
  next unless obj.is_a?(Hash)
586
- next unless assistant_output_event?(obj)
587
438
 
588
- full_text = extract_non_delta_text(obj)
589
- if full_text
590
- output = if replace_output_with_full_text?(
591
- output,
592
- full_text,
593
- saw_delta: saw_delta,
594
- authoritative_snapshot: authoritative_full_snapshot?(obj)
595
- )
596
- full_text.dup
597
- else
598
- output + full_text
599
- end
600
- saw_text = true
601
- saw_delta = false
602
- end
603
-
604
- delta_text = extract_delta_text(obj)
605
- next unless delta_text
606
-
607
- output << delta_text
608
- saw_text = true
609
- saw_delta = true
610
- end
611
-
612
- saw_text ? output : nil
613
- end
614
-
615
- def replace_output_with_full_text?(existing_output, full_text, saw_delta:, authoritative_snapshot:)
616
- saw_delta ||
617
- authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot: authoritative_snapshot) ||
618
- (!existing_output.empty? && (
619
- full_text.start_with?(existing_output) ||
620
- existing_output.start_with?(full_text)
621
- ))
622
- end
623
-
624
- def authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot:)
625
- authoritative_snapshot &&
626
- !existing_output.empty? &&
627
- (
628
- existing_output.length == full_text.length ||
629
- full_text.start_with?(existing_output) ||
630
- existing_output.start_with?(full_text) ||
631
- longest_common_substring_length(existing_output, full_text) >= [[existing_output.length, full_text.length].min / 2, 1].max
439
+ model = normalized_model_name(
440
+ obj["model"] ||
441
+ nested_hash_value(obj, "message", "model") ||
442
+ nested_hash_value(obj, "data", "model") ||
443
+ nested_hash_value(obj, "data", "message", "model")
632
444
  )
445
+ metadata[:model] = model if model
446
+ end
447
+ metadata
633
448
  end
634
449
 
635
- def longest_common_substring_length(left, right)
636
- return 0 if left.empty? || right.empty?
450
+ def extract_text_from_jsonl(parsed_lines)
451
+ return nil if parsed_lines.empty?
637
452
 
638
- longest = 0
639
- row = Array.new(right.length + 1, 0)
453
+ # Track snapshots and deltas with their position so we can merge
454
+ # a final snapshot with any deltas that follow it.
455
+ last_snapshot = nil
456
+ last_snapshot_index = -1
457
+ deltas = []
640
458
 
641
- left.each_char do |left_char|
642
- previous = 0
459
+ parsed_lines.each_with_index do |obj, index|
460
+ next unless assistant_output_event?(obj)
643
461
 
644
- right.each_char.with_index(1) do |right_char, index|
645
- current = row[index]
646
- row[index] = if left_char == right_char
647
- previous + 1
648
- else
649
- 0
650
- end
651
- longest = [longest, row[index]].max
652
- previous = current
462
+ snapshot = extract_non_delta_text(obj)
463
+ if snapshot && !snapshot.empty?
464
+ last_snapshot = snapshot
465
+ last_snapshot_index = index
653
466
  end
654
- end
655
-
656
- longest
657
- end
658
467
 
659
- def authoritative_full_snapshot?(obj)
660
- obj["type"].to_s.match?(/\A(?:assistant\.message|turn\.)/) ||
661
- obj["message"].is_a?(Hash) ||
662
- nested_hash_value(obj, "data", "message").is_a?(Hash)
663
- end
664
-
665
- def assistant_output_event?(obj)
666
- type = obj["type"]
667
- return true if type.nil? && !role_key_present?(obj)
468
+ delta = extract_delta_text(obj)
469
+ deltas << [index, delta] if delta && !delta.empty?
470
+ end
668
471
 
669
- role = extract_event_role(obj)
670
- return true if role.nil? && type.to_s.match?(/\A(?:assistant\.|turn\.)/)
472
+ if last_snapshot
473
+ # Append any delta events that arrived after the last snapshot
474
+ trailing = deltas.select { |i, _| i > last_snapshot_index }.map(&:last)
475
+ return trailing.any? ? last_snapshot + trailing.join : last_snapshot
476
+ end
671
477
 
672
- role == "assistant"
673
- end
478
+ return deltas.map(&:last).join if deltas.any?
674
479
 
675
- def role_key_present?(obj)
676
- obj.key?("role") ||
677
- hash_key_present?(obj["data"], "role") ||
678
- hash_key_present?(obj["message"], "role") ||
679
- hash_key_present?(nested_hash_value(obj, "data", "message"), "role")
680
- end
681
-
682
- def extract_event_role(obj)
683
- [
684
- obj["role"],
685
- nested_hash_value(obj, "data", "role"),
686
- nested_hash_value(obj, "message", "role"),
687
- nested_hash_value(obj, "data", "message", "role")
688
- ].compact.first&.to_s
480
+ nil
689
481
  end
690
482
 
691
483
  def extract_tokens_from_jsonl(parsed_lines)
@@ -717,6 +509,39 @@ module AgentHarness
717
509
  {input: input, output: output, total: input + output}
718
510
  end
719
511
 
512
+ def find_usages(obj)
513
+ return [] unless obj.is_a?(Hash)
514
+
515
+ direct_usage = select_best_usage_payload([
516
+ obj["usage"],
517
+ obj["tokens"],
518
+ usage_payload?(obj) ? obj : nil,
519
+ usage_payload?(obj["data"]) ? obj["data"] : nil,
520
+ usage_payload?(obj["message"]) ? obj["message"] : nil,
521
+ usage_payload?(nested_hash_value(obj, "data", "message")) ? nested_hash_value(obj, "data", "message") : nil,
522
+ nested_hash_value(obj, "data", "usage"),
523
+ nested_hash_value(obj, "data", "tokens"),
524
+ nested_hash_value(obj, "message", "usage"),
525
+ nested_hash_value(obj, "message", "tokens"),
526
+ nested_hash_value(obj, "data", "message", "usage"),
527
+ nested_hash_value(obj, "data", "message", "tokens")
528
+ ])
529
+ metrics_usages =
530
+ model_metrics_usages(obj["modelMetrics"]) +
531
+ model_metrics_usages(obj["model_metrics"]) +
532
+ model_metrics_usages(nested_hash_value(obj, "data", "modelMetrics")) +
533
+ model_metrics_usages(nested_hash_value(obj, "data", "model_metrics")) +
534
+ model_metrics_usages(nested_hash_value(obj, "message", "modelMetrics")) +
535
+ model_metrics_usages(nested_hash_value(obj, "message", "model_metrics")) +
536
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "modelMetrics")) +
537
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "model_metrics"))
538
+
539
+ return metrics_usages if prefer_usage_set?(aggregate_usage_payload(metrics_usages), direct_usage)
540
+ return [direct_usage] if direct_usage
541
+
542
+ metrics_usages
543
+ end
544
+
720
545
  def aggregate_token_totals(usages)
721
546
  total_input = 0
722
547
  total_output = 0
@@ -724,11 +549,11 @@ module AgentHarness
724
549
 
725
550
  usages.each do |usage|
726
551
  input = token_count_for(usage, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
727
- output_tok = token_count_for(usage, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
728
- next if input.nil? && output_tok.nil?
552
+ output = token_count_for(usage, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
553
+ next if input.nil? && output.nil?
729
554
 
730
555
  total_input += input || 0
731
- total_output += output_tok || 0
556
+ total_output += output || 0
732
557
  found = true
733
558
  end
734
559
 
@@ -778,44 +603,10 @@ module AgentHarness
778
603
  nested_hash_value(obj, "data", "message", "model_metrics").is_a?(Hash)
779
604
  end
780
605
 
781
- def find_usages(obj)
782
- return [] unless obj.is_a?(Hash)
783
-
784
- direct_usage = select_best_usage_payload([
785
- obj["usage"],
786
- obj["tokens"],
787
- usage_payload?(obj) ? obj : nil,
788
- usage_payload?(obj["data"]) ? obj["data"] : nil,
789
- usage_payload?(obj["message"]) ? obj["message"] : nil,
790
- usage_payload?(nested_hash_value(obj, "data", "message")) ? nested_hash_value(obj, "data", "message") : nil,
791
- nested_hash_value(obj, "data", "usage"),
792
- nested_hash_value(obj, "data", "tokens"),
793
- nested_hash_value(obj, "message", "usage"),
794
- nested_hash_value(obj, "message", "tokens"),
795
- nested_hash_value(obj, "data", "message", "usage"),
796
- nested_hash_value(obj, "data", "message", "tokens")
797
- ])
798
- metrics_usages =
799
- model_metrics_usages(obj["modelMetrics"]) +
800
- model_metrics_usages(obj["model_metrics"]) +
801
- model_metrics_usages(nested_hash_value(obj, "data", "modelMetrics")) +
802
- model_metrics_usages(nested_hash_value(obj, "data", "model_metrics")) +
803
- model_metrics_usages(nested_hash_value(obj, "message", "modelMetrics")) +
804
- model_metrics_usages(nested_hash_value(obj, "message", "model_metrics")) +
805
- model_metrics_usages(nested_hash_value(obj, "data", "message", "modelMetrics")) +
806
- model_metrics_usages(nested_hash_value(obj, "data", "message", "model_metrics"))
807
-
808
- return metrics_usages if prefer_usage_set?(aggregate_usage_payload(metrics_usages), direct_usage)
809
- return [direct_usage] if direct_usage
810
-
811
- metrics_usages
812
- end
813
-
814
606
  MAX_METRICS_DEPTH = 5
815
607
 
816
608
  def model_metrics_usages(metrics, depth: 0)
817
609
  return [] unless metrics.is_a?(Hash)
818
-
819
610
  return [metrics] if usage_with_token_counts?(metrics)
820
611
 
821
612
  direct_usage = [
@@ -825,7 +616,6 @@ module AgentHarness
825
616
  metrics["aggregate"]
826
617
  ].find { |value| usage_with_token_counts?(value) }
827
618
  return [direct_usage] if direct_usage
828
-
829
619
  return [] if depth >= MAX_METRICS_DEPTH
830
620
 
831
621
  metrics.each_value.flat_map { |value| model_metrics_usages(value, depth: depth + 1) }
@@ -854,24 +644,16 @@ module AgentHarness
854
644
  ) == 1
855
645
  end
856
646
 
857
- def extract_text_value(value)
858
- case value
859
- when String
860
- value
861
- when Array
862
- parts = value.filter_map { |part| extract_text_value(part) }
863
- parts.empty? ? nil : parts.join
864
- when Hash
865
- extract_text_value(value["text"]) ||
866
- extract_text_value(value["content"]) ||
867
- extract_text_value(value["parts"]) ||
868
- extract_text_value(value["result"]) ||
869
- extract_text_value(value["deltaContent"]) ||
870
- extract_text_value(value["delta_content"]) ||
871
- extract_text_value(value["delta"]) ||
872
- extract_text_value(value["message"]) ||
873
- extract_text_value(value["data"])
874
- end
647
+ def assistant_output_event?(obj)
648
+ return false unless obj.is_a?(Hash)
649
+
650
+ type = obj["type"]
651
+ return true if type.nil? && !role_key_present?(obj)
652
+
653
+ role = extract_event_role(obj)
654
+ return true if role.nil? && type.to_s.match?(/\A(?:assistant\.|turn\.)/)
655
+
656
+ role == "assistant"
875
657
  end
876
658
 
877
659
  def extract_non_delta_text(obj)
@@ -908,20 +690,56 @@ module AgentHarness
908
690
  extract_text_value(nested_hash_value(obj, "data", "message", "delta"))
909
691
  end
910
692
 
693
+ def extract_text_value(value)
694
+ case value
695
+ when String
696
+ value
697
+ when Array
698
+ parts = value.filter_map { |part| extract_text_value(part) }
699
+ parts.empty? ? nil : parts.join
700
+ when Hash
701
+ extract_text_value(value["text"]) ||
702
+ extract_text_value(value["content"]) ||
703
+ extract_text_value(value["parts"]) ||
704
+ extract_text_value(value["result"]) ||
705
+ extract_text_value(value["deltaContent"]) ||
706
+ extract_text_value(value["delta_content"]) ||
707
+ extract_text_value(value["delta"]) ||
708
+ extract_text_value(value["message"]) ||
709
+ extract_text_value(value["data"])
710
+ end
711
+ end
712
+
911
713
  def usage_payload?(value)
912
714
  value.is_a?(Hash) && token_count_keys.any? { |key| value.key?(key) }
913
715
  end
914
716
 
717
+ def role_key_present?(obj)
718
+ obj.key?("role") ||
719
+ hash_key_present?(obj["data"], "role") ||
720
+ hash_key_present?(obj["message"], "role") ||
721
+ hash_key_present?(nested_hash_value(obj, "data", "message"), "role")
722
+ end
723
+
724
+ def extract_event_role(obj)
725
+ [
726
+ obj["role"],
727
+ nested_hash_value(obj, "data", "role"),
728
+ nested_hash_value(obj, "message", "role"),
729
+ nested_hash_value(obj, "data", "message", "role")
730
+ ].compact.first&.to_s
731
+ end
732
+
915
733
  def hash_key_present?(value, key)
916
734
  value.is_a?(Hash) && value.key?(key)
917
735
  end
918
736
 
919
737
  def resolve_chat_api_key
920
- key = ENV["GITHUB_TOKEN"] || ENV["GH_TOKEN"] || read_copilot_cli_access_token
738
+ key = ENV["COPILOT_GITHUB_TOKEN"] || ENV["GH_TOKEN"] || ENV["GITHUB_TOKEN"] || read_copilot_cli_access_token
921
739
 
922
740
  if key.nil? || key.strip.empty?
923
741
  raise AuthenticationError.new(
924
- "Chat mode requires a GitHub token. Set GITHUB_TOKEN or GH_TOKEN, or authenticate the Copilot CLI.",
742
+ "Chat mode requires a GitHub token. Set COPILOT_GITHUB_TOKEN, GH_TOKEN, or GITHUB_TOKEN, or authenticate the Copilot CLI.",
925
743
  provider: :github_copilot
926
744
  )
927
745
  end
@@ -930,6 +748,9 @@ module AgentHarness
930
748
  end
931
749
 
932
750
  def read_copilot_cli_access_token
751
+ token = read_token_from_copilot_config
752
+ return token if token
753
+
933
754
  path = Pathname.new(File.join(Dir.home, ".copilot-cli-access-token"))
934
755
  return nil unless path.file?
935
756
 
@@ -937,6 +758,38 @@ module AgentHarness
937
758
  rescue Errno::ENOENT, Errno::EACCES, IOError
938
759
  nil
939
760
  end
761
+
762
+ def read_token_from_copilot_config
763
+ config_home = ENV["COPILOT_HOME"]
764
+ base_dir = if config_home && !config_home.strip.empty?
765
+ config_home
766
+ else
767
+ File.join(Dir.home, ".copilot")
768
+ end
769
+ path = Pathname.new(File.join(base_dir, "config.json"))
770
+ return nil unless path.file?
771
+
772
+ config = JSON.parse(path.read)
773
+ normalized_model_name(
774
+ config["oauth_token"] ||
775
+ config["oauthToken"] ||
776
+ config["token"] ||
777
+ nested_hash_value(config, "auth", "token")
778
+ )
779
+ rescue JSON::ParserError, Errno::ENOENT, Errno::EACCES, IOError
780
+ nil
781
+ end
782
+
783
+ def mcp_provider_key
784
+ :github_copilot
785
+ end
786
+
787
+ def mcp_config_plan(options, mcp_servers)
788
+ options[:_github_copilot_mcp_config] ||= {
789
+ path: File.join(Dir.tmpdir, "agent_harness_copilot_mcp_#{SecureRandom.hex(8)}.json"),
790
+ content: JSON.generate(McpConfigTranslator.for_provider(mcp_provider_key, mcp_servers))
791
+ }
792
+ end
940
793
  end
941
794
  end
942
795
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AgentHarness
4
- VERSION = "0.17.3"
4
+ VERSION = "0.18.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: agent-harness
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.3
4
+ version: 0.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bart Agapinan