agent-harness 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,25 +5,15 @@ require "json"
5
5
 
6
6
  module AgentHarness
7
7
  module Providers
8
- # GitHub Copilot CLI provider
9
- #
10
- # Provides integration with the GitHub Copilot CLI tool.
11
8
  class GithubCopilot < Base
12
- MIN_JSON_OUTPUT_VERSION = Gem::Version.new("0.0.422").freeze
13
- REQUEST_PROBE_ENV_STACK_KEY = :agent_harness_github_copilot_request_probe_env_stack
9
+ include TokenUsageParsing
14
10
 
15
- # Model name pattern for GitHub Copilot (uses OpenAI models)
16
11
  MODEL_PATTERN = /^gpt-[\d.o-]+(?:-turbo)?(?:-mini)?$/i
12
+ JSON_OUTPUT_MIN_VERSION = Gem::Version.new("0.0.422").freeze
17
13
 
18
- # Copilot-specific smoke test contract. The `what-the-shell` subcommand
19
- # translates natural language into shell commands, so the generic
20
- # "Reply with exactly OK." prompt would produce something like
21
- # `echo "OK"` rather than the literal text "OK". We use a prompt that
22
- # is meaningful for the shell-translation path and only require
23
- # non-empty output (no exact match).
24
14
  SMOKE_TEST_CONTRACT = {
25
- prompt: "list files in the current directory",
26
- expected_output: nil,
15
+ prompt: "Reply with exactly OK.",
16
+ expected_output: "OK",
27
17
  timeout: 30,
28
18
  require_output: true,
29
19
  success_message: "Smoke test passed"
@@ -115,7 +105,16 @@ module AgentHarness
115
105
 
116
106
  def configuration_schema
117
107
  {
118
- fields: [],
108
+ fields: [
109
+ {
110
+ name: :model,
111
+ type: :string,
112
+ label: "Model",
113
+ required: false,
114
+ hint: "Copilot model identifier (for example gpt-4o or gpt-4o-mini)",
115
+ accepts_arbitrary: true
116
+ }
117
+ ],
119
118
  auth_modes: [:oauth],
120
119
  openai_compatible: false
121
120
  }
@@ -133,8 +132,10 @@ module AgentHarness
133
132
  }
134
133
  end
135
134
 
136
- def dangerous_mode_flags
137
- ["--allow-all-tools"]
135
+ def dangerous_mode_flags(probe_timeout: nil, env: {})
136
+ return [] unless supports_json_output_format?(probe_timeout: probe_timeout, env: env)
137
+
138
+ ["--allow-all"]
138
139
  end
139
140
 
140
141
  def supports_sessions?
@@ -150,19 +151,15 @@ module AgentHarness
150
151
  :oauth
151
152
  end
152
153
 
153
- def send_message(prompt:, **options)
154
- with_request_probe_env(request_probe_env_from_raw_runtime(options[:provider_runtime])) do
155
- super(prompt: prompt, **options)
156
- end
157
- end
158
-
159
154
  def execution_semantics
160
155
  {
161
156
  prompt_delivery: :arg,
162
- output_format: copilot_cli_supports_json_output? ? :json : :text,
157
+ # Older Copilot CLIs fall back to plain-text prompt mode, so metadata
158
+ # must not claim JSON-only output even though newer versions support it.
159
+ output_format: :text,
163
160
  sandbox_aware: false,
164
- uses_subcommand: true,
165
- non_interactive_flag: nil,
161
+ uses_subcommand: false,
162
+ non_interactive_flag: "-p",
166
163
  legitimate_exit_codes: [0],
167
164
  stderr_is_diagnostic: true,
168
165
  parses_rate_limit_reset: false
@@ -194,607 +191,551 @@ module AgentHarness
194
191
  }
195
192
  end
196
193
 
197
- protected
194
+ def supports_token_counting?
195
+ supports_json_output_format?
196
+ end
198
197
 
199
- def build_command(prompt, options)
200
- cmd = [self.class.binary_name, "what-the-shell", prompt]
201
- cmd += ["--output-format", "json"] if copilot_cli_supports_json_output?
198
+ def send_message(prompt:, **options)
199
+ log_debug("send_message_start", prompt_length: prompt.length, options: options.keys)
202
200
 
203
- # Opt in to unrestricted tool access explicitly to preserve a safe default.
204
- if supports_dangerous_mode? && options[:dangerous_mode]
205
- cmd += dangerous_mode_flags
206
- end
201
+ options = normalize_provider_runtime(options)
202
+ options = normalize_mcp_servers(options)
203
+ validate_mcp_servers!(options[:mcp_servers]) if options[:mcp_servers]&.any?
207
204
 
208
- # Add session support if provided
209
- if options[:session] && !options[:session].empty?
210
- cmd += session_flags(options[:session])
205
+ timeout = options[:timeout] || @config.timeout || default_timeout
206
+ raise TimeoutError, "Command timed out before execution started" if timeout <= 0
207
+
208
+ env = build_env(options)
209
+ options = options.merge(_version_probe_timeout: [timeout, 5].min, _command_env: env)
210
+
211
+ start_time = Time.now
212
+ command = build_command(prompt, options)
213
+ preparation = build_execution_preparation(options)
214
+ remaining_timeout = timeout - (Time.now - start_time)
215
+ raise TimeoutError, "Command timed out before execution started" if remaining_timeout <= 0
216
+
217
+ json_output_requested = command.include?("--output-format") && command.include?("json")
218
+
219
+ result = execute_with_timeout(
220
+ command,
221
+ timeout: remaining_timeout,
222
+ env: env,
223
+ preparation: preparation,
224
+ **command_execution_options(options)
225
+ )
226
+ duration = Time.now - start_time
227
+
228
+ response = parse_response(result, duration: duration, json_output_requested: json_output_requested)
229
+ runtime = options[:provider_runtime]
230
+ effective_runtime_model = normalized_model_name(runtime&.model)
231
+ if effective_runtime_model
232
+ response = Response.new(
233
+ output: response.output,
234
+ exit_code: response.exit_code,
235
+ duration: response.duration,
236
+ provider: response.provider,
237
+ model: effective_runtime_model,
238
+ tokens: response.tokens,
239
+ metadata: response.metadata,
240
+ error: response.error
241
+ )
211
242
  end
212
243
 
213
- cmd
214
- end
244
+ track_tokens(response) if response.tokens
215
245
 
216
- def default_timeout
217
- 300
246
+ log_debug("send_message_complete", duration: duration, tokens: response.tokens)
247
+
248
+ response
249
+ rescue McpConfigurationError, McpUnsupportedError, McpTransportUnsupportedError
250
+ raise
251
+ rescue => e
252
+ handle_error(e, prompt: prompt, options: options)
218
253
  end
219
254
 
220
- def parse_response(result, duration:)
221
- return super unless copilot_cli_supports_json_output?
255
+ protected
222
256
 
223
- output = result.stdout.to_s
224
- error = nil
257
+ def build_command(prompt, options)
258
+ cmd = [self.class.binary_name, "-p", prompt]
259
+ env = options.fetch(:_command_env) { build_env(options) }
260
+ runtime = options[:provider_runtime]
225
261
 
226
- legitimate = execution_semantics[:legitimate_exit_codes] || [0]
227
- unless legitimate.include?(result.exit_code)
228
- combined = [result.stderr.to_s, output].map(&:strip).reject(&:empty?).join("\n")
229
- error = combined unless combined.empty?
262
+ if supports_json_output_format?(probe_timeout: options[:_version_probe_timeout], env: env)
263
+ cmd += ["--output-format", "json"]
264
+ else
265
+ # Silent mode suppresses the model/stats decoration older CLIs print in
266
+ # prompt mode, which keeps smoke-test output stable on the plain-text path.
267
+ cmd << "-s"
230
268
  end
231
269
 
232
- structured_json_seen = false
233
- shutdown_tokens = empty_token_totals
234
- usage_tokens = empty_token_totals
235
- fallback_tokens = empty_token_totals
236
- output_segments = []
237
- authoritative_reply_seen = false
238
- output.lines.each do |line|
239
- stripped_line = line.strip
240
- if stripped_line.empty?
241
- output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
242
- next
243
- end
244
- begin
245
- obj = JSON.parse(stripped_line)
246
- rescue JSON::ParserError
247
- output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
248
- next
249
- end
250
-
251
- structured_json_seen ||= obj.is_a?(Hash)
270
+ model = effective_model_name(runtime)
271
+ cmd += ["--model", model] if model
272
+ if options[:dangerous_mode] && supports_dangerous_mode?
273
+ cmd += programmatic_tool_approval_flags
274
+ cmd += dangerous_mode_flags(probe_timeout: options[:_version_probe_timeout], env: env)
275
+ end
252
276
 
253
- text, text_kind = extract_event_text(obj)
254
- if text
255
- if text_kind == :assistant_delta
256
- next if authoritative_reply_seen
277
+ if options[:session] && !options[:session].empty?
278
+ cmd += session_flags(options[:session])
279
+ end
257
280
 
258
- append_delta_segment!(output_segments, text, terminated: line.end_with?("\n"))
259
- elsif !text.empty?
260
- replace_assistant_segments!(output_segments, text, terminated: line.end_with?("\n"))
261
- authoritative_reply_seen = true
262
- end
263
- elsif preserve_raw_json_line?(obj) || !obj.is_a?(Hash)
264
- output_segments << {kind: :raw, content: line, terminated: line.end_with?("\n")}
265
- end
281
+ cmd
282
+ end
266
283
 
267
- token_usage = extract_token_usage(obj)
268
- next unless token_usage
284
+ def parse_response(result, duration:, json_output_requested: false)
285
+ response = super(result, duration: duration)
286
+ output = response.output
287
+ tokens = nil
269
288
 
270
- if token_usage[:source] == :shutdown
271
- accumulate_token_totals!(shutdown_tokens, token_usage)
272
- elsif token_usage[:source] == :usage
273
- accumulate_token_totals!(usage_tokens, token_usage)
274
- else
275
- accumulate_token_totals!(fallback_tokens, token_usage)
276
- end
289
+ parsed_lines = if json_output_requested && response.error.nil?
290
+ parse_jsonl_output(output)
291
+ end
292
+ if parsed_lines
293
+ output = extract_text_from_jsonl(parsed_lines) || output
294
+ tokens = extract_tokens_from_jsonl(parsed_lines)
277
295
  end
278
- tokens = build_tokens(shutdown_tokens: shutdown_tokens, usage_tokens: usage_tokens, fallback_tokens: fallback_tokens)
279
- final_output = structured_json_seen ? render_output_segments(output_segments) : output
280
296
 
281
297
  Response.new(
282
- output: final_output,
298
+ output: output,
283
299
  exit_code: result.exit_code,
284
300
  duration: duration,
285
301
  provider: self.class.provider_name,
286
- model: @config.model,
302
+ model: effective_model_name,
287
303
  tokens: tokens,
288
- error: error,
289
- metadata: {
290
- legitimate_exit_codes: legitimate
291
- }
304
+ metadata: response.metadata,
305
+ error: response.error
292
306
  )
293
307
  end
294
308
 
295
- ASSISTANT_OUTPUT_EVENT_TYPES = %w[assistant assistant.message assistant.message_delta].freeze
296
- ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES = %w[assistant assistant.message].freeze
297
- SESSION_SHUTDOWN_EVENT_TYPES = ["session.shutdown"].freeze
298
- USAGE_EVENT_TYPES = %w[usage assistant.usage].freeze
299
- COPILOT_EVENT_TYPE_PREFIXES = %w[
300
- assistant.
301
- user.
302
- user_input.
303
- system.
304
- session.
305
- tool.
306
- permission.
307
- elicitation.
308
- exit_plan_mode.
309
- skill.
310
- subagent.
311
- external_tool.
312
- command.
313
- ].freeze
314
- COPILOT_EVENT_TYPES = %w[
315
- abort
316
- command
317
- elicitation
318
- exit_plan_mode
319
- external_tool
320
- permission
321
- session
322
- skill
323
- subagent
324
- system
325
- tool
326
- user
327
- user_input
328
- ].freeze
329
-
330
- def extract_event_text(obj)
331
- return [nil, nil] unless obj.is_a?(Hash)
332
-
333
- if obj.key?("type")
334
- return [nil, nil] unless obj["data"].is_a?(Hash)
335
- return [nil, nil] unless ASSISTANT_OUTPUT_EVENT_TYPES.include?(obj["type"])
336
-
337
- data = obj["data"]
338
- if obj["type"] == "assistant.message_delta"
339
- delta_content = string_content(data["deltaContent"])
340
- delta_content = string_content(data["delta_content"]) if delta_content.nil? || delta_content.empty?
341
- return [delta_content, :assistant_delta] if delta_content && !delta_content.empty?
342
-
343
- return [nil, nil]
344
- end
345
-
346
- return [string_content(data["content"]), :assistant] if data.key?("content")
309
+ def default_timeout
310
+ 300
311
+ end
347
312
 
348
- return [nil, nil]
349
- end
313
+ private
350
314
 
351
- return [nil, nil] if obj.key?("role") && !assistant_role?(obj["role"])
352
- return [nil, nil] if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
353
- !assistant_role?(obj["message"]["role"])
315
+ def programmatic_tool_approval_flags
316
+ ["--allow-all-tools"]
317
+ end
354
318
 
355
- if obj["message"].is_a?(Hash) && obj["message"].key?("content")
356
- nested_content = string_content(obj["message"]["content"])
357
- return [nested_content, :assistant] if nested_content && !nested_content.empty?
358
- end
319
+ def supports_json_output_format?(probe_timeout: nil, env: {})
320
+ version = copilot_cli_version(probe_timeout: probe_timeout, env: env)
321
+ !version.nil? && version >= JSON_OUTPUT_MIN_VERSION
322
+ end
359
323
 
360
- output = string_content(obj["output"])
361
- return [output, :assistant] if output && !output.empty?
324
+ def copilot_cli_version(probe_timeout: nil, env: {})
325
+ return nil if env.empty? && !copilot_cli_binary_available?
362
326
 
363
- content = string_content(obj["content"])
364
- return [content, :assistant] if content && !content.empty?
327
+ cache_key = version_probe_cache_key(env)
328
+ @copilot_cli_versions ||= {}
329
+ return @copilot_cli_versions[cache_key] if @copilot_cli_versions.key?(cache_key)
365
330
 
366
- [nil, nil]
331
+ result = @executor.execute([self.class.binary_name, "--version"], timeout: probe_timeout || 5, env: env)
332
+ version = extract_version(result)
333
+ @copilot_cli_versions[cache_key] = version
334
+ version
335
+ rescue => e
336
+ log_debug("copilot_cli_version_check_failed", error: e.message)
337
+ @copilot_cli_versions ||= {}
338
+ @copilot_cli_versions[cache_key] = nil if defined?(cache_key)
367
339
  end
368
340
 
369
- def string_content(value)
370
- return value if value.is_a?(String)
371
-
372
- nil
341
+ def version_probe_cache_key(env)
342
+ [
343
+ probe_env_cache_component(env, "PATH", inherited_label: :inherited_path, override_label: :path_override),
344
+ probe_env_cache_component(env, "PATHEXT", inherited_label: :inherited_pathext, override_label: :pathext_override)
345
+ ]
373
346
  end
374
347
 
375
- def preserve_raw_json_line?(obj)
376
- return false unless obj.is_a?(Hash)
377
- return false if obj.key?("type") && copilot_event_type?(obj["type"])
378
- return true if obj.key?("type")
379
- return false if obj.key?("role") && !assistant_role?(obj["role"])
380
- return false if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
381
- !assistant_role?(obj["message"]["role"])
382
- return false if extract_token_usage(obj)
383
- return false if (output = string_content(obj["output"])) && !output.empty?
384
- return false if (content = string_content(obj["content"])) && !content.empty?
385
- return false if obj["message"].is_a?(Hash) &&
386
- (message_content = string_content(obj["message"]["content"])) &&
387
- !message_content.empty?
348
+ def probe_env_cache_component(env, key, inherited_label:, override_label:)
349
+ label, value = if env_override_present?(env, key)
350
+ [override_label, env_override_value(env, key)]
351
+ else
352
+ [inherited_label, ENV[key]]
353
+ end
354
+ return [label, :unset] if value.nil?
388
355
 
389
- true
356
+ [label, Digest::SHA256.hexdigest(value)]
390
357
  end
391
358
 
392
- def assistant_role?(role)
393
- role == "assistant"
359
+ def env_override_present?(env, key)
360
+ env.key?(key) || env.key?(key.to_sym)
394
361
  end
395
362
 
396
- def copilot_event_type?(event_type)
397
- return true if ASSISTANT_OUTPUT_EVENT_TYPES.include?(event_type)
398
- return true if ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES.include?(event_type)
399
- return true if SESSION_SHUTDOWN_EVENT_TYPES.include?(event_type)
400
- return true if USAGE_EVENT_TYPES.include?(event_type)
401
- return false unless event_type.is_a?(String)
402
- return true if COPILOT_EVENT_TYPES.include?(event_type)
363
+ def env_override_value(env, key)
364
+ return env[key] if env.key?(key)
403
365
 
404
- COPILOT_EVENT_TYPE_PREFIXES.any? { |prefix| event_type.start_with?(prefix) }
366
+ env[key.to_sym]
405
367
  end
406
368
 
407
- def extract_token_usage(obj)
408
- return nil unless obj.is_a?(Hash)
369
+ def copilot_cli_binary_available?
370
+ @executor.which(self.class.binary_name)
371
+ rescue => e
372
+ log_debug("copilot_cli_binary_check_failed", error: e.message)
373
+ nil
374
+ end
409
375
 
410
- if obj.key?("type")
411
- return nil unless obj["data"].is_a?(Hash)
376
+ def extract_version(result)
377
+ return nil unless result.success?
412
378
 
413
- data = obj["data"]
379
+ version_string = [result.stdout, result.stderr].compact.join("\n")[/\d+\.\d+\.\d+(?:[-+][A-Za-z0-9.-]+)?/]
380
+ return nil if version_string.nil? || version_string.empty?
414
381
 
415
- if SESSION_SHUTDOWN_EVENT_TYPES.include?(obj["type"])
416
- return extract_shutdown_token_usage(data)
417
- end
382
+ Gem::Version.new(version_string)
383
+ rescue ArgumentError
384
+ nil
385
+ end
418
386
 
419
- if USAGE_EVENT_TYPES.include?(obj["type"])
420
- return extract_payload_token_usage(
421
- data,
422
- source: :usage,
423
- input_keys: ["inputTokens", "input_tokens"],
424
- output_keys: ["outputTokens", "output_tokens"]
425
- )
426
- end
387
+ def parse_jsonl_output(output)
388
+ return nil if output.nil? || output.strip.empty?
427
389
 
428
- if ASSISTANT_TOKEN_FALLBACK_EVENT_TYPES.include?(obj["type"])
429
- return extract_payload_token_usage(
430
- data,
431
- source: :assistant,
432
- input_keys: ["inputTokens", "input_tokens"],
433
- output_keys: ["outputTokens", "output_tokens"]
434
- )
435
- end
390
+ parsed = output.each_line(chomp: true).filter_map do |line|
391
+ next if line.strip.empty?
436
392
 
437
- return nil
393
+ JSON.parse(line)
394
+ rescue JSON::ParserError
395
+ next
438
396
  end
439
397
 
440
- extract_top_level_token_usage(obj)
441
- end
442
-
443
- def extract_shutdown_token_usage(data)
444
- model_metrics = extract_shutdown_model_metrics_usage(data["modelMetrics"])
445
- snake_case_model_metrics = extract_shutdown_model_metrics_usage(data["model_metrics"])
446
-
447
- input, input_present = merged_token_metric(model_metrics, snake_case_model_metrics, :input)
448
- output, output_present = merged_token_metric(model_metrics, snake_case_model_metrics, :output)
449
- return nil unless input_present || output_present
450
-
451
- {
452
- source: :shutdown,
453
- input: input,
454
- output: output,
455
- input_present: input_present,
456
- output_present: output_present
457
- }
398
+ parsed.empty? ? nil : parsed
458
399
  end
459
400
 
460
- def extract_shutdown_model_metrics_usage(model_metrics)
461
- return nil unless model_metrics.is_a?(Hash)
401
+ def extract_text_from_jsonl(parsed_lines)
402
+ output = +""
403
+ saw_text = false
404
+ saw_delta = false
462
405
 
463
- totals = empty_token_totals
406
+ parsed_lines.each do |obj|
407
+ next unless obj.is_a?(Hash)
408
+ next unless assistant_output_event?(obj)
464
409
 
465
- model_metrics.each_value do |metric|
466
- next unless metric.is_a?(Hash)
467
-
468
- usage = metric["usage"]
469
- next unless usage.is_a?(Hash)
410
+ full_text = extract_non_delta_text(obj)
411
+ if full_text
412
+ output = if replace_output_with_full_text?(
413
+ output,
414
+ full_text,
415
+ saw_delta: saw_delta,
416
+ authoritative_snapshot: authoritative_full_snapshot?(obj)
417
+ )
418
+ full_text.dup
419
+ else
420
+ output + full_text
421
+ end
422
+ saw_text = true
423
+ saw_delta = false
424
+ end
470
425
 
471
- metric_usage = extract_payload_token_usage(
472
- usage,
473
- source: :shutdown,
474
- input_keys: ["inputTokens", "input_tokens", "input"],
475
- output_keys: ["outputTokens", "output_tokens", "output"]
476
- )
477
- next unless metric_usage
426
+ delta_text = extract_delta_text(obj)
427
+ next unless delta_text
478
428
 
479
- accumulate_token_totals!(totals, metric_usage)
429
+ output << delta_text
430
+ saw_text = true
431
+ saw_delta = true
480
432
  end
481
433
 
482
- return nil unless totals[:input_present] || totals[:output_present]
483
-
484
- totals
434
+ saw_text ? output : nil
485
435
  end
486
436
 
487
- def extract_payload_token_usage(payload, source:, input_keys:, output_keys:)
488
- return nil unless payload.is_a?(Hash)
489
-
490
- input, input_present = token_value(payload, *input_keys)
491
- output, output_present = token_value(payload, *output_keys)
492
- return nil unless input_present || output_present
493
-
494
- {
495
- source: source,
496
- input: input,
497
- output: output,
498
- input_present: input_present,
499
- output_present: output_present
500
- }
437
+ def replace_output_with_full_text?(existing_output, full_text, saw_delta:, authoritative_snapshot:)
438
+ saw_delta ||
439
+ authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot: authoritative_snapshot) ||
440
+ (!existing_output.empty? && (
441
+ full_text.start_with?(existing_output) ||
442
+ existing_output.start_with?(full_text)
443
+ ))
501
444
  end
502
445
 
503
- def extract_top_level_token_usage(obj)
504
- return nil if obj.key?("role") && !assistant_role?(obj["role"])
505
- return nil if obj["message"].is_a?(Hash) && obj["message"].key?("role") &&
506
- !assistant_role?(obj["message"]["role"])
507
-
508
- usage = extract_payload_token_usage(
509
- obj["usage"],
510
- source: :usage,
511
- input_keys: ["input_tokens", "inputTokens", "input"],
512
- output_keys: ["output_tokens", "outputTokens", "output"]
513
- )
514
- tokens = extract_payload_token_usage(
515
- obj["tokens"],
516
- source: :usage,
517
- input_keys: ["input_tokens", "inputTokens", "input"],
518
- output_keys: ["output_tokens", "outputTokens", "output"]
519
- )
520
- return nil unless usage || tokens
521
-
522
- input, input_present = merged_token_metric(usage, tokens, :input)
523
- output, output_present = merged_token_metric(usage, tokens, :output)
524
- return nil unless input_present || output_present
525
-
526
- {
527
- source: :usage,
528
- input: input,
529
- output: output,
530
- input_present: input_present,
531
- output_present: output_present
532
- }
446
+ def authoritative_snapshot_replacement?(existing_output, full_text, authoritative_snapshot:)
447
+ authoritative_snapshot &&
448
+ !existing_output.empty? &&
449
+ (
450
+ existing_output.length == full_text.length ||
451
+ full_text.start_with?(existing_output) ||
452
+ existing_output.start_with?(full_text) ||
453
+ longest_common_substring_length(existing_output, full_text) >= [[existing_output.length, full_text.length].min / 2, 1].max
454
+ )
533
455
  end
534
456
 
535
- def merged_token_metric(primary, fallback, metric)
536
- present_key = :"#{metric}_present"
537
- return [primary[metric], true] if primary&.[](present_key)
538
- return [fallback[metric], true] if fallback&.[](present_key)
457
+ def longest_common_substring_length(left, right)
458
+ return 0 if left.empty? || right.empty?
539
459
 
540
- [0, false]
541
- end
460
+ longest = 0
461
+ row = Array.new(right.length + 1, 0)
542
462
 
543
- def empty_token_totals
544
- {
545
- input: 0,
546
- output: 0,
547
- input_present: false,
548
- output_present: false
549
- }
550
- end
463
+ left.each_char do |left_char|
464
+ previous = 0
551
465
 
552
- def accumulate_token_totals!(totals, token_usage)
553
- if token_usage[:input_present]
554
- totals[:input_present] = true
555
- totals[:input] += token_usage[:input]
466
+ right.each_char.with_index(1) do |right_char, index|
467
+ current = row[index]
468
+ row[index] = if left_char == right_char
469
+ previous + 1
470
+ else
471
+ 0
472
+ end
473
+ longest = [longest, row[index]].max
474
+ previous = current
475
+ end
556
476
  end
557
477
 
558
- return unless token_usage[:output_present]
478
+ longest
479
+ end
559
480
 
560
- totals[:output_present] = true
561
- totals[:output] += token_usage[:output]
481
+ def authoritative_full_snapshot?(obj)
482
+ obj["type"].to_s.match?(/\A(?:assistant\.message|turn\.)/) ||
483
+ obj["message"].is_a?(Hash) ||
484
+ nested_hash_value(obj, "data", "message").is_a?(Hash)
562
485
  end
563
486
 
564
- def token_value(obj, *keys)
565
- keys.each do |candidate|
566
- next unless obj.key?(candidate)
487
+ def assistant_output_event?(obj)
488
+ type = obj["type"]
489
+ return true if type.nil? && !role_key_present?(obj)
567
490
 
568
- value, valid = coerce_token_value(obj[candidate])
569
- return [value, true] if valid
570
- end
491
+ role = extract_event_role(obj)
492
+ return true if role.nil? && type.to_s.match?(/\A(?:assistant\.|turn\.)/)
571
493
 
572
- [0, false]
494
+ role == "assistant"
573
495
  end
574
496
 
575
- def build_tokens(shutdown_tokens:, usage_tokens:, fallback_tokens:)
576
- input, input_present = first_present_token_metric(usage_tokens, fallback_tokens, :input)
577
- output, output_present = first_present_token_metric(usage_tokens, fallback_tokens, :output)
578
- return token_hash(input, output, input_present, output_present) if input_present || output_present
579
-
580
- input, input_present = first_present_token_metric(shutdown_tokens, :input)
581
- output, output_present = first_present_token_metric(shutdown_tokens, :output)
582
- token_hash(input, output, input_present, output_present)
497
+ def role_key_present?(obj)
498
+ obj.key?("role") ||
499
+ hash_key_present?(obj["data"], "role") ||
500
+ hash_key_present?(obj["message"], "role") ||
501
+ hash_key_present?(nested_hash_value(obj, "data", "message"), "role")
583
502
  end
584
503
 
585
- def token_hash(input, output, input_present, output_present)
586
- return nil unless input_present || output_present
587
-
588
- {input: input, output: output, total: input + output}
504
+ def extract_event_role(obj)
505
+ [
506
+ obj["role"],
507
+ nested_hash_value(obj, "data", "role"),
508
+ nested_hash_value(obj, "message", "role"),
509
+ nested_hash_value(obj, "data", "message", "role")
510
+ ].compact.first&.to_s
589
511
  end
590
512
 
591
- def first_present_token_metric(*sources, metric)
592
- present_key = :"#{metric}_present"
593
-
594
- sources.each do |source|
595
- next unless source[present_key]
513
+ def extract_tokens_from_jsonl(parsed_lines)
514
+ authoritative = authoritative_usage_set(parsed_lines)
596
515
 
597
- return [source[metric], true]
516
+ if authoritative.nil?
517
+ usages = parsed_lines.flat_map { |obj| find_usages(obj) }
518
+ return aggregate_token_totals(usages)
598
519
  end
599
520
 
600
- [0, false]
601
- end
602
-
603
- def render_output_segments(segments)
604
- rendered = +""
605
- previous_kind = nil
606
- previous_terminated = false
521
+ auth_input = sum_token_field(authoritative, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
522
+ auth_output = sum_token_field(authoritative, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
607
523
 
608
- segments.each do |segment|
609
- if previous_terminated && previous_kind == :assistant &&
610
- segment[:kind] != :assistant &&
611
- !rendered.empty? &&
612
- !rendered.end_with?("\n")
613
- rendered << "\n"
614
- end
615
-
616
- rendered << segment[:content]
617
- previous_kind = segment[:kind]
618
- previous_terminated = segment[:terminated]
524
+ if !auth_input.nil? && !auth_output.nil?
525
+ return {input: auth_input, output: auth_output, total: auth_input + auth_output}
619
526
  end
620
527
 
621
- rendered
622
- end
623
-
624
- def append_delta_segment!(segments, text, terminated:)
625
- previous_segment = segments.last
626
- if previous_segment&.[](:provisional) && previous_segment[:kind] == :assistant
627
- previous_segment[:content] << text
628
- previous_segment[:terminated] = terminated
629
- return
630
- end
528
+ fallback_usages = parsed_lines.flat_map { |obj| find_usages(obj) }
529
+ fallback_input = sum_token_field(fallback_usages, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
530
+ fallback_output = sum_token_field(fallback_usages, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
631
531
 
632
- segments << {
633
- kind: :assistant,
634
- content: +text,
635
- terminated: terminated,
636
- provisional: true
637
- }
638
- end
532
+ input = auth_input.nil? ? fallback_input : auth_input
533
+ output = auth_output.nil? ? fallback_output : auth_output
639
534
 
640
- def replace_assistant_segments!(segments, text, terminated:)
641
- drop_assistant_segments!(segments)
642
- segments << {kind: :assistant, content: text, terminated: terminated}
643
- end
535
+ return nil if input.nil? && output.nil?
644
536
 
645
- def drop_assistant_segments!(segments)
646
- segments.reject! { |segment| segment[:kind] == :assistant }
537
+ input ||= 0
538
+ output ||= 0
539
+ {input: input, output: output, total: input + output}
647
540
  end
648
541
 
649
- def with_request_probe_env(env)
650
- stack = writable_request_probe_env_stack
651
- stack << env
652
- yield
653
- ensure
654
- stack&.pop
655
- clear_request_probe_env_stack! if stack&.empty?
656
- end
542
+ def aggregate_token_totals(usages)
543
+ total_input = 0
544
+ total_output = 0
545
+ found = false
657
546
 
658
- def current_probe_env
659
- stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
660
- stack = stacks && stacks[object_id]
661
- stack&.last || {}
662
- end
547
+ usages.each do |usage|
548
+ input = token_count_for(usage, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
549
+ output_tok = token_count_for(usage, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
550
+ next if input.nil? && output_tok.nil?
663
551
 
664
- def version_probe_env_cache_key(env)
665
- resolved_binary_path_for_env(env) ||
666
- if env.key?("PATH")
667
- [:path_override, cacheable_path_override(env["PATH"])]
668
- else
669
- self.class.binary_name
670
- end
671
- end
552
+ total_input += input || 0
553
+ total_output += output_tok || 0
554
+ found = true
555
+ end
672
556
 
673
- def cacheable_path_override(path)
674
- return nil unless path.is_a?(String)
557
+ return nil unless found
675
558
 
676
- Digest::SHA256.hexdigest(path)
559
+ {input: total_input, output: total_output, total: total_input + total_output}
677
560
  end
678
561
 
679
- def resolved_binary_path_for_env(env)
680
- path = if env.key?("PATH")
681
- env["PATH"]
682
- else
683
- ENV["PATH"]
684
- end
685
- return nil unless path.is_a?(String) && !path.empty?
562
+ def sum_token_field(usages, *keys)
563
+ total = nil
564
+ usages.each do |usage|
565
+ value = token_count_for(usage, *keys)
566
+ next if value.nil?
686
567
 
687
- path.split(File::PATH_SEPARATOR).each do |entry|
688
- full_path = File.join(entry, self.class.binary_name)
689
- return full_path if File.executable?(full_path)
568
+ total = total.nil? ? value : total + value
690
569
  end
691
-
692
- nil
570
+ total
693
571
  end
694
572
 
695
- def request_probe_env_from_raw_runtime(runtime)
696
- case runtime
697
- when nil
698
- {}
699
- when ProviderRuntime
700
- runtime.env.merge(runtime.unset_env.to_h { |key| [key, nil] })
701
- when Hash
702
- request_probe_env_from_raw_hash(runtime)
703
- else
704
- {}
573
+ def authoritative_usage_set(parsed_lines)
574
+ usages = parsed_lines.flat_map do |obj|
575
+ next [] unless authoritative_usage_event?(obj)
576
+
577
+ find_usages(obj)
705
578
  end
579
+
580
+ usages.any? ? usages : nil
706
581
  end
707
582
 
708
- def request_probe_env_from_raw_hash(runtime_hash)
709
- env = stringify_probe_env(runtime_hash[:env] || runtime_hash["env"])
710
- unset_env = stringify_probe_unset_env(runtime_hash[:unset_env] || runtime_hash["unset_env"])
711
- return {} unless env && unset_env
583
+ def authoritative_usage_event?(obj)
584
+ return false unless obj.is_a?(Hash)
712
585
 
713
- env.merge(unset_env.to_h { |key| [key, nil] })
586
+ type = obj["type"].to_s
587
+ type == "session.shutdown" ||
588
+ type.end_with?(".shutdown") ||
589
+ model_metrics_present?(obj)
714
590
  end
715
591
 
716
- def stringify_probe_env(raw_env)
717
- return {} if raw_env.nil?
718
- return nil unless raw_env.is_a?(Hash)
592
+ def model_metrics_present?(obj)
593
+ obj["modelMetrics"].is_a?(Hash) ||
594
+ obj["model_metrics"].is_a?(Hash) ||
595
+ nested_hash_value(obj, "data", "modelMetrics").is_a?(Hash) ||
596
+ nested_hash_value(obj, "data", "model_metrics").is_a?(Hash) ||
597
+ nested_hash_value(obj, "message", "modelMetrics").is_a?(Hash) ||
598
+ nested_hash_value(obj, "message", "model_metrics").is_a?(Hash) ||
599
+ nested_hash_value(obj, "data", "message", "modelMetrics").is_a?(Hash) ||
600
+ nested_hash_value(obj, "data", "message", "model_metrics").is_a?(Hash)
601
+ end
719
602
 
720
- raw_env.each_with_object({}) do |(key, value), env|
721
- return nil unless value.is_a?(String)
603
+ def find_usages(obj)
604
+ return [] unless obj.is_a?(Hash)
722
605
 
723
- env[key.to_s] = value
724
- end
725
- end
606
+ direct_usage = select_best_usage_payload([
607
+ obj["usage"],
608
+ obj["tokens"],
609
+ usage_payload?(obj) ? obj : nil,
610
+ usage_payload?(obj["data"]) ? obj["data"] : nil,
611
+ usage_payload?(obj["message"]) ? obj["message"] : nil,
612
+ usage_payload?(nested_hash_value(obj, "data", "message")) ? nested_hash_value(obj, "data", "message") : nil,
613
+ nested_hash_value(obj, "data", "usage"),
614
+ nested_hash_value(obj, "data", "tokens"),
615
+ nested_hash_value(obj, "message", "usage"),
616
+ nested_hash_value(obj, "message", "tokens"),
617
+ nested_hash_value(obj, "data", "message", "usage"),
618
+ nested_hash_value(obj, "data", "message", "tokens")
619
+ ])
620
+ metrics_usages =
621
+ model_metrics_usages(obj["modelMetrics"]) +
622
+ model_metrics_usages(obj["model_metrics"]) +
623
+ model_metrics_usages(nested_hash_value(obj, "data", "modelMetrics")) +
624
+ model_metrics_usages(nested_hash_value(obj, "data", "model_metrics")) +
625
+ model_metrics_usages(nested_hash_value(obj, "message", "modelMetrics")) +
626
+ model_metrics_usages(nested_hash_value(obj, "message", "model_metrics")) +
627
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "modelMetrics")) +
628
+ model_metrics_usages(nested_hash_value(obj, "data", "message", "model_metrics"))
726
629
 
727
- def stringify_probe_unset_env(raw_unset_env)
728
- return [] if raw_unset_env.nil?
729
- return nil unless raw_unset_env.is_a?(Array)
630
+ return metrics_usages if prefer_usage_set?(aggregate_usage_payload(metrics_usages), direct_usage)
631
+ return [direct_usage] if direct_usage
730
632
 
731
- raw_unset_env.map(&:to_s)
732
- rescue NoMethodError
733
- nil
633
+ metrics_usages
734
634
  end
735
635
 
736
- def writable_request_probe_env_stack
737
- stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
738
- unless stacks
739
- stacks = {}
740
- Thread.current.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, stacks)
741
- end
636
+ MAX_METRICS_DEPTH = 5
742
637
 
743
- stacks[object_id] ||= []
744
- end
638
+ def model_metrics_usages(metrics, depth: 0)
639
+ return [] unless metrics.is_a?(Hash)
745
640
 
746
- def clear_request_probe_env_stack!
747
- stacks = Thread.current.thread_variable_get(REQUEST_PROBE_ENV_STACK_KEY)
748
- return unless stacks
641
+ return [metrics] if usage_with_token_counts?(metrics)
749
642
 
750
- stacks.delete(object_id)
751
- Thread.current.thread_variable_set(REQUEST_PROBE_ENV_STACK_KEY, nil) if stacks.empty?
752
- end
643
+ direct_usage = [
644
+ metrics["usage"],
645
+ metrics["totals"],
646
+ metrics["total"],
647
+ metrics["aggregate"]
648
+ ].find { |value| usage_with_token_counts?(value) }
649
+ return [direct_usage] if direct_usage
753
650
 
754
- def copilot_cli_supports_json_output?(env: current_probe_env)
755
- @copilot_cli_supports_json_output ||= {}
756
- cache_key = version_probe_env_cache_key(env)
757
- return @copilot_cli_supports_json_output[cache_key] if @copilot_cli_supports_json_output.key?(cache_key)
651
+ return [] if depth >= MAX_METRICS_DEPTH
758
652
 
759
- version = copilot_cli_version(env: env)
760
- @copilot_cli_supports_json_output[cache_key] = !version.nil? && version >= MIN_JSON_OUTPUT_VERSION
761
- rescue
762
- @copilot_cli_supports_json_output[cache_key] = false
653
+ metrics.each_value.flat_map { |value| model_metrics_usages(value, depth: depth + 1) }
763
654
  end
764
655
 
765
- def copilot_cli_version(env: current_probe_env)
766
- @copilot_cli_version ||= {}
767
- cache_key = version_probe_env_cache_key(env)
768
- return @copilot_cli_version[cache_key] if @copilot_cli_version.key?(cache_key)
656
+ def aggregate_usage_payload(usages)
657
+ return nil if usages.empty?
769
658
 
770
- result = @executor.execute([self.class.binary_name, "--version"], timeout: 5, env: env)
771
- return @copilot_cli_version[cache_key] = nil unless result.exit_code.zero?
659
+ input = sum_token_field(usages, "input_tokens", "prompt_tokens", "inputTokens", "promptTokens")
660
+ output = sum_token_field(usages, "output_tokens", "completion_tokens", "outputTokens", "completionTokens")
661
+ return nil if input.nil? && output.nil?
772
662
 
773
- @copilot_cli_version[cache_key] = parse_copilot_cli_version(result.stdout) || parse_copilot_cli_version(result.stderr)
774
- rescue
775
- @copilot_cli_version[cache_key] = nil
663
+ payload = {}
664
+ payload["input_tokens"] = input unless input.nil?
665
+ payload["output_tokens"] = output unless output.nil?
666
+ payload
776
667
  end
777
668
 
778
- def parse_copilot_cli_version(output)
779
- match = output.to_s.match(/(\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?)/)
780
- return nil unless match
669
+ def prefer_usage_set?(candidate, current)
670
+ return false if candidate.nil?
671
+ return true if current.nil?
781
672
 
782
- Gem::Version.new(match[1])
783
- rescue ArgumentError
784
- nil
673
+ (
674
+ [usage_token_field_count(candidate), usage_token_total(candidate)] <=>
675
+ [usage_token_field_count(current), usage_token_total(current)]
676
+ ) == 1
785
677
  end
786
678
 
787
- def coerce_token_value(value)
679
+ def extract_text_value(value)
788
680
  case value
789
- when Integer
790
- return [value, true] if value >= 0
791
- when Float
792
- return [value.to_i, true] if value.finite? && value >= 0 && value == value.to_i
793
681
  when String
794
- return [value.to_i, true] if /\A\+?\d+\z/.match?(value)
682
+ value
683
+ when Array
684
+ parts = value.filter_map { |part| extract_text_value(part) }
685
+ parts.empty? ? nil : parts.join
686
+ when Hash
687
+ extract_text_value(value["text"]) ||
688
+ extract_text_value(value["content"]) ||
689
+ extract_text_value(value["parts"]) ||
690
+ extract_text_value(value["result"]) ||
691
+ extract_text_value(value["deltaContent"]) ||
692
+ extract_text_value(value["delta_content"]) ||
693
+ extract_text_value(value["delta"]) ||
694
+ extract_text_value(value["message"]) ||
695
+ extract_text_value(value["data"])
795
696
  end
697
+ end
796
698
 
797
- [0, false]
699
+ def extract_non_delta_text(obj)
700
+ extract_text_value(obj["text"]) ||
701
+ extract_text_value(obj["content"]) ||
702
+ extract_text_value(obj["parts"]) ||
703
+ extract_text_value(obj["result"]) ||
704
+ extract_text_value(nested_hash_value(obj, "message", "text")) ||
705
+ extract_text_value(nested_hash_value(obj, "message", "content")) ||
706
+ extract_text_value(nested_hash_value(obj, "message", "parts")) ||
707
+ extract_text_value(nested_hash_value(obj, "message", "result")) ||
708
+ extract_text_value(nested_hash_value(obj, "data", "text")) ||
709
+ extract_text_value(nested_hash_value(obj, "data", "content")) ||
710
+ extract_text_value(nested_hash_value(obj, "data", "parts")) ||
711
+ extract_text_value(nested_hash_value(obj, "data", "result")) ||
712
+ extract_text_value(nested_hash_value(obj, "data", "message", "text")) ||
713
+ extract_text_value(nested_hash_value(obj, "data", "message", "content")) ||
714
+ extract_text_value(nested_hash_value(obj, "data", "message", "parts")) ||
715
+ extract_text_value(nested_hash_value(obj, "data", "message", "result"))
716
+ end
717
+
718
+ def extract_delta_text(obj)
719
+ extract_text_value(obj["deltaContent"]) ||
720
+ extract_text_value(obj["delta_content"]) ||
721
+ extract_text_value(obj["delta"]) ||
722
+ extract_text_value(nested_hash_value(obj, "data", "deltaContent")) ||
723
+ extract_text_value(nested_hash_value(obj, "data", "delta_content")) ||
724
+ extract_text_value(nested_hash_value(obj, "data", "delta")) ||
725
+ extract_text_value(nested_hash_value(obj, "message", "deltaContent")) ||
726
+ extract_text_value(nested_hash_value(obj, "message", "delta_content")) ||
727
+ extract_text_value(nested_hash_value(obj, "message", "delta")) ||
728
+ extract_text_value(nested_hash_value(obj, "data", "message", "deltaContent")) ||
729
+ extract_text_value(nested_hash_value(obj, "data", "message", "delta_content")) ||
730
+ extract_text_value(nested_hash_value(obj, "data", "message", "delta"))
731
+ end
732
+
733
+ def usage_payload?(value)
734
+ value.is_a?(Hash) && token_count_keys.any? { |key| value.key?(key) }
735
+ end
736
+
737
+ def hash_key_present?(value, key)
738
+ value.is_a?(Hash) && value.key?(key)
798
739
  end
799
740
  end
800
741
  end