ace-support-config 0.11.2 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "open3"
5
+ require "pathname"
6
+ require "rubygems"
7
+ require "thread"
8
+ require "time"
9
+ require "yaml"
10
+ require_relative "../molecules/setup_doctor_reporter"
11
+ require_relative "../models/config_templates"
12
+
13
+ module Ace
14
+ module Support
15
+ module Config
16
+ module Organisms
17
+ class SetupDoctor
18
+ PROVIDER_GEM = "ace-llm-providers-cli"
19
+ PASS = "pass"
20
+ WARN = "warn"
21
+ BLOCKER = "blocker"
22
+ SKIP = "skip"
23
+ INFO = "info"
24
+ STATUS_GLYPHS = {PASS => "✓", WARN => "✗", BLOCKER => "✗", SKIP => "○", INFO => "○", "running" => "○"}.freeze
25
+ STATUS_COLORS = {PASS => "\e[32m", WARN => "\e[31m", BLOCKER => "\e[31m", SKIP => "\e[33m", INFO => "\e[36m", "running" => "\e[33m"}.freeze
26
+ ANSI_RESET = "\e[0m"
27
+
28
+ CORE_ROLES = %w[commit doctor].freeze
29
+ UTILITY_ROLE_GROUPS = %w[_utility _utility-lite].freeze
30
+ ROLE_REFERENCE_PATTERN = /\brole:([A-Za-z0-9_-]+)\b/
31
+ COST_BIAS_MARKER = "Cost Bias Override"
32
+ AGENT_ENGINEERING_ANCHOR = "docs/tools.md#agent-engineering-practices"
33
+ AGENT_ENGINEERING_HEADING = "## Agent Engineering Practices"
34
+ AGENT_ENGINEERING_NEXT_ACTION = "Run ace-config sync ace-support-core --force in generated projects, " \
35
+ "or manually add the Cost Bias Override line and docs/tools.md Agent Engineering Practices section " \
36
+ "in customized projects."
37
+
38
# Runs every setup check in a fixed order, optionally streams one progress
# line per check, prints the final report, and returns an exit status.
#
# @param json [Boolean] render the final report as JSON (turns off streaming and colors)
# @param no_probe [Boolean] passed to the probe check; also suppresses +probe+
# @param probe [Boolean] passed to the probe check unless +no_probe+ is set
# @param hygiene [Boolean] forwarded to the reporter
# @param verbose [Boolean] forwarded to the reporter
# @param colors [Boolean] forwarded to the reporter unless +json+ is set
# @param quiet [Boolean] suppress streaming and the final report
# @param io [IO] destination for progress lines and the report
# @return [Integer] 1 when health_blocking?(checks) is truthy, otherwise 0
def run(json: false, no_probe: false, probe: false, hygiene: false, verbose: false, colors: true, quiet: false, io: $stdout)
  clock_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  stream = !(json || quiet)
  checks = []
  record = ->(row) { append_check(checks, row, stream: stream, io: io) }

  record.call(check_artifact_hygiene)
  package_check = record.call(check_provider_package)
  record.call(check_provider_discovery)
  record.call(check_config_defaults)
  record.call(check_agent_engineering_guidance)

  # The provider context is only loaded when the package check did not block.
  provider_context = (package_check[:status] == BLOCKER) ? nil : load_provider_context

  # Hygiene rows are pushed directly instead of going through append_check.
  checks << check_alias_hygiene(provider_context)
  record.call(check_role_health(provider_context))
  checks << check_role_hygiene(provider_context)
  record.call(check_skill_sync)

  probe_targets = utility_provider_targets(provider_context)
  probe_check = check_probe_readiness(
    provider_context,
    no_probe: no_probe,
    probe: probe && !no_probe,
    role_targets: probe_targets,
    structural_blockers: health_blocking?(checks),
    progress_io: (stream ? io : nil)
  )
  record.call(probe_check)

  result = build_summary(checks).merge(
    valid: !health_blocking?(checks),
    duration: Process.clock_gettime(Process::CLOCK_MONOTONIC) - clock_start,
    stats: build_stats(checks)
  )

  unless quiet
    report = Molecules::SetupDoctorReporter.format_results(
      result,
      format: (json ? :json : :terminal),
      hygiene: hygiene,
      verbose: verbose,
      colors: colors && !json
    )
    io.puts report
    flush_io(io)
  end

  health_blocking?(checks) ? 1 : 0
end
91
+
92
+ private
93
+
94
# Verifies that the project root has a .gitignore listing ".ace-local/".
#
# @return [Object] the row built by +check+: BLOCKER when the file is missing
#   or lacks the entry, PASS otherwise
def check_artifact_hygiene
  gitignore_path = File.join(project_root, ".gitignore")
  row = {id: "artifact-hygiene", kind: "health"}

  if !File.exist?(gitignore_path)
    check(
      **row,
      status: BLOCKER,
      message: ".gitignore is missing at project root",
      next_action: "Create #{gitignore_path} and add .ace-local/."
    )
  elsif gitignore_entry_present?(File.read(gitignore_path), ".ace-local/")
    check(**row, status: PASS, message: ".ace-local/ is ignored")
  else
    check(
      **row,
      status: BLOCKER,
      message: ".ace-local/ is not ignored",
      next_action: "Add .ace-local/ to #{gitignore_path}."
    )
  end
end
120
+
121
# Reports whether the PROVIDER_GEM gem is visible to RubyGems.
#
# @return [Object] the row built by +check+: PASS when at least one matching
#   specification is found, BLOCKER when none is, WARN when the lookup raises
def check_provider_package
  if Gem::Specification.find_all_by_name(PROVIDER_GEM).empty?
    check(
      id: "provider-package",
      kind: "health",
      status: BLOCKER,
      message: "CLI provider package missing: #{PROVIDER_GEM}",
      next_action: "Install #{PROVIDER_GEM} and run bundle install."
    )
  else
    check(
      id: "provider-package",
      kind: "health",
      status: PASS,
      message: "CLI provider package #{PROVIDER_GEM} is available"
    )
  end
rescue => error
  check(
    id: "provider-package",
    kind: "health",
    status: WARN,
    message: "Unable to verify provider package availability: #{error.message}",
    next_action: "Verify #{PROVIDER_GEM} is installed."
  )
end
141
+
142
# Runs `ace-llm --list-providers` and reports whether provider discovery works.
#
# Spawn failures other than a missing executable (for example a binary that
# exists but is not executable) are reported as a BLOCKER row rather than
# being allowed to escape and abort the whole doctor run.
#
# @return [Object] the row built by +check+: PASS when the command exits
#   successfully, BLOCKER otherwise
def check_provider_discovery
  _out, err, status = Open3.capture3("ace-llm", "--list-providers")
  return check(id: "provider-discovery", kind: "health", status: PASS, message: "Provider discovery completed") if status.success?

  check(
    id: "provider-discovery",
    kind: "health",
    status: BLOCKER,
    message: "Provider discovery failed",
    next_action: discovery_next_action(err)
  )
rescue Errno::ENOENT
  check(
    id: "provider-discovery",
    kind: "health",
    status: BLOCKER,
    message: "ace-llm command is unavailable",
    next_action: "Install ace-llm and #{PROVIDER_GEM}, then rerun ace-config doctor."
  )
rescue SystemCallError, IOError => e
  # The executable was found but could not be launched or read from.
  check(
    id: "provider-discovery",
    kind: "health",
    status: BLOCKER,
    message: "Provider discovery could not run: #{e.message}",
    next_action: "Verify the ace-llm executable can be launched, then rerun ace-config doctor."
  )
end
162
+
163
# Builds the informational row describing how many config files are
# customized versus left at their defaults.
#
# @return [Object] the row built by +check+; always INFO, including when
#   collecting the summary raises
def check_config_defaults
  summary = collect_config_defaults_summary
  counts = "#{summary[:customized]} customized, #{summary[:default]} default"
  check(
    id: "config-defaults",
    kind: "info",
    status: INFO,
    message: "Config defaults comparison completed (#{counts})",
    details: summary[:details],
    summary: summary
  )
rescue => error
  check(
    id: "config-defaults",
    kind: "info",
    status: INFO,
    message: "Config defaults comparison skipped: #{error.message}",
    next_action: "Run ace-config diff --one-line to inspect config drift."
  )
end
182
+
183
# Checks the agent engineering guidance files under the project root:
# AGENTS.md / CLAUDE.md must mention COST_BIAS_MARKER and docs/tools.md must
# contain AGENT_ENGINEERING_HEADING.
#
# @return [Object] the row built by +check+: PASS when nothing is installed
#   or everything is present, WARN with the list of findings otherwise (also
#   WARN when the check itself raises)
def check_agent_engineering_guidance
  root = project_root
  row = {id: "agent-engineering-guidance", kind: "health"}

  guidance_paths = %w[AGENTS.md CLAUDE.md]
    .map { |name| File.join(root, name) }
    .select { |path| File.exist?(path) }
  docs_path = File.join(root, "docs", "tools.md")
  docs_content = File.exist?(docs_path) ? File.read(docs_path) : nil
  guidance_contents = guidance_paths.to_h { |path| [path, File.read(path)] }

  # None of the guidance files exist: treated as "not installed", not a failure.
  if guidance_paths.empty? && !docs_content
    return check(**row, status: PASS, message: "Agent engineering guidance not installed")
  end

  findings = guidance_contents
    .reject { |_path, content| content.include?(COST_BIAS_MARKER) }
    .map { |path, _content| "#{File.basename(path)} lacks #{COST_BIAS_MARKER}" }

  heading_present = docs_content ? docs_content.include?(AGENT_ENGINEERING_HEADING) : false
  if docs_content.nil?
    findings << "docs/tools.md is missing"
  elsif !heading_present
    findings << "docs/tools.md lacks #{AGENT_ENGINEERING_HEADING}"
  end

  links_anchor = guidance_contents.values.any? { |content| content.include?(AGENT_ENGINEERING_ANCHOR) }
  if links_anchor && !heading_present
    findings << "root guidance links #{AGENT_ENGINEERING_ANCHOR} but the anchor target is absent"
  end

  return check(**row, status: PASS, message: "Agent engineering guidance is present") if findings.empty?

  check(
    **row,
    status: WARN,
    message: "Agent engineering guidance is incomplete",
    next_action: AGENT_ENGINEERING_NEXT_ACTION,
    details: findings.uniq
  )
rescue => error
  check(
    id: "agent-engineering-guidance",
    kind: "health",
    status: WARN,
    message: "Agent engineering guidance check failed: #{error.message}",
    next_action: AGENT_ENGINEERING_NEXT_ACTION
  )
end
244
+
245
# Runs `ace-handbook status --format json` and compares provider skill
# projections against the canonical set.
#
# A provider entry counts as drifted when any of its "missing", "outdated"
# or "extra" counters is positive; entries with "enabled" set to a falsy
# value are ignored.
#
# @return [Object] the row built by +check+: PASS when no enabled provider
#   drifted, WARN on drift, command failure, missing command, invalid JSON
#   or any other error
def check_skill_sync
  stdout, stderr, status = Open3.capture3("ace-handbook", "status", "--format", "json")
  unless status.success?
    captured = [stderr, stdout].map { |text| text.to_s.strip }.reject(&:empty?)
    return check(
      id: "skill-sync",
      kind: "health",
      status: WARN,
      message: "Provider skill sync check failed",
      next_action: "Run ace-handbook status to inspect provider skill projections.",
      details: captured
    )
  end

  snapshot = JSON.parse(stdout)
  enabled = Array(snapshot["providers"]).select { |entry| entry.fetch("enabled", true) }
  drifted = enabled.select do |entry|
    %w[missing outdated extra].any? { |counter| entry.fetch(counter, 0).to_i.positive? }
  end

  if drifted.any?
    check(
      id: "skill-sync",
      kind: "health",
      status: WARN,
      message: "Provider skill sync drift detected (#{drifted.length}/#{enabled.length} providers)",
      next_action: "Run ace-handbook sync to refresh provider-native skills.",
      details: drifted.map { |entry| skill_sync_detail(entry) },
      skill_sync: {providers: enabled, drifted: drifted}
    )
  else
    canonical_total = snapshot.dig("canonical", "total").to_i
    check(
      id: "skill-sync",
      kind: "health",
      status: PASS,
      message: "Provider skills are in sync (#{enabled.length} providers, #{canonical_total} skills)",
      skill_sync: {providers: enabled, canonical_total: canonical_total}
    )
  end
rescue Errno::ENOENT
  check(
    id: "skill-sync",
    kind: "health",
    status: WARN,
    message: "Provider skill sync check unavailable: ace-handbook command is missing",
    next_action: "Install ace-handbook, then rerun ace-config doctor."
  )
rescue JSON::ParserError => error
  check(
    id: "skill-sync",
    kind: "health",
    status: WARN,
    message: "Provider skill sync check returned invalid JSON: #{error.message}",
    next_action: "Run ace-handbook status --format json to inspect provider skill projections."
  )
rescue => error
  check(
    id: "skill-sync",
    kind: "health",
    status: WARN,
    message: "Provider skill sync check failed: #{error.message}",
    next_action: "Run ace-handbook status to inspect provider skill projections."
  )
end
312
+
313
# Reports model aliases that do not resolve to a declared provider model.
#
# @param provider_context [Hash, nil] context passed on to find_stale_aliases
# @return [Object] the row built by +check+: WARN when the context is absent,
#   stale aliases exist, or the lookup raises; PASS otherwise
def check_alias_hygiene(provider_context)
  row = {id: "alias-readiness", kind: "hygiene"}

  if !provider_context
    return check(
      **row,
      status: WARN,
      message: "Alias readiness check skipped: provider context unavailable",
      next_action: "Ensure ace-llm is installed and provider discovery succeeds."
    )
  end

  stale = find_stale_aliases(provider_context)
  return check(**row, status: PASS, message: "Configured model aliases resolve") if stale.empty?

  mappings = stale.map { |item| "#{item[:provider]}:#{item[:alias]} -> #{item[:resolved]}" }
  check(
    **row,
    status: WARN,
    message: "Unsupported alias mappings detected (#{stale.length})",
    next_action: "Update aliases to declared provider models.",
    details: mappings
  )
rescue => error
  check(
    id: "alias-readiness",
    kind: "hygiene",
    status: WARN,
    message: "Alias readiness check failed: #{error.message}",
    next_action: "Review llm/providers alias configuration."
  )
end
351
+
352
# Checks that every role in CORE_ROLES has at least one passing candidate
# among its first two configured candidates.
#
# @param provider_context [Hash, nil] must expose the registry under :registry
# @return [Object] the row built by +check+: WARN when the context is absent
#   or the check raises, BLOCKER when any core role has problems, PASS
#   otherwise; BLOCKER and PASS rows carry the deduplicated passing targets
def check_role_health(provider_context)
  if !provider_context
    return check(
      id: "role-defaults",
      kind: "health",
      status: WARN,
      message: "Role default readiness skipped: provider context unavailable",
      next_action: "Ensure ace-llm is installed and provider discovery succeeds."
    )
  end

  registry = provider_context[:registry]
  role_config = load_role_config
  problems = []
  targets = []

  CORE_ROLES.each do |role|
    candidates = role_config.candidates_for(role)
    if !candidates
      problems << "role:#{role} is referenced but not defined"
      next
    end

    # Only the first two candidates are considered.
    validations = candidates.first(2).map { |candidate| validate_role_candidate(role, candidate, registry) }
    ready, failing = validations.partition { |item| item[:status] == PASS }
    targets.concat(ready.filter_map { |item| item[:target] })
    next if ready.any?

    problems.concat(failing.map { |item| item[:message] })
    problems << "role:#{role} has no ready provider in its first two candidates"
  end

  if problems.empty?
    check(
      id: "role-defaults",
      kind: "health",
      status: PASS,
      message: "Core role defaults resolve",
      targets: dedupe_targets(targets)
    )
  else
    check(
      id: "role-defaults",
      status: BLOCKER,
      kind: "health",
      message: "Core role readiness failed (#{problems.uniq.length})",
      next_action: "Update core llm.roles so setup workflows have a usable model path.",
      details: problems.uniq,
      targets: dedupe_targets(targets)
    )
  end
rescue => error
  check(
    id: "role-defaults",
    kind: "health",
    status: WARN,
    message: "Role default readiness check failed: #{error.message}",
    next_action: "Review llm.roles and provider configuration."
  )
end
413
+
414
# Validates the first two candidates of every role returned by
# used_role_names and reports each one that does not pass.
#
# @param provider_context [Hash, nil] must expose the registry under :registry
# @return [Object] the row built by +check+: WARN when the context is absent,
#   findings exist, or the check raises; PASS otherwise
def check_role_hygiene(provider_context)
  row = {id: "role-hygiene", kind: "hygiene"}

  if !provider_context
    return check(
      **row,
      status: WARN,
      message: "Role hygiene skipped: provider context unavailable",
      next_action: "Ensure ace-llm is installed and provider discovery succeeds."
    )
  end

  registry = provider_context[:registry]
  role_config = load_role_config

  findings = used_role_names(role_config).flat_map do |role|
    candidates = role_config.candidates_for(role)
    next ["role:#{role} is referenced but not defined"] unless candidates

    candidates.first(2)
      .map { |candidate| validate_role_candidate(role, candidate, registry) }
      .reject { |validation| validation[:status] == PASS }
      .map { |validation| validation[:message] }
  end

  return check(**row, status: PASS, message: "Role/default hygiene looks clean") if findings.empty?

  check(
    **row,
    status: WARN,
    message: "Role/default hygiene findings detected (#{findings.uniq.length})",
    next_action: "Review llm.roles and update stale or misspelled role references.",
    details: findings.uniq
  )
rescue => error
  check(
    id: "role-hygiene",
    kind: "hygiene",
    status: WARN,
    message: "Role hygiene check failed: #{error.message}",
    next_action: "Review llm.roles and provider configuration."
  )
end
468
+
469
# Pings the resolved utility provider targets and summarizes the outcomes.
#
# Early exits, in order: probes disabled, structural blockers present,
# provider context missing, no targets left after deduplication/ordering.
#
# @param provider_context [Hash, nil] context used to order the targets
# @param no_probe [Boolean] skip probing entirely
# @param probe [Boolean] accepted for interface compatibility; not read here
# @param role_targets [Array<Hash>] candidate targets to ping
# @param structural_blockers [Boolean] skip probing when earlier checks blocked
# @param progress_io [IO, nil] when given, live progress is written to it and
#   the row's detail lines are left empty
# @return [Object] the row built by +check+: SKIP, WARN or PASS
def check_probe_readiness(provider_context, no_probe:, probe:, role_targets:, structural_blockers:, progress_io: nil)
  row = {id: "probe-readiness", kind: "health"}

  return check(**row, status: SKIP, message: "Live provider probes disabled by --no-probe") if no_probe

  if structural_blockers
    return check(
      **row,
      status: SKIP,
      message: "Live provider probes skipped because setup blockers exist",
      next_action: "Fix blocker checks, then rerun ace-config doctor --probe."
    )
  end

  if !provider_context
    return check(
      **row,
      status: WARN,
      message: "Probe readiness skipped: provider context unavailable",
      next_action: "Verify provider discovery first."
    )
  end

  targets = order_probe_targets(dedupe_targets(role_targets), provider_context)
  if targets.empty?
    return check(
      **row,
      status: WARN,
      message: "No resolved utility provider targets available for probes",
      next_action: "Define llm.roles._utility or llm.roles.commit, then rerun ace-config doctor."
    )
  end

  progress = provider_progress(progress_io, targets)
  progress&.start

  outcomes = run_probe_targets(targets, progress: progress)
  total = outcomes.length
  passed = outcomes.count { |outcome| outcome[:status] == PASS }
  # With live progress the per-target lines were already printed.
  details = progress_io ? [] : ping_detail_lines(outcomes)

  if passed.positive? && passed == total
    check(
      **row,
      status: PASS,
      message: "Utility provider pings completed (#{passed}/#{total} passed)",
      details: details,
      outcomes: outcomes
    )
  elsif passed.positive?
    check(
      **row,
      status: WARN,
      message: "Utility provider pings partially completed (#{passed}/#{total} passed)",
      details: details,
      outcomes: outcomes,
      next_action: "At least one utility provider works; inspect failed providers if you need full redundancy."
    )
  else
    next_actions = outcomes.filter_map { |outcome| outcome[:next_action] }.uniq
    check(
      **row,
      status: WARN,
      message: "Utility provider pings failed (0/#{total} passed)",
      next_action: next_actions.first || "Authenticate at least one provider and rerun ace-config doctor.",
      outcomes: outcomes,
      details: details.empty? ? next_actions : details
    )
  end
end
549
+
550
# Probes all targets on worker threads and returns the outcomes in the same
# order as +targets+.
#
# One thread is started per target; each thread pulls [index, target] jobs
# from a shared queue with a non-blocking pop and stops when a ThreadError is
# raised.
#
# @param targets [Array<Hash>] targets handed to run_probe_target
# @param progress [#finish, nil] notified with (index, outcome) after each probe
# @return [Array<Hash>] outcomes with nil slots removed
def run_probe_targets(targets, progress: nil)
  jobs = Queue.new
  targets.each_with_index { |target, position| jobs << [position, target] }
  results = Array.new(targets.length)

  workers = Array.new(targets.length) do
    Thread.new do
      loop do
        begin
          position, target = jobs.pop(true)
          result = run_probe_target(target)
          results[position] = result
          progress&.finish(position, result)
        rescue ThreadError
          break
        end
      end
    end
  end
  workers.each(&:join)

  results.compact
end
571
+
572
# Pings a single provider target through `ace-llm <label> ping` and returns
# an outcome hash.
#
# The selector is target[:selector], or "provider:model" built from the
# non-nil parts; the label is target[:label] and falls back to the selector;
# the timeout defaults to 15 seconds. All three branches (success, non-zero
# exit, raised exception) report the same label, so an exception no longer
# yields an empty label when the target has no explicit :label.
#
# @param target [Hash] reads :selector, :provider, :model, :label,
#   :timeout_seconds and :provider_kind
# @return [Hash] PASS outcomes carry :elapsed_ms; WARN outcomes carry
#   :failure_type ("timeout" or "error") and :next_action
def run_probe_target(target)
  selector = target[:selector] || [target[:provider], target[:model]].compact.join(":")
  label = target[:label] || selector
  timeout_seconds = target[:timeout_seconds] || 15
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  out, err, status = Open3.capture3(
    "ace-llm",
    label.to_s,
    "ping",
    "--no-fallback",
    "--quiet",
    "--timeout",
    timeout_seconds.to_s,
    "--max-tokens",
    "4"
  )
  if status.success?
    {
      status: PASS,
      provider: target[:provider].to_s,
      label: label.to_s,
      selector: selector.to_s,
      provider_kind: target[:provider_kind],
      timeout_seconds: timeout_seconds,
      elapsed_ms: elapsed_ms(started_at)
    }
  else
    failure_text = "#{out}\n#{err}"
    {
      status: WARN,
      provider: target[:provider].to_s,
      label: label.to_s,
      selector: selector.to_s,
      provider_kind: target[:provider_kind],
      timeout_seconds: timeout_seconds,
      failure_type: timeout_error?(failure_text) ? "timeout" : "error",
      next_action: ping_next_action(failure_text, selector: selector)
    }
  end
rescue => e
  {
    status: WARN,
    provider: target[:provider].to_s,
    # Same label as the other branches (falls back to the selector).
    label: label.to_s,
    selector: selector.to_s,
    provider_kind: target[:provider_kind],
    timeout_seconds: timeout_seconds,
    failure_type: timeout_error?(e.message) ? "timeout" : "error",
    next_action: ping_next_action(e.message, selector: selector)
  }
end
623
+
624
# Milliseconds elapsed on the monotonic clock since +started_at+.
#
# @param started_at [Numeric] an earlier Process::CLOCK_MONOTONIC reading, in seconds
# @return [Integer] elapsed time rounded to whole milliseconds
def elapsed_ms(started_at)
  now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  seconds = now - started_at
  (seconds * 1000).round
end
627
+
628
# Renders one human-readable line per probe outcome.
#
# @param outcomes [Array<Hash>] reads :status, :elapsed_ms, :failure_type
#   and :timeout_seconds
# @return [Array<String>] "<target> responded[ in Nms]", "<target> timed out
#   after Ns" or "<target> failed"
def ping_detail_lines(outcomes)
  outcomes.map do |outcome|
    timing = outcome[:elapsed_ms] ? " in #{outcome[:elapsed_ms]}ms" : ""
    name = target_display(outcome)

    next "#{name} responded#{timing}" if outcome[:status] == PASS
    next "#{name} timed out after #{outcome[:timeout_seconds]}s" if outcome[:failure_type] == "timeout"

    "#{name} failed"
  end
end
641
+
642
# Builds the live progress printer for the probe phase.
#
# @param io [IO, nil] destination for progress output
# @param targets [Array<Hash>] targets that will be probed
# @return [ProviderProgress, nil] nil when no +io+ is given
def provider_progress(io, targets)
  io ? ProviderProgress.new(self, io, targets) : nil
end
647
+
648
# Suggests a follow-up step for a failed ping.
#
# @param text [String] failure output classified by auth_related_error?
# @param selector [String, nil] appended to the suggested command when given
# @return [String] an authentication hint for auth-related failures,
#   otherwise the ace-llm command to run by hand
def ping_next_action(text, selector: nil)
  return "Authenticate at least one utility provider and rerun." if auth_related_error?(text)

  suffix = selector ? " #{selector}" : ""
  "Run ace-llm#{suffix} \"ping\" --no-fallback to inspect provider setup."
end
656
+
657
# Lazily loads ace-llm and snapshots its client registry.
#
# @return [Hash, nil] {registry:, providers:, aliases:} built from a new
#   ClientRegistry, or nil when loading or querying raises LoadError or any
#   StandardError
def load_provider_context
  require "ace/llm"
  require "ace/llm/molecules/client_registry"

  Ace::LLM::Molecules::ClientRegistry.new.then do |registry|
    {
      registry: registry,
      providers: registry.list_providers_with_status,
      aliases: registry.available_aliases
    }
  end
rescue LoadError, StandardError
  nil
end
670
+
671
# Lazily loads ace-llm and builds a RoleConfig from the "llm.roles"
# configuration value.
#
# @return [Ace::LLM::Models::RoleConfig]
def load_role_config
  require "ace/llm"
  require "ace/llm/models/role_config"

  roles = Ace::LLM.configuration.get("llm.roles")
  Ace::LLM::Models::RoleConfig.from_hash(roles)
end
677
+
678
# Lists every role name worth validating: CORE_ROLES plus the roles
# referenced from config files.
#
# @param _role_config [Object] unused
# @return [Array<String>] unique role names in sorted order
def used_role_names(_role_config)
  referenced = role_references_from_config_files
  (CORE_ROLES | referenced).sort
end
681
+
682
# Resolves probe targets from the first defined role in UTILITY_ROLE_GROUPS
# followed by the "commit" role.
#
# @param provider_context [Hash, nil] must expose the registry under :registry
# @return [Array<Hash>] parsed targets; empty when the context is absent or
#   anything raises
def utility_provider_targets(provider_context)
  return [] unless provider_context

  registry = provider_context[:registry]
  role_config = load_role_config
  utility_role = UTILITY_ROLE_GROUPS.find { |role| role_config.candidates_for(role) }

  pool = [utility_role, "commit"].flat_map { |role| Array(role_config.candidates_for(role)) }
  pool.filter_map { |candidate| parse_role_provider_target("utility", candidate, registry) }
rescue
  []
end
695
+
696
# Parses a role candidate string into a probe target hash.
#
# @param role [#to_s] role name recorded on the target
# @param candidate [#to_s] candidate string; also kept as the target's label
# @param registry [Object] registry handed to the alias resolver and parser
# @return [Hash, nil] {role:, provider:, model:, selector:, label:}, or nil
#   when the parser reports the candidate as invalid
def parse_role_provider_target(role, candidate, registry)
  require "ace/llm"
  require "ace/llm/molecules/llm_alias_resolver"
  require "ace/llm/molecules/provider_model_parser"

  resolver = Ace::LLM::Molecules::LlmAliasResolver.new(registry: registry)
  parsed = Ace::LLM::Molecules::ProviderModelParser
    .new(alias_resolver: resolver, registry: registry)
    .parse(candidate.to_s)
  return nil if parsed.invalid?

  provider = parsed.provider.to_s
  model = parsed.model.to_s
  # A blank model leaves the selector as just the provider name.
  selector = provider
  selector = "#{provider}:#{model}" unless model.empty?

  {role: role.to_s, provider: provider, model: model, selector: selector, label: candidate.to_s}
end
711
+
712
# Annotates each target with its provider kind and timeout, then lists the
# "api" targets before all others while keeping the incoming order inside
# each group.
#
# @param targets [Array<Hash>] targets carrying a :provider key
# @param provider_context [Hash, nil] passed through to provider_kind
# @return [Array<Hash>] new hashes with :provider_kind and :timeout_seconds
#   added (30 seconds for "cli" providers, 15 otherwise)
def order_probe_targets(targets, provider_context)
  annotated = targets.map do |target|
    kind = provider_kind(target[:provider], provider_context)
    target.merge(provider_kind: kind, timeout_seconds: (kind == "cli") ? 30 : 15)
  end

  api_targets, other_targets = annotated.partition { |target| target[:provider_kind] == "api" }
  api_targets + other_targets
end
721
+
722
# Classifies a provider as "cli" or "api" from its registry configuration.
#
# A provider is "cli" when its "gem" entry equals PROVIDER_GEM or its "class"
# entry contains "Providers::CLI". Everything else, including a missing
# registry, a missing config, or any raised error, is "api".
#
# @param provider_name [String] name looked up via registry.get_provider
# @param provider_context [Hash, nil] may expose the registry under :registry
# @return [String] "cli" or "api"
def provider_kind(provider_name, provider_context)
  registry = provider_context&.fetch(:registry, nil)
  config = registry&.respond_to?(:get_provider) ? registry.get_provider(provider_name) : nil

  class_name = config&.fetch("class", "").to_s
  gem_name = config&.fetch("gem", "").to_s
  cli = gem_name == PROVIDER_GEM || class_name.include?("Providers::CLI")

  cli ? "cli" : "api"
rescue
  "api"
end
733
+
734
# Collects the role references found in every file returned by config_files.
#
# Each file is parsed with YAML.safe_load_file (aliases enabled). Files that
# fail with a Psych error, are missing, or are unreadable contribute nothing.
#
# @return [Array<String>] role names, possibly with duplicates
def role_references_from_config_files
  config_files.flat_map do |path|
    begin
      extract_role_references(YAML.safe_load_file(path, aliases: true))
    rescue Psych::Exception, Errno::ENOENT, Errno::EACCES
      []
    end
  end
end
742
+
743
# Lists the YAML config files to scan for role references: everything under
# <root>/.ace and under any <root>/*/.ace-defaults, with a .yml or .yaml
# extension.
#
# The patterns are matched relative to the project root via the +base:+
# option instead of being joined onto it, so a root path that itself contains
# glob metacharacters (such as "[", "{", "*" or "?") is treated literally and
# still yields its files.
#
# @return [Array<String>] unique, sorted paths of regular files, each
#   prefixed with the project root
def config_files
  root = project_root
  patterns = [
    File.join(".ace", "**", "*.yml"),
    File.join(".ace", "**", "*.yaml"),
    File.join("*", ".ace-defaults", "**", "*.yml"),
    File.join("*", ".ace-defaults", "**", "*.yaml")
  ]
  Dir.glob(patterns, base: root)
    .map { |relative| File.join(root, relative) }
    .select { |path| File.file?(path) }
    .uniq
    .sort
end
753
+
754
# Recursively collects every name captured by ROLE_REFERENCE_PATTERN from the
# strings inside a parsed config structure.
#
# @param value [Object] hashes contribute their values, arrays their
#   elements, strings their matches; anything else contributes nothing
# @return [Array<String>] captured role names in traversal order
def extract_role_references(value)
  return value.scan(ROLE_REFERENCE_PATTERN).flatten if !value.is_a?(Hash) && !value.is_a?(Array) && value.is_a?(String)

  children =
    if value.is_a?(Hash)
      value.values
    elsif value.is_a?(Array)
      value
    else
      []
    end
  children.flat_map { |nested| extract_role_references(nested) }
end
766
+
767
# Validates one role candidate against the provider registry.
#
# Checks run in order and stop at the first failure: the candidate must
# parse, resolve to a model the registry lists for the provider, belong to an
# available provider, and have credentials when the provider requires them.
#
# @param role [#to_s] role the candidate belongs to
# @param candidate [#to_s] candidate string to parse
# @param registry [Object] provider registry queried for models,
#   availability and API key state
# @return [Hash] {status: BLOCKER, message:} for parse/model failures,
#   {status: WARN, message:, target:} for availability/credential failures,
#   {status: PASS, target:} otherwise
def validate_role_candidate(role, candidate, registry)
  require "ace/llm"
  require "ace/llm/molecules/llm_alias_resolver"
  require "ace/llm/molecules/provider_model_parser"

  resolver = Ace::LLM::Molecules::LlmAliasResolver.new(registry: registry)
  parsed = Ace::LLM::Molecules::ProviderModelParser
    .new(alias_resolver: resolver, registry: registry)
    .parse(candidate.to_s)
  if parsed.invalid?
    return {status: BLOCKER, message: "role:#{role} candidate #{candidate} is invalid: #{parsed.error}"}
  end

  provider = parsed.provider.to_s
  model = parsed.model.to_s
  known_models = Array(registry.models_for_provider(provider)).map(&:to_s)
  if !known_models.include?(model)
    return {
      status: BLOCKER,
      message: "role:#{role} candidate #{candidate} resolves to unsupported model #{provider}:#{model}"
    }
  end

  target = {role: role.to_s, provider: provider, model: model, selector: "#{provider}:#{model}"}
  if !registry.provider_available?(provider)
    return {
      status: WARN,
      message: "role:#{role} candidate #{candidate} provider #{provider} is unavailable",
      target: target
    }
  end

  credentials_missing = registry.provider_api_key_required?(provider) && !registry.provider_api_key_present?(provider)
  if credentials_missing
    return {
      status: WARN,
      message: "role:#{role} candidate #{candidate} provider #{provider} is missing credentials",
      target: target
    }
  end

  {status: PASS, target: target}
end
810
+
811
# Keeps the first target seen for each provider.
#
# @param targets [Array<Hash>] targets carrying a :provider key
# @return [Array<Hash>] a new array with at most one target per provider
#   (compared as strings), in the original order
def dedupe_targets(targets)
  targets.uniq { |target| target[:provider].to_s }
end
821
+
822
# Builds the display name for a target or outcome.
#
# @param target [Hash] reads :label and :selector
# @return [String] the label (or the selector when the label is blank),
#   followed by " (<selector>)" when a non-blank selector differs from it
def target_display(target)
  selector = target[:selector].to_s
  label = target[:label].to_s
  label = selector if label.empty?
  return label if selector.empty? || selector == label

  "#{label} (#{selector})"
end
828
+
829
# Formats a single probe progress line: glyph, target name and a suffix.
#
# @param target [Hash] target or outcome; reads :failure_type and
#   :timeout_seconds for WARN rows
# @param status [String] status whose glyph is shown
# @param color [Boolean] wrap the glyph in the status color
# @param elapsed_ms [Integer, nil] shown for PASS rows when present
# @return [String] "<glyph> <target><suffix>"
def format_probe_line(target, status:, color: false, elapsed_ms: nil)
  glyph = status_glyph(status)
  glyph = colorize(glyph, status) if color

  suffix = ""
  if status == PASS
    suffix = " in #{elapsed_ms}ms" if elapsed_ms
  elsif status == WARN
    suffix = (target[:failure_type] == "timeout") ? " timed out after #{target[:timeout_seconds]}s" : " failed"
  end

  "#{glyph} #{target_display(target)}#{suffix}"
end
843
+
844
# Looks up the glyph for a status.
#
# @param status [String] status key
# @return [String] the glyph from STATUS_GLYPHS, or the WARN glyph for an
#   unknown status
def status_glyph(status)
  STATUS_GLYPHS.fetch(status) { STATUS_GLYPHS[WARN] }
end
847
+
848
# Wraps a value in the ANSI color registered for a status.
#
# @param value [#to_s] text to color
# @param status [String] status key into STATUS_COLORS
# @return [String] color prefix (empty for an unknown status), the value,
#   then ANSI_RESET
def colorize(value, status)
  prefix = STATUS_COLORS.fetch(status, "")
  "#{prefix}#{value}#{ANSI_RESET}"
end
851
+
852
# Prints live progress for provider probes.
#
# On a TTY each target's line is rewritten in place with cursor-movement
# escapes; otherwise a new line is appended per finished probe. Line
# formatting is delegated to the doctor's format_probe_line.
class ProviderProgress
  # @param doctor [Object] object answering format_probe_line (invoked via send)
  # @param io [IO] output stream; treated as a TTY when it answers tty? truthily
  # @param targets [Array<Hash>] targets in display order
  def initialize(doctor, io, targets)
    @doctor = doctor
    @io = io
    @targets = targets
    @tty = io.respond_to?(:tty?) && io.tty?
    @line_count = 0
    @mutex = Mutex.new
  end

  # Prints the header and one "running" line per target.
  def start
    @io.puts "RUN Utility provider pings running (0/#{@targets.length} passed)"
    @targets.each { |target| @io.puts " #{format_line(target, status: "running")}" }
    @line_count = @targets.length
    flush
  end

  # Records a finished probe; serialized through a mutex.
  #
  # @param index [Integer] position of the target in the list given at construction
  # @param outcome [Hash] probe outcome; reads :status and :elapsed_ms
  def finish(index, outcome)
    @mutex.synchronize do
      @tty ? rewrite_line(index, outcome) : append_line(outcome)
      flush
    end
  end

  private

  # Moves the cursor up to the target's line, replaces it, and moves back down.
  def rewrite_line(index, outcome)
    return append_line(outcome) if @line_count.zero?

    rows_up = @line_count - index
    @io.print "\e[#{rows_up}A" if rows_up.positive?
    @io.print "\r\e[2K #{format_line(outcome, status: outcome[:status], elapsed_ms: outcome[:elapsed_ms])}\n"
    rows_down = rows_up - 1
    @io.print "\e[#{rows_down}B" if rows_down.positive?
  end

  def append_line(outcome)
    @io.puts " #{format_line(outcome, status: outcome[:status], elapsed_ms: outcome[:elapsed_ms])}"
  end

  def format_line(target, status:, elapsed_ms: nil)
    @doctor.send(:format_probe_line, target, status: status, color: @tty, elapsed_ms: elapsed_ms)
  end

  def flush
    @io.flush if @io.respond_to?(:flush)
  end
end
906
+
907
+ def find_stale_aliases(provider_context)
908
+ providers = provider_context[:providers]
909
+ aliases = provider_context[:aliases] || {}
910
+ provider_models = providers.to_h do |provider|
911
+ [provider[:name].to_s, Array(provider[:models]).map(&:to_s)]
912
+ end
913
+
914
+ stale = []
915
+
916
+ model_aliases = aliases[:model] || aliases["model"] || {}
917
+ model_aliases.each do |provider_name, mapping|
918
+ (mapping || {}).each do |alias_name, model_name|
919
+ next if valid_provider_model_target?(provider_models, provider_name, model_name)
920
+
921
+ stale << {provider: provider_name.to_s, alias: alias_name.to_s, resolved: model_name.to_s}
922
+ end
923
+ end
924
+
925
+ global_aliases = aliases[:global] || aliases["global"] || {}
926
+ registry = provider_context[:registry]
927
+ global_aliases.each do |alias_name, target|
928
+ provider_name, model_name = parse_provider_model_target(resolve_alias_target(registry, target))
929
+ next unless provider_name && model_name
930
+ next if valid_provider_model_target?(provider_models, provider_name, model_name)
931
+
932
+ stale << {provider: provider_name, alias: alias_name.to_s, resolved: model_name}
933
+ end
934
+
935
+ stale
936
+ end
937
+
938
# Appends a check row and, when streaming, prints its progress line.
#
# @param checks [Array] accumulator that receives the row
# @param check_row [Hash] row to record; its :id is compared with +skip_ids+
# @param stream [Boolean] whether progress output is enabled
# @param io [IO, nil] progress destination; nothing is printed without it
# @param skip_ids [Array<String>, String, nil] ids that are recorded but not printed
# @return [Hash] the same +check_row+
def append_check(checks, check_row, stream: false, io: nil, skip_ids: [])
  checks << check_row

  announce = stream && io && !Array(skip_ids).include?(check_row[:id])
  output_progress_check(check_row, io: io) if announce

  check_row
end
943
+
944
# Prints "<STATUS> <message>" for a check row; hygiene rows are not printed.
#
# @param check_row [Hash] reads :kind, :status and :message
# @param io [IO] destination stream, flushed after the line is written
# @return [void]
def output_progress_check(check_row, io:)
  return if check_row[:kind] == "hygiene"

  line = "#{check_row[:status].upcase} #{check_row[:message]}"
  io.puts line
  flush_io(io)
end
950
+
951
# Builds the summary hash for a finished run.
#
# Rows are split by +:kind+ ("health", "info", "hygiene"). Health blocker and
# warning counts appear both at the top level and under +:health+; the info
# count appears both as +:info_count+ and under +:info+.
#
# @param checks [Array<Hash>] all check rows; returned untouched under +:checks+
# @return [Hash] summary with a UTC ISO 8601 +:generated_at+ timestamp
def build_summary(checks)
  rows_of = ->(kind) { checks.select { |row| row[:kind] == kind } }
  tally = ->(rows, status) { rows.count { |row| row[:status] == status } }

  health_rows = rows_of.call("health")
  info_rows = rows_of.call("info")
  hygiene_rows = rows_of.call("hygiene")

  health_blockers = tally.call(health_rows, BLOCKER)
  health_warnings = tally.call(health_rows, WARN)
  info_total = info_rows.length

  {
    generated_at: Time.now.utc.iso8601,
    blocker_count: health_blockers,
    warning_count: health_warnings,
    info_count: info_total,
    health: {blocker_count: health_blockers, warning_count: health_warnings},
    info: {count: info_total},
    hygiene: {
      finding_count: hygiene_finding_count(hygiene_rows),
      warning_count: tally.call(hygiene_rows, WARN),
      blocker_count: tally.call(hygiene_rows, BLOCKER)
    },
    checks: checks
  }
end
975
+
976
# Derives headline statistics from the collected check rows.
#
# Provider figures come from the +:outcomes+ of the health row with id
# "probe-readiness". +:config_defaults+ is the +:summary+ of the info row
# "config-defaults", and +:skill_sync+ is the +:skill_sync+ value of the
# health row "skill-sync". Missing rows or keys yield zero counts or nil.
#
# @param checks [Array<Hash>] all check rows
# @return [Hash] counts and optional nested summaries
def build_stats(checks)
  rows_of = ->(kind) { checks.select { |row| row[:kind] == kind } }
  by_id = ->(rows, id) { rows.find { |row| row[:id] == id } }

  health_rows = rows_of.call("health")
  info_rows = rows_of.call("info")

  probe_row = by_id.call(health_rows, "probe-readiness")
  outcomes = Array(probe_row&.fetch(:outcomes, []))
  defaults_row = by_id.call(info_rows, "config-defaults")
  sync_row = by_id.call(health_rows, "skill-sync")

  {
    health_checks: health_rows.length,
    info_checks: info_rows.length,
    provider_targets: outcomes.length,
    provider_passed: outcomes.count { |outcome| outcome[:status] == PASS },
    hygiene_findings: hygiene_finding_count(rows_of.call("hygiene")),
    config_defaults: defaults_row&.fetch(:summary, nil),
    skill_sync: sync_row&.fetch(:skill_sync, nil)
  }
end
993
+
994
# Tells whether any health check ended with the blocker status.
#
# @param checks [Array<Hash>] check rows; +:kind+ and +:status+ are read
# @return [Boolean] true when at least one "health" row has status BLOCKER
def health_blocking?(checks)
  checks.any? do |row|
    next false unless row[:kind] == "health"

    row[:status] == BLOCKER
  end
end
997
+
998
# Flushes +io+ when it exposes a +flush+ method; otherwise does nothing.
#
# @param io [Object] output destination
# @return [Object, nil] the result of +io.flush+, or nil when unsupported
def flush_io(io)
  return unless io.respond_to?(:flush)

  io.flush
end
1001
+
1002
# Counts findings across the given check rows.
#
# A row contributes the size of its +:details+ list when that list holds at
# least one truthy entry. Otherwise it contributes 0 if its status is PASS
# and 1 for any other status.
#
# @param checks [Array<Hash>] rows to count; +:details+ and +:status+ are read
# @return [Integer] total number of findings
def hygiene_finding_count(checks)
  per_row = checks.map do |row|
    findings = Array(row[:details])
    next findings.length if findings.any?

    (row[:status] == PASS) ? 0 : 1
  end
  per_row.sum
end
1014
+
1015
# Builds a check row hash.
#
# @param id [Object] identifier of the check
# @param kind [String] row kind; defaults to "health"
# @param status [Object] status value of the check
# @param message [Object] message stored under +:message+
# @param next_action [Object, nil] optional follow-up stored under +:next_action+
# @param extra [Hash] any additional keys, merged after the standard ones
# @return [Hash] the assembled row
def check(id:, kind: "health", status:, message:, next_action: nil, **extra)
  row = {id: id, kind: kind, status: status, message: message, next_action: next_action}
  row.update(extra)
end
1018
+
1019
# Picks the follow-up hint shown after provider discovery fails.
#
# When the captured stderr mentions PROVIDER_GEM the hint asks to install that
# gem; otherwise it suggests inspecting the provider setup.
#
# @param stderr_output [Object] captured stderr; converted with +to_s+
# @return [String] the suggested next action
def discovery_next_action(stderr_output)
  mentions_gem = stderr_output.to_s.include?(PROVIDER_GEM)
  return "Install #{PROVIDER_GEM} and rerun ace-llm --list-providers." if mentions_gem

  "Run ace-llm --list-providers to inspect provider setup, then rerun ace-config doctor."
end
1027
+
1028
# Detects whether an error text looks authentication-related.
#
# The check is a case-insensitive substring search for "credential",
# "api key", "auth" or "login".
#
# @param text [Object] error text; converted with +to_s+
# @return [Boolean] true when any of the markers is present
def auth_related_error?(text)
  haystack = text.to_s.downcase
  ["credential", "api key", "auth", "login"].any? { |needle| haystack.include?(needle) }
end
1035
+
1036
# Detects whether an error text mentions a timeout.
#
# The check is a case-insensitive substring search for "timed out" or
# "timeout".
#
# @param text [Object] error text; converted with +to_s+
# @return [Boolean] true when either marker is present
def timeout_error?(text)
  lowered = text.to_s.downcase
  ["timed out", "timeout"].any? { |marker| lowered.include?(marker) }
end
1040
+
1041
# Compares every template file shipped by the known gems with its counterpart
# under "<project root>/.ace" and tallies the result.
#
# A file counts as customized only when the counterpart exists and its
# content differs from the template. A missing counterpart, identical
# content, or any error while comparing counts as default. Gems without an
# existing template directory, or without files in it, are skipped.
#
# @return [Hash] +:files+, +:customized+ and +:default+ totals plus
#   +:details+, one "<gem>: N customized, M default" line per inspected gem
def collect_config_defaults_summary
  root = project_root
  totals = {files: 0, customized: 0, default: 0, details: []}

  Models::ConfigTemplates.all_gems.each do |gem_name|
    template_dir = Models::ConfigTemplates.example_dir_for(gem_name)
    next unless template_dir && Dir.exist?(template_dir)

    templates = Dir.glob(File.join(template_dir, "**", "*")).select { |path| File.file?(path) }
    next if templates.empty?

    per_gem = {customized: 0, default: 0}
    templates.each do |template|
      bucket =
        begin
          relative = Pathname.new(template).relative_path_from(Pathname.new(template_dir)).to_s
          installed = File.join(root, ".ace", relative)
          # Counted before the reads so an unreadable file is still included.
          totals[:files] += 1
          differs = File.exist?(installed) && File.read(template) != File.read(installed)
          differs ? :customized : :default
        rescue
          # Best effort: anything that cannot be compared is treated as default.
          :default
        end
      totals[bucket] += 1
      per_gem[bucket] += 1
    end

    totals[:details] << "#{gem_name}: #{per_gem[:customized]} customized, #{per_gem[:default]} default"
  end

  totals
end
1078
+
1079
# Formats one skill-sync entry as a human-readable line.
#
# "provider" is mandatory; the numeric fields ("expected", "in_sync",
# "missing", "outdated", "extra") default to 0 and are coerced with +to_i+.
#
# @param entry [Hash] string-keyed sync entry
# @return [String] "<provider>: A/B in sync, C missing, D outdated, E extra"
# @raise [KeyError] when the "provider" key is absent
def skill_sync_detail(entry)
  provider = entry.fetch("provider")
  expected, in_sync, missing, outdated, extra =
    %w[expected in_sync missing outdated extra].map { |field| entry.fetch(field, 0).to_i }

  "#{provider}: #{in_sync}/#{expected} in sync, #{missing} missing, #{outdated} outdated, #{extra} extra"
end
1088
+
1089
# Resolves the directory treated as the project root.
#
# Asks +Ace::Support::Config.find_project_root+ starting from the current
# working directory and falls back to the working directory itself when that
# lookup returns nothing.
#
# @return [Object] the located root, or +Dir.pwd+ as the fallback
def project_root
  located = Ace::Support::Config.find_project_root(start_path: Dir.pwd)
  return located if located

  Dir.pwd
end
1092
+
1093
# Tells whether a .gitignore body already covers +entry+.
#
# Both the entry and every line are normalised with +canonical_ignore_token+.
# A line matches when it equals the normalised entry or starts with
# "<entry>/". Lines that normalise to nil are ignored.
#
# An entry that normalises to nil or to an empty string can never be present.
# Without this guard the prefix test would degrade to "/" and report a match
# for any line that still begins with a slash after normalisation
# (for example "//odd").
#
# @param content [String] text of the .gitignore file
# @param entry [Object] ignore entry to look for
# @return [Boolean] true when a matching line exists
def gitignore_entry_present?(content, entry)
  target = canonical_ignore_token(entry)
  return false if target.nil? || target.empty?

  nested_prefix = "#{target}/"
  content.each_line.any? do |line|
    normalized = canonical_ignore_token(line)
    next false if normalized.nil?

    normalized == target || normalized.start_with?(nested_prefix)
  end
end
1102
+
1103
# Checks that +model_name+ is one of the models recorded for +provider_name+.
#
# Both names are compared in their string form. An unknown provider, or one
# with an empty model list, never validates.
#
# @param provider_models [Hash{String => Array<String>}] models per provider
# @param provider_name [Object] provider to look up
# @param model_name [Object] model expected to be listed
# @return [Boolean] true when the model is listed for the provider
def valid_provider_model_target?(provider_models, provider_name, model_name)
  known = provider_models[provider_name.to_s] || []
  return false if known.empty?

  known.include?(model_name.to_s)
end
1107
+
1108
# Resolves an alias target through the registry when one is usable.
#
# If the registry responds to +resolve_alias+ its answer is returned as a
# string. Without such a registry, or when resolution raises a StandardError,
# the string form of the original target is returned instead.
#
# @param registry [Object, nil] object that may respond to +resolve_alias+
# @param target [Object] alias target; converted with +to_s+
# @return [String] resolved target, or the unresolved target as a fallback
def resolve_alias_target(registry, target)
  fallback = target.to_s
  if registry.respond_to?(:resolve_alias)
    registry.resolve_alias(fallback).to_s
  else
    fallback
  end
rescue StandardError
  fallback
end
1116
+
1117
# Splits a "provider:model" target at its first colon.
#
# Everything after the first colon belongs to the model, so "a:b:c" yields
# ["a", "b:c"]. A value without a colon, or with an empty provider or model
# part, yields [nil, nil].
#
# @param value [Object] target to parse; converted with +to_s+
# @return [Array(String, String), Array(nil, nil)] provider and model
def parse_provider_model_target(value)
  provider, separator, model = value.to_s.partition(":")
  return [nil, nil] if separator.empty? || provider.empty? || model.empty?

  [provider, model]
end
1126
+
1127
# Normalises a .gitignore line or entry for comparison.
#
# Surrounding whitespace is stripped. Blank values, comments ("#...") and
# negations ("!...") yield nil. Otherwise one leading and one trailing slash
# are removed.
#
# @param value [Object] raw line or entry; converted with +to_s+
# @return [String, nil] the normalised token, or nil when it should be ignored
def canonical_ignore_token(value)
  stripped = value.to_s.strip
  return nil if stripped.empty? || stripped.start_with?("#", "!")

  stripped.delete_prefix("/").delete_suffix("/")
end
1135
+ end
1136
+ end
1137
+ end
1138
+ end
1139
+ end