aidp 0.32.0 → 0.34.0

This diff shows the changes between publicly available package versions as published to their public registry, and is provided for informational purposes only.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +35 -0
  3. data/lib/aidp/analyze/feature_analyzer.rb +322 -320
  4. data/lib/aidp/analyze/tree_sitter_scan.rb +3 -0
  5. data/lib/aidp/auto_update/coordinator.rb +97 -7
  6. data/lib/aidp/auto_update.rb +0 -12
  7. data/lib/aidp/cli/devcontainer_commands.rb +0 -5
  8. data/lib/aidp/cli/eval_command.rb +399 -0
  9. data/lib/aidp/cli/harness_command.rb +1 -1
  10. data/lib/aidp/cli/security_command.rb +416 -0
  11. data/lib/aidp/cli/tools_command.rb +6 -4
  12. data/lib/aidp/cli.rb +172 -4
  13. data/lib/aidp/comment_consolidator.rb +78 -0
  14. data/lib/aidp/concurrency/exec.rb +3 -0
  15. data/lib/aidp/concurrency.rb +0 -3
  16. data/lib/aidp/config.rb +113 -1
  17. data/lib/aidp/config_paths.rb +91 -0
  18. data/lib/aidp/daemon/runner.rb +8 -4
  19. data/lib/aidp/errors.rb +134 -0
  20. data/lib/aidp/evaluations/context_capture.rb +205 -0
  21. data/lib/aidp/evaluations/evaluation_record.rb +114 -0
  22. data/lib/aidp/evaluations/evaluation_storage.rb +250 -0
  23. data/lib/aidp/evaluations.rb +23 -0
  24. data/lib/aidp/execute/async_work_loop_runner.rb +4 -1
  25. data/lib/aidp/execute/interactive_repl.rb +6 -2
  26. data/lib/aidp/execute/prompt_evaluator.rb +359 -0
  27. data/lib/aidp/execute/repl_macros.rb +100 -1
  28. data/lib/aidp/execute/work_loop_runner.rb +719 -58
  29. data/lib/aidp/execute/work_loop_state.rb +4 -1
  30. data/lib/aidp/execute/workflow_selector.rb +3 -0
  31. data/lib/aidp/harness/ai_decision_engine.rb +79 -0
  32. data/lib/aidp/harness/ai_filter_factory.rb +285 -0
  33. data/lib/aidp/harness/capability_registry.rb +2 -0
  34. data/lib/aidp/harness/condition_detector.rb +3 -0
  35. data/lib/aidp/harness/config_loader.rb +3 -0
  36. data/lib/aidp/harness/config_schema.rb +97 -1
  37. data/lib/aidp/harness/config_validator.rb +1 -1
  38. data/lib/aidp/harness/configuration.rb +61 -5
  39. data/lib/aidp/harness/enhanced_runner.rb +14 -11
  40. data/lib/aidp/harness/error_handler.rb +3 -0
  41. data/lib/aidp/harness/filter_definition.rb +212 -0
  42. data/lib/aidp/harness/generated_filter_strategy.rb +197 -0
  43. data/lib/aidp/harness/output_filter.rb +50 -25
  44. data/lib/aidp/harness/output_filter_config.rb +129 -0
  45. data/lib/aidp/harness/provider_factory.rb +3 -0
  46. data/lib/aidp/harness/provider_manager.rb +96 -2
  47. data/lib/aidp/harness/runner.rb +5 -12
  48. data/lib/aidp/harness/state/persistence.rb +3 -0
  49. data/lib/aidp/harness/state_manager.rb +3 -0
  50. data/lib/aidp/harness/status_display.rb +28 -20
  51. data/lib/aidp/harness/test_runner.rb +179 -41
  52. data/lib/aidp/harness/thinking_depth_manager.rb +44 -28
  53. data/lib/aidp/harness/ui/enhanced_tui.rb +4 -0
  54. data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +4 -0
  55. data/lib/aidp/harness/ui/error_handler.rb +3 -0
  56. data/lib/aidp/harness/ui/job_monitor.rb +4 -0
  57. data/lib/aidp/harness/ui/navigation/submenu.rb +2 -2
  58. data/lib/aidp/harness/ui/navigation/workflow_selector.rb +6 -0
  59. data/lib/aidp/harness/ui/spinner_helper.rb +3 -0
  60. data/lib/aidp/harness/ui/workflow_controller.rb +3 -0
  61. data/lib/aidp/harness/user_interface.rb +3 -0
  62. data/lib/aidp/loader.rb +195 -0
  63. data/lib/aidp/logger.rb +3 -0
  64. data/lib/aidp/message_display.rb +31 -0
  65. data/lib/aidp/metadata/compiler.rb +29 -17
  66. data/lib/aidp/metadata/query.rb +1 -1
  67. data/lib/aidp/metadata/scanner.rb +8 -1
  68. data/lib/aidp/metadata/tool_metadata.rb +13 -13
  69. data/lib/aidp/metadata/validator.rb +10 -0
  70. data/lib/aidp/metadata.rb +16 -0
  71. data/lib/aidp/pr_worktree_manager.rb +20 -8
  72. data/lib/aidp/provider_manager.rb +4 -7
  73. data/lib/aidp/providers/base.rb +2 -0
  74. data/lib/aidp/security/rule_of_two_enforcer.rb +210 -0
  75. data/lib/aidp/security/secrets_proxy.rb +328 -0
  76. data/lib/aidp/security/secrets_registry.rb +227 -0
  77. data/lib/aidp/security/trifecta_state.rb +220 -0
  78. data/lib/aidp/security/watch_mode_handler.rb +306 -0
  79. data/lib/aidp/security/work_loop_adapter.rb +277 -0
  80. data/lib/aidp/security.rb +56 -0
  81. data/lib/aidp/setup/wizard.rb +283 -11
  82. data/lib/aidp/skills.rb +0 -5
  83. data/lib/aidp/storage/csv_storage.rb +3 -0
  84. data/lib/aidp/style_guide/selector.rb +360 -0
  85. data/lib/aidp/tooling_detector.rb +283 -16
  86. data/lib/aidp/version.rb +1 -1
  87. data/lib/aidp/watch/auto_merger.rb +274 -0
  88. data/lib/aidp/watch/auto_pr_processor.rb +125 -7
  89. data/lib/aidp/watch/build_processor.rb +16 -1
  90. data/lib/aidp/watch/change_request_processor.rb +682 -150
  91. data/lib/aidp/watch/ci_fix_processor.rb +262 -4
  92. data/lib/aidp/watch/feedback_collector.rb +191 -0
  93. data/lib/aidp/watch/hierarchical_pr_strategy.rb +256 -0
  94. data/lib/aidp/watch/implementation_verifier.rb +142 -1
  95. data/lib/aidp/watch/plan_generator.rb +70 -13
  96. data/lib/aidp/watch/plan_processor.rb +12 -5
  97. data/lib/aidp/watch/projects_processor.rb +286 -0
  98. data/lib/aidp/watch/repository_client.rb +871 -22
  99. data/lib/aidp/watch/review_processor.rb +33 -6
  100. data/lib/aidp/watch/runner.rb +80 -29
  101. data/lib/aidp/watch/state_store.rb +233 -0
  102. data/lib/aidp/watch/sub_issue_creator.rb +221 -0
  103. data/lib/aidp/watch.rb +5 -7
  104. data/lib/aidp/workflows/guided_agent.rb +4 -0
  105. data/lib/aidp/workstream_cleanup.rb +0 -2
  106. data/lib/aidp/workstream_executor.rb +3 -4
  107. data/lib/aidp/worktree.rb +61 -12
  108. data/lib/aidp/worktree_branch_manager.rb +347 -101
  109. data/lib/aidp.rb +21 -106
  110. data/templates/implementation/iterative_implementation.md +46 -3
  111. metadata +91 -36
  112. data/lib/aidp/config/paths.rb +0 -131
data/lib/aidp/evaluations/evaluation_storage.rb
@@ -0,0 +1,250 @@
+# frozen_string_literal: true
+
+require "json"
+require "fileutils"
+require_relative "evaluation_record"
+require_relative "../config_paths"
+require_relative "../rescue_logging"
+
+module Aidp
+  module Evaluations
+    # Storage manager for evaluation records
+    #
+    # Stores evaluations in `.aidp/evaluations/` with append-only semantics:
+    # - Individual evaluations stored as JSON files: `eval_YYYYMMDD_HHMMSS_xxxx.json`
+    # - Indexed summary file for efficient lookups: `index.json`
+    #
+    # @example Storing an evaluation
+    #   storage = EvaluationStorage.new(project_dir: Dir.pwd)
+    #   storage.store(record)
+    #
+    # @example Listing evaluations
+    #   storage.list(limit: 10)
+    #   storage.list(rating: "bad")
+    class EvaluationStorage
+      include Aidp::RescueLogging
+
+      def initialize(project_dir: Dir.pwd)
+        @project_dir = project_dir
+        @evaluations_dir = ConfigPaths.evaluations_dir(project_dir)
+        @index_file = ConfigPaths.evaluations_index_file(project_dir)
+
+        Aidp.log_debug("evaluation_storage", "initialize",
+          project_dir: project_dir, evaluations_dir: @evaluations_dir)
+      end
+
+      # Store a new evaluation record
+      #
+      # @param record [EvaluationRecord] The evaluation to store
+      # @return [Hash] Result with :success and :id keys
+      def store(record)
+        ensure_directory
+        file_path = File.join(@evaluations_dir, "#{record.id}.json")
+
+        Aidp.log_debug("evaluation_storage", "store",
+          id: record.id, rating: record.rating, file_path: file_path)
+
+        File.write(file_path, JSON.pretty_generate(record.to_h))
+        update_index(record)
+
+        {success: true, id: record.id, file_path: file_path}
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "store",
+          fallback: {success: false},
+          id: record.id)
+        {success: false, error: error.message, id: record.id}
+      end
+
+      # Load a specific evaluation by ID
+      #
+      # @param id [String] The evaluation ID
+      # @return [EvaluationRecord, nil] The record or nil if not found
+      def load(id)
+        file_path = File.join(@evaluations_dir, "#{id}.json")
+        return nil unless File.exist?(file_path)
+
+        Aidp.log_debug("evaluation_storage", "load", id: id)
+
+        data = JSON.parse(File.read(file_path))
+        EvaluationRecord.from_h(data)
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "load",
+          fallback: nil,
+          id: id)
+        nil
+      end
+
+      # List evaluations with optional filtering
+      #
+      # @param limit [Integer] Maximum number of records to return
+      # @param rating [String, nil] Filter by rating (good/neutral/bad)
+      # @param target_type [String, nil] Filter by target type
+      # @return [Array<EvaluationRecord>] Matching records, newest first
+      def list(limit: 50, rating: nil, target_type: nil)
+        Aidp.log_debug("evaluation_storage", "list",
+          limit: limit, rating: rating, target_type: target_type)
+
+        index = load_index
+        entries = index[:entries] || []
+
+        # Apply filters
+        entries = entries.select { |e| e[:rating] == rating } if rating
+        entries = entries.select { |e| e[:target_type] == target_type } if target_type
+
+        # Sort by created_at descending, take limit
+        entries = entries.sort_by { |e| e[:created_at] || "" }.reverse.take(limit)
+
+        # Load full records
+        entries.filter_map { |entry| load(entry[:id]) }
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "list",
+          fallback: [],
+          limit: limit)
+        []
+      end
+
+      # Get statistics about evaluations
+      #
+      # @return [Hash] Statistics including counts by rating
+      def stats
+        Aidp.log_debug("evaluation_storage", "stats")
+
+        index = load_index
+        entries = index[:entries] || []
+
+        total = entries.size
+        by_rating = entries.group_by { |e| e[:rating] }
+        by_target = entries.group_by { |e| e[:target_type] }
+
+        {
+          total: total,
+          by_rating: {
+            good: (by_rating["good"] || []).size,
+            neutral: (by_rating["neutral"] || []).size,
+            bad: (by_rating["bad"] || []).size
+          },
+          by_target_type: by_target.transform_values(&:size),
+          first_evaluation: entries.min_by { |e| e[:created_at] || "" }&.dig(:created_at),
+          last_evaluation: entries.max_by { |e| e[:created_at] || "" }&.dig(:created_at)
+        }
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "stats",
+          fallback: {total: 0, by_rating: {good: 0, neutral: 0, bad: 0}})
+        {total: 0, by_rating: {good: 0, neutral: 0, bad: 0}, by_target_type: {}}
+      end
+
+      # Delete an evaluation by ID
+      #
+      # @param id [String] The evaluation ID
+      # @return [Hash] Result with :success key
+      def delete(id)
+        file_path = File.join(@evaluations_dir, "#{id}.json")
+        return {success: true, message: "Evaluation not found"} unless File.exist?(file_path)
+
+        Aidp.log_debug("evaluation_storage", "delete", id: id)
+
+        File.delete(file_path)
+        remove_from_index(id)
+
+        {success: true, id: id}
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "delete",
+          fallback: {success: false},
+          id: id)
+        {success: false, error: error.message}
+      end
+
+      # Clear all evaluations
+      #
+      # @return [Hash] Result with :success and :count keys
+      def clear
+        Aidp.log_debug("evaluation_storage", "clear")
+
+        return {success: true, count: 0} unless Dir.exist?(@evaluations_dir)
+
+        count = Dir.glob(File.join(@evaluations_dir, "eval_*.json")).size
+        FileUtils.rm_rf(@evaluations_dir)
+
+        {success: true, count: count}
+      rescue => error
+        log_rescue(error,
+          component: "evaluation_storage",
+          action: "clear",
+          fallback: {success: false})
+        {success: false, error: error.message}
+      end
+
+      # Check if evaluations directory exists and has evaluations
+      def any?
+        Dir.exist?(@evaluations_dir) && Dir.glob(File.join(@evaluations_dir, "eval_*.json")).any?
+      end
+
+      private
+
+      def ensure_directory
+        ConfigPaths.ensure_evaluations_dir(@project_dir)
+      end
+
+      def load_index
+        return {entries: []} unless File.exist?(@index_file)
+
+        data = JSON.parse(File.read(@index_file))
+        symbolize_index(data)
+      rescue
+        {entries: []}
+      end
+
+      def update_index(record)
+        index = load_index
+        index[:entries] ||= []
+
+        # Add new entry to index (stores minimal data for quick lookups)
+        index[:entries] << {
+          id: record.id,
+          rating: record.rating,
+          target_type: record.target_type,
+          target_id: record.target_id,
+          created_at: record.created_at
+        }
+
+        index[:updated_at] = Time.now.iso8601
+
+        File.write(@index_file, JSON.pretty_generate(index))
+      end
+
+      def remove_from_index(id)
+        index = load_index
+        index[:entries]&.reject! { |e| e[:id] == id }
+        index[:updated_at] = Time.now.iso8601
+
+        File.write(@index_file, JSON.pretty_generate(index))
+      end
+
+      def symbolize_index(data)
+        return data unless data.is_a?(Hash)
+        result = {}
+        data.each do |key, value|
+          sym_key = key.is_a?(String) ? key.to_sym : key
+          result[sym_key] = if value.is_a?(Array)
+            value.map { |v| v.is_a?(Hash) ? symbolize_index(v) : v }
+          elsif value.is_a?(Hash)
+            symbolize_index(value)
+          else
+            value
+          end
+        end
+        result
+      end
+    end
+  end
+end
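
A minimal usage sketch for the new storage class (the `EvaluationRecord` keyword arguments are taken from the `Aidp::Evaluations` docstring in the next file; return values are abbreviated):

  record = Aidp::Evaluations::EvaluationRecord.new(
    rating: "bad",
    comment: "Generated code ignored the style guide",
    target_type: "prompt"
  )

  storage = Aidp::Evaluations::EvaluationStorage.new(project_dir: Dir.pwd)
  storage.store(record)                  # => {success: true, id: "eval_...", file_path: "..."}
  storage.list(rating: "bad", limit: 5)  # newest-first array of EvaluationRecord
  storage.stats[:by_rating]              # => {good: 0, neutral: 0, bad: 1}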
data/lib/aidp/evaluations.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require_relative "evaluations/evaluation_record"
+require_relative "evaluations/evaluation_storage"
+require_relative "evaluations/context_capture"
+
+module Aidp
+  # Evaluation and feedback system for AIDP outputs
+  #
+  # Enables users to rate generated outputs (prompts, work units, work loops)
+  # as good, neutral, or bad while capturing rich execution context.
+  #
+  # @example Creating and storing an evaluation
+  #   record = Aidp::Evaluations::EvaluationRecord.new(
+  #     rating: "good",
+  #     comment: "Clean code generated",
+  #     target_type: "work_unit"
+  #   )
+  #   storage = Aidp::Evaluations::EvaluationStorage.new
+  #   storage.store(record)
+  module Evaluations
+  end
+end
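
For orientation, `update_index` in evaluation_storage.rb implies an `.aidp/evaluations/index.json` of roughly this shape (values are illustrative, not taken from the gem; the ID follows the documented `eval_YYYYMMDD_HHMMSS_xxxx` pattern):

  {
    "entries": [
      {
        "id": "eval_20250101_120000_ab12",
        "rating": "good",
        "target_type": "work_unit",
        "target_id": null,
        "created_at": "2025-01-01T12:00:00Z"
      }
    ],
    "updated_at": "2025-01-01T12:00:05Z"
  }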
data/lib/aidp/execute/async_work_loop_runner.rb
@@ -21,6 +21,9 @@ module Aidp

     attr_reader :state, :instruction_queue, :work_thread

+    # Expose sync_runner for testability
+    attr_accessor :sync_runner
+
     def initialize(project_dir, provider_manager, config, options = {})
       @project_dir = project_dir
       @provider_manager = provider_manager
@@ -175,7 +178,7 @@ module Aidp
     def save_cancellation_checkpoint
       return unless @sync_runner

-      checkpoint = @sync_runner.instance_variable_get(:@checkpoint)
+      checkpoint = @sync_runner.checkpoint
       return unless checkpoint

       checkpoint.record_checkpoint(
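
Both changes here are testability seams: `sync_runner` is now assignable from outside, and the checkpoint is read through a public `checkpoint` accessor rather than `instance_variable_get`. A hypothetical spec fragment (assuming RSpec doubles; the names are illustrative):

  checkpoint = double("checkpoint", record_checkpoint: nil)
  runner.sync_runner = double("sync_runner", checkpoint: checkpoint)
  # Cancellation can now be exercised without reflection:
  # save_cancellation_checkpoint reads runner.sync_runner.checkpoint
  # and calls record_checkpoint on it.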
data/lib/aidp/execute/interactive_repl.rb
@@ -23,6 +23,10 @@ module Aidp
   class InteractiveRepl
     include Aidp::RescueLogging

+    # Expose running state and repl_macros for testability
+    attr_accessor :running
+    attr_reader :repl_macros, :async_runner, :completion_setup_needed, :output_display_thread
+
     def initialize(project_dir, provider_manager, config, options = {})
       @project_dir = project_dir
       @provider_manager = provider_manager
@@ -30,8 +34,8 @@ module Aidp
       @options = options
       @prompt = options[:prompt] || TTY::Prompt.new
       @async_runner_class = options[:async_runner_class] || AsyncWorkLoopRunner
-      @async_runner = nil
-      @repl_macros = ReplMacros.new
+      @async_runner = options[:async_runner]
+      @repl_macros = options[:repl_macros] || ReplMacros.new
       @output_display_thread = nil
       @running = false
       @completion_setup_needed = true
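
Between the new readers and the widened `options` hash, every collaborator the REPL holds can now be injected at construction time. A sketch of the injection points added in this diff (the doubles are placeholders):

  repl = InteractiveRepl.new(
    project_dir,
    provider_manager,
    config,
    prompt: TTY::Prompt.new,          # pre-existing injection point
    async_runner: fake_async_runner,  # new: pre-seeds @async_runner (previously always nil)
    repl_macros: fake_repl_macros     # new: replaces the default ReplMacros.new
  )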
data/lib/aidp/execute/prompt_evaluator.rb
@@ -0,0 +1,359 @@
+# frozen_string_literal: true
+
+require_relative "../harness/ai_decision_engine"
+
+module Aidp
+  module Execute
+    # Evaluates prompt effectiveness using ZFC after multiple iterations
+    #
+    # FIX for issue #391: When the work loop reaches 10+ iterations without completion,
+    # this evaluator assesses prompt quality and suggests improvements.
+    #
+    # Uses Zero Framework Cognition (ZFC) to analyze:
+    # - Whether the prompt clearly defines completion criteria
+    # - If task breakdown instructions are adequate
+    # - Whether the agent has sufficient context
+    # - If there are blockers preventing progress
+    #
+    # @example
+    #   evaluator = PromptEvaluator.new(config)
+    #   result = evaluator.evaluate(
+    #     prompt_content: prompt_manager.read,
+    #     iteration_count: 12,
+    #     task_summary: persistent_tasklist.summary,
+    #     recent_failures: all_results
+    #   )
+    #   # => { effective: false, issues: [...], suggestions: [...] }
+    #
+    class PromptEvaluator
+      # Threshold for triggering evaluation
+      EVALUATION_ITERATION_THRESHOLD = 10
+
+      # Re-evaluate periodically after threshold
+      EVALUATION_INTERVAL = 5
+
+      # Expose for testability
+      attr_reader :ai_decision_engine
+
+      def initialize(config, ai_decision_engine: nil)
+        @config = config
+        @ai_decision_engine = ai_decision_engine || safely_build_ai_decision_engine
+      end
+
+      # Safely build AIDecisionEngine, returning nil if config doesn't support it
+      # This allows tests with mock configs to work without AI calls
+      def safely_build_ai_decision_engine
+        # Check if config supports the methods AIDecisionEngine needs
+        return nil unless @config.respond_to?(:default_provider)
+
+        build_default_ai_decision_engine
+      rescue => e
+        Aidp.log_debug("prompt_evaluator", "skipping_ai_decision_engine",
+          reason: e.message)
+        nil
+      end
+
+      # Check if evaluation should be triggered based on iteration count
+      # @param iteration_count [Integer] Current iteration number
+      # @return [Boolean]
+      def should_evaluate?(iteration_count)
+        return false unless iteration_count >= EVALUATION_ITERATION_THRESHOLD
+
+        # Evaluate at threshold and every EVALUATION_INTERVAL after
+        (iteration_count - EVALUATION_ITERATION_THRESHOLD) % EVALUATION_INTERVAL == 0
+      end
+
+      # Evaluate prompt effectiveness
+      # @param prompt_content [String] Current PROMPT.md content
+      # @param iteration_count [Integer] Current iteration number
+      # @param task_summary [Hash] Summary of task statuses
+      # @param recent_failures [Hash] Recent test/lint failures
+      # @param step_name [String] Name of current step
+      # @return [Hash] Evaluation result with :effective, :issues, :suggestions
+      def evaluate(prompt_content:, iteration_count:, task_summary:, recent_failures:, step_name: nil)
+        Aidp.log_debug("prompt_evaluator", "starting_evaluation",
+          iteration: iteration_count,
+          step: step_name,
+          prompt_size: prompt_content&.length || 0)
+
+        # When AI decision engine is unavailable (e.g., in tests with mock configs),
+        # return a neutral result that doesn't trigger feedback appending
+        unless @ai_decision_engine
+          Aidp.log_debug("prompt_evaluator", "skipping_evaluation_no_ai_engine")
+          return {
+            effective: true, # Assume effective to avoid unnecessary feedback
+            issues: [],
+            suggestions: [],
+            likely_blockers: [],
+            recommended_actions: [],
+            confidence: 0.0,
+            skipped: true,
+            skip_reason: "AI decision engine not available"
+          }
+        end
+
+        prompt = build_evaluation_prompt(
+          prompt_content: prompt_content,
+          iteration_count: iteration_count,
+          task_summary: task_summary,
+          recent_failures: recent_failures
+        )
+
+        schema = {
+          type: "object",
+          properties: {
+            effective: {
+              type: "boolean",
+              description: "True if the prompt is likely to lead to completion within a few more iterations"
+            },
+            issues: {
+              type: "array",
+              items: {type: "string"},
+              description: "Specific problems identified with the current prompt"
+            },
+            suggestions: {
+              type: "array",
+              items: {type: "string"},
+              description: "Actionable suggestions to improve prompt effectiveness"
+            },
+            likely_blockers: {
+              type: "array",
+              items: {type: "string"},
+              description: "Potential blockers preventing progress"
+            },
+            recommended_actions: {
+              type: "array",
+              items: {
+                type: "object",
+                properties: {
+                  action: {type: "string"},
+                  priority: {type: "string", enum: ["high", "medium", "low"]},
+                  rationale: {type: "string"}
+                }
+              },
+              description: "Specific actions to take, prioritized"
+            },
+            confidence: {
+              type: "number",
+              minimum: 0.0,
+              maximum: 1.0,
+              description: "Confidence in this assessment"
+            }
+          },
+          required: ["effective", "issues", "suggestions", "confidence"]
+        }
+
+        begin
+          result = @ai_decision_engine.decide(
+            :prompt_evaluation,
+            context: {prompt: prompt},
+            schema: schema,
+            tier: :mini,
+            cache_ttl: nil # Each evaluation is context-specific
+          )
+
+          Aidp.log_info("prompt_evaluator", "evaluation_complete",
+            iteration: iteration_count,
+            effective: result[:effective],
+            issue_count: result[:issues]&.size || 0,
+            confidence: result[:confidence])
+
+          result
+        rescue => e
+          Aidp.log_error("prompt_evaluator", "evaluation_failed",
+            error: e.message,
+            error_class: e.class.name)
+
+          build_fallback_result("Evaluation failed: #{e.message}")
+        end
+      end
+
+      # Generate improvement recommendations for the prompt template
+      # Used for AGD pattern - generating improved templates based on evaluation
+      # @param evaluation_result [Hash] Result from evaluate()
+      # @param original_template [String] The original template content
+      # @return [Hash] Template improvements
+      def generate_template_improvements(evaluation_result:, original_template:)
+        return nil unless @ai_decision_engine
+
+        Aidp.log_debug("prompt_evaluator", "generating_template_improvements",
+          issue_count: evaluation_result[:issues]&.size || 0)
+
+        prompt = build_improvement_prompt(evaluation_result, original_template)
+
+        schema = {
+          type: "object",
+          properties: {
+            improved_sections: {
+              type: "array",
+              items: {
+                type: "object",
+                properties: {
+                  section_name: {type: "string"},
+                  original: {type: "string"},
+                  improved: {type: "string"},
+                  rationale: {type: "string"}
+                }
+              }
+            },
+            additional_sections: {
+              type: "array",
+              items: {
+                type: "object",
+                properties: {
+                  section_name: {type: "string"},
+                  content: {type: "string"},
+                  rationale: {type: "string"}
+                }
+              }
+            },
+            completion_criteria_improvements: {
+              type: "array",
+              items: {type: "string"},
+              description: "Specific improvements to completion criteria definitions"
+            }
+          },
+          required: ["improved_sections", "completion_criteria_improvements"]
+        }
+
+        @ai_decision_engine.decide(
+          :template_improvement,
+          context: {prompt: prompt},
+          schema: schema,
+          tier: :standard, # Use standard tier for more thoughtful improvements
+          cache_ttl: nil
+        )
+      rescue => e
+        Aidp.log_error("prompt_evaluator", "template_improvement_failed",
+          error: e.message)
+        nil
+      end
+
+      private
+
+      def build_evaluation_prompt(prompt_content:, iteration_count:, task_summary:, recent_failures:)
+        <<~PROMPT
+          You are evaluating the effectiveness of a work loop prompt that has been running for #{iteration_count} iterations without completion.
+
+          ## Current Prompt Content
+          #{truncate_content(prompt_content, 8000)}
+
+          ## Task Summary
+          #{format_task_summary(task_summary)}
+
+          ## Recent Check Results
+          #{format_failures(recent_failures)}
+
+          ## Evaluation Criteria
+
+          Analyze why this prompt may not be leading to completion:
+
+          1. **Clarity of Goals**: Are the implementation requirements clearly defined?
+          2. **Task Breakdown**: Does the prompt guide proper task decomposition?
+          3. **Completion Criteria**: Are the completion criteria specific and achievable?
+          4. **Context Sufficiency**: Does the agent have enough context to proceed?
+          5. **Blockers**: Are there technical blockers or missing information?
+          6. **Scope**: Is the scope realistic for an AI agent to complete?
+
+          ## Your Assessment
+
+          Provide:
+          - Whether this prompt is likely effective (true/false)
+          - Specific issues with the current prompt
+          - Actionable suggestions for improvement
+          - Likely blockers preventing progress
+          - Prioritized recommended actions
+          - Your confidence in this assessment (0.0-1.0)
+
+          Be specific and actionable. Focus on what can be changed to achieve completion.
+        PROMPT
+      end
+
+      def build_improvement_prompt(evaluation_result, original_template)
+        <<~PROMPT
+          Based on the following prompt evaluation, suggest improvements to the template.
+
+          ## Evaluation Results
+          - Effective: #{evaluation_result[:effective]}
+          - Issues: #{(evaluation_result[:issues] || []).join(", ")}
+          - Suggestions: #{(evaluation_result[:suggestions] || []).join(", ")}
+
+          ## Original Template
+          #{truncate_content(original_template, 4000)}
+
+          ## Your Task
+
+          Suggest specific improvements to make the template more effective:
+          1. Identify sections that need improvement
+          2. Propose new sections if needed
+          3. Focus especially on completion criteria clarity
+          4. Ensure task breakdown instructions are explicit
+          5. Add guidance for common failure modes
+
+          Be specific - provide actual text that could replace or supplement the template.
+        PROMPT
+      end
+
+      def format_task_summary(task_summary)
+        return "_No task summary available_" if task_summary.nil? || task_summary.empty?
+
+        if task_summary.is_a?(Hash)
+          parts = []
+          parts << "Total: #{task_summary[:total] || 0}"
+          parts << "Done: #{task_summary[:done] || 0}"
+          parts << "In Progress: #{task_summary[:in_progress] || 0}"
+          parts << "Pending: #{task_summary[:pending] || 0}"
+          parts << "Abandoned: #{task_summary[:abandoned] || 0}"
+          parts.join(" | ")
+        else
+          task_summary.to_s
+        end
+      end
+
+      def format_failures(recent_failures)
+        return "_No recent failures_" if recent_failures.nil? || recent_failures.empty?
+
+        parts = []
+        recent_failures.each do |check_type, result|
+          next unless result.is_a?(Hash)
+
+          status = result[:success] ? "✅ passed" : "❌ failed"
+          parts << "- #{check_type}: #{status}"
+
+          if !result[:success] && result[:failures]
+            failures = result[:failures].take(3)
+            failures.each { |f| parts << "  - #{truncate_content(f.to_s, 200)}" }
+          end
+        end
+
+        parts.empty? ? "_No failures to report_" : parts.join("\n")
+      end
+
+      def truncate_content(content, max_length)
+        return "_No content_" if content.nil? || content.empty?
+        return content if content.length <= max_length
+
+        "#{content[0, max_length]}\n\n[... truncated, showing first #{max_length} characters ...]"
+      end
+
+      def build_fallback_result(reason)
+        {
+          effective: nil,
+          issues: ["Unable to evaluate: #{reason}"],
+          suggestions: ["Check AI configuration and try again"],
+          likely_blockers: [],
+          recommended_actions: [],
+          confidence: 0.0
+        }
+      end
+
+      def build_default_ai_decision_engine
+        Aidp::Harness::AIDecisionEngine.new(@config)
+      rescue => e
+        Aidp.log_warn("prompt_evaluator", "failed_to_create_ai_decision_engine",
+          error: e.message)
+        nil
+      end
+    end
+  end
+end
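
The two constants at the top of PromptEvaluator fix the evaluation cadence: nothing before iteration 10, then a re-check every 5 iterations. Worked through `should_evaluate?`:

  evaluator = Aidp::Execute::PromptEvaluator.new(config)
  evaluator.should_evaluate?(9)   # => false (below EVALUATION_ITERATION_THRESHOLD)
  evaluator.should_evaluate?(10)  # => true  (threshold reached)
  evaluator.should_evaluate?(12)  # => false ((12 - 10) % 5 != 0)
  evaluator.should_evaluate?(15)  # => true  (threshold + EVALUATION_INTERVAL)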