aidp 0.33.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +35 -0
- data/lib/aidp/analyze/tree_sitter_scan.rb +3 -0
- data/lib/aidp/cli/eval_command.rb +399 -0
- data/lib/aidp/cli/harness_command.rb +1 -1
- data/lib/aidp/cli/security_command.rb +416 -0
- data/lib/aidp/cli/tools_command.rb +6 -4
- data/lib/aidp/cli.rb +170 -3
- data/lib/aidp/concurrency/exec.rb +3 -0
- data/lib/aidp/config.rb +113 -0
- data/lib/aidp/config_paths.rb +20 -0
- data/lib/aidp/daemon/runner.rb +8 -4
- data/lib/aidp/errors.rb +134 -0
- data/lib/aidp/evaluations/context_capture.rb +205 -0
- data/lib/aidp/evaluations/evaluation_record.rb +114 -0
- data/lib/aidp/evaluations/evaluation_storage.rb +250 -0
- data/lib/aidp/evaluations.rb +23 -0
- data/lib/aidp/execute/async_work_loop_runner.rb +4 -1
- data/lib/aidp/execute/interactive_repl.rb +6 -2
- data/lib/aidp/execute/prompt_evaluator.rb +359 -0
- data/lib/aidp/execute/repl_macros.rb +100 -1
- data/lib/aidp/execute/work_loop_runner.rb +399 -47
- data/lib/aidp/execute/work_loop_state.rb +4 -1
- data/lib/aidp/execute/workflow_selector.rb +3 -0
- data/lib/aidp/harness/ai_decision_engine.rb +79 -0
- data/lib/aidp/harness/capability_registry.rb +2 -0
- data/lib/aidp/harness/condition_detector.rb +3 -0
- data/lib/aidp/harness/config_loader.rb +3 -0
- data/lib/aidp/harness/enhanced_runner.rb +14 -11
- data/lib/aidp/harness/error_handler.rb +3 -0
- data/lib/aidp/harness/provider_factory.rb +3 -0
- data/lib/aidp/harness/provider_manager.rb +6 -0
- data/lib/aidp/harness/runner.rb +5 -1
- data/lib/aidp/harness/state/persistence.rb +3 -0
- data/lib/aidp/harness/state_manager.rb +3 -0
- data/lib/aidp/harness/status_display.rb +28 -20
- data/lib/aidp/harness/thinking_depth_manager.rb +32 -32
- data/lib/aidp/harness/ui/enhanced_tui.rb +4 -0
- data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +4 -0
- data/lib/aidp/harness/ui/error_handler.rb +3 -0
- data/lib/aidp/harness/ui/job_monitor.rb +4 -0
- data/lib/aidp/harness/ui/navigation/submenu.rb +2 -0
- data/lib/aidp/harness/ui/navigation/workflow_selector.rb +6 -0
- data/lib/aidp/harness/ui/spinner_helper.rb +3 -0
- data/lib/aidp/harness/ui/workflow_controller.rb +3 -0
- data/lib/aidp/harness/user_interface.rb +3 -0
- data/lib/aidp/loader.rb +2 -2
- data/lib/aidp/logger.rb +3 -0
- data/lib/aidp/message_display.rb +31 -0
- data/lib/aidp/pr_worktree_manager.rb +18 -6
- data/lib/aidp/provider_manager.rb +3 -0
- data/lib/aidp/providers/base.rb +2 -0
- data/lib/aidp/security/rule_of_two_enforcer.rb +210 -0
- data/lib/aidp/security/secrets_proxy.rb +328 -0
- data/lib/aidp/security/secrets_registry.rb +227 -0
- data/lib/aidp/security/trifecta_state.rb +220 -0
- data/lib/aidp/security/watch_mode_handler.rb +306 -0
- data/lib/aidp/security/work_loop_adapter.rb +277 -0
- data/lib/aidp/security.rb +56 -0
- data/lib/aidp/setup/wizard.rb +4 -2
- data/lib/aidp/version.rb +1 -1
- data/lib/aidp/watch/auto_merger.rb +274 -0
- data/lib/aidp/watch/auto_pr_processor.rb +125 -7
- data/lib/aidp/watch/build_processor.rb +16 -1
- data/lib/aidp/watch/change_request_processor.rb +680 -286
- data/lib/aidp/watch/ci_fix_processor.rb +262 -4
- data/lib/aidp/watch/feedback_collector.rb +191 -0
- data/lib/aidp/watch/hierarchical_pr_strategy.rb +256 -0
- data/lib/aidp/watch/implementation_verifier.rb +142 -1
- data/lib/aidp/watch/plan_generator.rb +70 -13
- data/lib/aidp/watch/plan_processor.rb +12 -5
- data/lib/aidp/watch/projects_processor.rb +286 -0
- data/lib/aidp/watch/repository_client.rb +861 -53
- data/lib/aidp/watch/review_processor.rb +33 -6
- data/lib/aidp/watch/runner.rb +51 -11
- data/lib/aidp/watch/state_store.rb +233 -0
- data/lib/aidp/watch/sub_issue_creator.rb +221 -0
- data/lib/aidp/workflows/guided_agent.rb +4 -0
- data/lib/aidp/workstream_executor.rb +3 -0
- data/lib/aidp/worktree.rb +61 -11
- data/lib/aidp/worktree_branch_manager.rb +347 -101
- data/templates/implementation/iterative_implementation.md +46 -3
- metadata +20 -1
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "prompt_manager"
|
|
4
|
+
require_relative "prompt_evaluator"
|
|
4
5
|
require_relative "checkpoint"
|
|
5
6
|
require_relative "checkpoint_display"
|
|
6
7
|
require_relative "guard_policy"
|
|
@@ -11,6 +12,7 @@ require_relative "steps"
|
|
|
11
12
|
require_relative "../harness/test_runner"
|
|
12
13
|
require_relative "../errors"
|
|
13
14
|
require_relative "../style_guide/selector"
|
|
15
|
+
require_relative "../security"
|
|
14
16
|
|
|
15
17
|
module Aidp
|
|
16
18
|
module Execute
|
|
@@ -39,7 +41,10 @@ module Aidp
|
|
|
39
41
|
}.freeze
|
|
40
42
|
include Aidp::MessageDisplay
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
# Expose state for testability
|
|
45
|
+
attr_accessor :iteration_count, :step_name, :options, :persistent_tasklist
|
|
46
|
+
attr_reader :project_dir, :current_state, :state_history, :test_runner, :prompt_manager, :checkpoint
|
|
47
|
+
attr_writer :guard_policy, :prompt_manager, :style_guide_selector
|
|
43
48
|
|
|
44
49
|
MAX_ITERATIONS = 50 # Safety limit
|
|
45
50
|
CHECKPOINT_INTERVAL = 5 # Record checkpoint every N iterations
|
|
@@ -67,12 +72,18 @@ module Aidp
|
|
|
67
72
|
|
|
68
73
|
# Initialize thinking depth manager for intelligent model selection
|
|
69
74
|
require_relative "../harness/thinking_depth_manager"
|
|
70
|
-
@thinking_depth_manager = options[:thinking_depth_manager] || Aidp::Harness::ThinkingDepthManager.new(config)
|
|
75
|
+
@thinking_depth_manager = options[:thinking_depth_manager] || Aidp::Harness::ThinkingDepthManager.new(config, root_dir: @project_dir)
|
|
71
76
|
@consecutive_failures = 0
|
|
72
77
|
@last_tier = nil
|
|
73
78
|
|
|
74
79
|
# Initialize style guide selector for intelligent section selection
|
|
75
80
|
@style_guide_selector = options[:style_guide_selector] || Aidp::StyleGuide::Selector.new(project_dir: project_dir)
|
|
81
|
+
|
|
82
|
+
# FIX for issue #391: Initialize prompt evaluator for iteration threshold assessment
|
|
83
|
+
@prompt_evaluator = options[:prompt_evaluator] || PromptEvaluator.new(config)
|
|
84
|
+
|
|
85
|
+
# Initialize security adapter for Rule of Two enforcement
|
|
86
|
+
@security_adapter = options[:security_adapter] || Aidp::Security::WorkLoopAdapter.new(project_dir: project_dir)
|
|
76
87
|
end
|
|
77
88
|
|
|
78
89
|
# Execute a step using fix-forward work loop pattern
|
|
@@ -143,6 +154,11 @@ module Aidp
|
|
|
143
154
|
@current_state = :ready
|
|
144
155
|
@state_history.clear
|
|
145
156
|
|
|
157
|
+
# Begin security tracking for this agentic work unit
|
|
158
|
+
work_unit_id = "agentic_#{@step_name}_#{SecureRandom.hex(4)}"
|
|
159
|
+
@security_adapter.begin_work_unit(work_unit_id: work_unit_id, context: context)
|
|
160
|
+
display_security_status
|
|
161
|
+
|
|
146
162
|
create_initial_prompt(step_spec, context)
|
|
147
163
|
|
|
148
164
|
loop do
|
|
@@ -154,6 +170,10 @@ module Aidp
|
|
|
154
170
|
display_message("⚠️ Max iterations (#{MAX_ITERATIONS}) reached for #{@step_name}", type: :warning)
|
|
155
171
|
display_state_summary
|
|
156
172
|
archive_and_cleanup
|
|
173
|
+
|
|
174
|
+
# End security tracking for this work unit
|
|
175
|
+
@security_adapter.end_work_unit
|
|
176
|
+
|
|
157
177
|
return build_agentic_payload(
|
|
158
178
|
agent_result: nil,
|
|
159
179
|
response: build_max_iterations_result,
|
|
@@ -178,6 +198,30 @@ module Aidp
|
|
|
178
198
|
prompt_length: prompt_length,
|
|
179
199
|
checks: checks_summary)
|
|
180
200
|
|
|
201
|
+
# Check security policy before agent call (Rule of Two enforcement)
|
|
202
|
+
# Agent calls enable egress capability
|
|
203
|
+
begin
|
|
204
|
+
@security_adapter.check_agent_call_allowed!(operation: :agent_execution)
|
|
205
|
+
rescue Aidp::Security::PolicyViolation => e
|
|
206
|
+
# Security policy violation - cannot proceed with agent call
|
|
207
|
+
Aidp.logger.error("work_loop", "Security policy violation",
|
|
208
|
+
step: @step_name,
|
|
209
|
+
iteration: @iteration_count,
|
|
210
|
+
error: e.message)
|
|
211
|
+
display_message(" 🛡️ Security policy violation: #{e.message}", type: :error)
|
|
212
|
+
display_message(" Cannot proceed - Rule of Two would be violated", type: :error)
|
|
213
|
+
|
|
214
|
+
# End security tracking and return error
|
|
215
|
+
@security_adapter.end_work_unit
|
|
216
|
+
return build_agentic_payload(
|
|
217
|
+
agent_result: nil,
|
|
218
|
+
response: {status: "error", message: "Security policy violation: #{e.message}"},
|
|
219
|
+
summary: nil,
|
|
220
|
+
completed: false,
|
|
221
|
+
terminate: true
|
|
222
|
+
)
|
|
223
|
+
end
|
|
224
|
+
|
|
181
225
|
# Wrap agent call in exception handling for true fix-forward
|
|
182
226
|
begin
|
|
183
227
|
agent_result = apply_patch(preview_provider, preview_model)
|
|
@@ -185,6 +229,15 @@ module Aidp
|
|
|
185
229
|
# Configuration errors should crash immediately (crash-early principle)
|
|
186
230
|
# Re-raise without catching
|
|
187
231
|
raise
|
|
232
|
+
rescue Aidp::Security::PolicyViolation => e
|
|
233
|
+
# Security violations should not continue - they are policy failures
|
|
234
|
+
Aidp.logger.error("work_loop", "Security policy violation during agent call",
|
|
235
|
+
step: @step_name,
|
|
236
|
+
iteration: @iteration_count,
|
|
237
|
+
error: e.message)
|
|
238
|
+
display_message(" 🛡️ Security violation: #{e.message}", type: :error)
|
|
239
|
+
@security_adapter.end_work_unit
|
|
240
|
+
raise
|
|
188
241
|
rescue => e
|
|
189
242
|
# Convert exception to error result for fix-forward handling
|
|
190
243
|
Aidp.logger.error("work_loop", "Exception during agent call",
|
|
@@ -263,13 +316,31 @@ module Aidp
|
|
|
263
316
|
|
|
264
317
|
# Check task completion status
|
|
265
318
|
task_completion_result = check_task_completion
|
|
319
|
+
agent_completed = agent_marked_complete?(agent_result)
|
|
320
|
+
|
|
321
|
+
# FIX for issue #391: Comprehensive logging at completion decision point
|
|
322
|
+
Aidp.log_debug("work_loop", "completion_decision_point",
|
|
323
|
+
iteration: @iteration_count,
|
|
324
|
+
all_checks_pass: all_checks_pass,
|
|
325
|
+
agent_marked_complete: agent_completed,
|
|
326
|
+
task_completion_complete: task_completion_result[:complete],
|
|
327
|
+
task_completion_reason: task_completion_result[:reason],
|
|
328
|
+
test_success: test_results[:success],
|
|
329
|
+
lint_success: lint_results[:success],
|
|
330
|
+
formatter_success: formatter_results[:success],
|
|
331
|
+
build_success: build_results[:success],
|
|
332
|
+
doc_success: doc_results[:success])
|
|
266
333
|
|
|
267
334
|
if all_checks_pass
|
|
268
335
|
transition_to(:pass)
|
|
269
336
|
|
|
270
|
-
if
|
|
337
|
+
if agent_completed
|
|
271
338
|
# Check if tasks are complete
|
|
272
339
|
if task_completion_result[:complete]
|
|
340
|
+
Aidp.log_debug("work_loop", "completion_approved",
|
|
341
|
+
iteration: @iteration_count,
|
|
342
|
+
reason: task_completion_result[:reason])
|
|
343
|
+
|
|
273
344
|
transition_to(:done)
|
|
274
345
|
record_final_checkpoint(all_results)
|
|
275
346
|
display_task_summary
|
|
@@ -280,9 +351,13 @@ module Aidp
|
|
|
280
351
|
model: preview_model,
|
|
281
352
|
prompt_length: prompt_length,
|
|
282
353
|
checks: checks_summary,
|
|
283
|
-
task_status: "complete"
|
|
354
|
+
task_status: "complete",
|
|
355
|
+
completion_reason: task_completion_result[:reason])
|
|
284
356
|
archive_and_cleanup
|
|
285
357
|
|
|
358
|
+
# End security tracking for this work unit
|
|
359
|
+
@security_adapter.end_work_unit
|
|
360
|
+
|
|
286
361
|
return build_agentic_payload(
|
|
287
362
|
agent_result: agent_result,
|
|
288
363
|
response: build_success_result(agent_result),
|
|
@@ -292,6 +367,11 @@ module Aidp
|
|
|
292
367
|
)
|
|
293
368
|
else
|
|
294
369
|
# All checks passed but tasks not complete
|
|
370
|
+
Aidp.log_debug("work_loop", "completion_blocked_tasks_incomplete",
|
|
371
|
+
iteration: @iteration_count,
|
|
372
|
+
reason: task_completion_result[:reason],
|
|
373
|
+
message: task_completion_result[:message])
|
|
374
|
+
|
|
295
375
|
display_message(" All checks passed but tasks not complete", type: :warning)
|
|
296
376
|
display_message(" #{task_completion_result[:message]}", type: :warning)
|
|
297
377
|
display_task_summary
|
|
@@ -300,13 +380,17 @@ module Aidp
|
|
|
300
380
|
model: preview_model,
|
|
301
381
|
prompt_length: prompt_length,
|
|
302
382
|
checks: checks_summary,
|
|
303
|
-
task_status: "incomplete"
|
|
383
|
+
task_status: "incomplete",
|
|
384
|
+
task_completion_reason: task_completion_result[:reason])
|
|
304
385
|
transition_to(:next_patch)
|
|
305
386
|
|
|
306
387
|
# Append task completion requirement to PROMPT.md
|
|
307
388
|
append_task_requirement_to_prompt(task_completion_result[:message])
|
|
308
389
|
end
|
|
309
390
|
else
|
|
391
|
+
Aidp.log_debug("work_loop", "completion_blocked_agent_not_complete",
|
|
392
|
+
iteration: @iteration_count)
|
|
393
|
+
|
|
310
394
|
display_message(" All checks passed but work not marked complete", type: :info)
|
|
311
395
|
log_iteration_status("checks_passed_waiting_agent_completion",
|
|
312
396
|
provider: preview_provider,
|
|
@@ -331,7 +415,146 @@ module Aidp
|
|
|
331
415
|
failures: failure_summary_for_log(all_results))
|
|
332
416
|
prepare_next_iteration(all_results, diagnostic)
|
|
333
417
|
end
|
|
418
|
+
|
|
419
|
+
# FIX for issue #391: Evaluate prompt effectiveness at iteration thresholds
|
|
420
|
+
# After 10+ iterations, assess whether the prompt is leading to progress
|
|
421
|
+
evaluate_prompt_effectiveness(all_results)
|
|
422
|
+
end
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
# Evaluate prompt effectiveness at iteration thresholds
|
|
426
|
+
# FIX for issue #391: Provides feedback when work loop is stuck
|
|
427
|
+
# Note: Errors during evaluation are logged but don't fail the work loop
|
|
428
|
+
def evaluate_prompt_effectiveness(all_results)
|
|
429
|
+
return unless @prompt_evaluator.should_evaluate?(@iteration_count)
|
|
430
|
+
|
|
431
|
+
Aidp.log_debug("work_loop", "evaluating_prompt_effectiveness",
|
|
432
|
+
iteration: @iteration_count)
|
|
433
|
+
|
|
434
|
+
display_message("📊 Evaluating prompt effectiveness (iteration #{@iteration_count})...", type: :info)
|
|
435
|
+
|
|
436
|
+
task_summary = build_task_summary_for_evaluation
|
|
437
|
+
prompt_content = @prompt_manager.read
|
|
438
|
+
|
|
439
|
+
evaluation = @prompt_evaluator.evaluate(
|
|
440
|
+
prompt_content: prompt_content,
|
|
441
|
+
iteration_count: @iteration_count,
|
|
442
|
+
task_summary: task_summary,
|
|
443
|
+
recent_failures: all_results,
|
|
444
|
+
step_name: @step_name
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
display_prompt_evaluation_results(evaluation)
|
|
448
|
+
|
|
449
|
+
# If prompt is deemed ineffective, append suggestions to PROMPT.md
|
|
450
|
+
unless evaluation[:effective]
|
|
451
|
+
append_evaluation_feedback_to_prompt(evaluation)
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
Aidp.log_info("work_loop", "prompt_evaluation_complete",
|
|
455
|
+
iteration: @iteration_count,
|
|
456
|
+
effective: evaluation[:effective],
|
|
457
|
+
confidence: evaluation[:confidence])
|
|
458
|
+
rescue => e
|
|
459
|
+
# Don't let evaluation errors break the work loop
|
|
460
|
+
Aidp.log_warn("work_loop", "prompt_evaluation_error",
|
|
461
|
+
iteration: @iteration_count,
|
|
462
|
+
error: e.message,
|
|
463
|
+
error_class: e.class.name)
|
|
464
|
+
display_message(" ⚠️ Prompt evaluation skipped due to error: #{e.message}", type: :muted)
|
|
465
|
+
end
|
|
466
|
+
|
|
467
|
+
def build_task_summary_for_evaluation
|
|
468
|
+
all_tasks = @persistent_tasklist.all
|
|
469
|
+
return {} if all_tasks.empty?
|
|
470
|
+
|
|
471
|
+
{
|
|
472
|
+
total: all_tasks.size,
|
|
473
|
+
done: all_tasks.count { |t| t.status == :done },
|
|
474
|
+
in_progress: all_tasks.count { |t| t.status == :in_progress },
|
|
475
|
+
pending: all_tasks.count { |t| t.status == :pending },
|
|
476
|
+
abandoned: all_tasks.count { |t| t.status == :abandoned }
|
|
477
|
+
}
|
|
478
|
+
end
|
|
479
|
+
|
|
480
|
+
def display_prompt_evaluation_results(evaluation)
|
|
481
|
+
# Skip display if evaluation was skipped
|
|
482
|
+
if evaluation[:skipped]
|
|
483
|
+
display_message(" ℹ️ Prompt evaluation skipped: #{evaluation[:skip_reason]}", type: :muted)
|
|
484
|
+
return
|
|
485
|
+
end
|
|
486
|
+
|
|
487
|
+
if evaluation[:effective]
|
|
488
|
+
display_message(" ✅ Prompt appears effective, continuing...", type: :success)
|
|
489
|
+
else
|
|
490
|
+
display_message(" ⚠️ Prompt may need improvement:", type: :warning)
|
|
491
|
+
|
|
492
|
+
if evaluation[:issues]&.any?
|
|
493
|
+
display_message(" Issues identified:", type: :info)
|
|
494
|
+
evaluation[:issues].each { |issue| display_message(" - #{issue}", type: :warning) }
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
if evaluation[:suggestions]&.any?
|
|
498
|
+
display_message(" Suggestions:", type: :info)
|
|
499
|
+
evaluation[:suggestions].take(3).each { |s| display_message(" - #{s}", type: :info) }
|
|
500
|
+
end
|
|
501
|
+
|
|
502
|
+
if evaluation[:likely_blockers]&.any?
|
|
503
|
+
display_message(" Likely blockers:", type: :warning)
|
|
504
|
+
evaluation[:likely_blockers].each { |b| display_message(" - #{b}", type: :error) }
|
|
505
|
+
end
|
|
506
|
+
end
|
|
507
|
+
|
|
508
|
+
display_message(" Confidence: #{(evaluation[:confidence] * 100).round}%", type: :muted)
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
def append_evaluation_feedback_to_prompt(evaluation)
|
|
512
|
+
feedback_section = build_evaluation_feedback_section(evaluation)
|
|
513
|
+
|
|
514
|
+
@prompt_manager.append(feedback_section)
|
|
515
|
+
|
|
516
|
+
Aidp.log_debug("work_loop", "appended_evaluation_feedback",
|
|
517
|
+
iteration: @iteration_count,
|
|
518
|
+
feedback_size: feedback_section.length)
|
|
519
|
+
end
|
|
520
|
+
|
|
521
|
+
def build_evaluation_feedback_section(evaluation)
|
|
522
|
+
parts = []
|
|
523
|
+
parts << "\n\n## ⚠️ Work Loop Progress Assessment (Iteration #{@iteration_count})"
|
|
524
|
+
parts << ""
|
|
525
|
+
parts << "The work loop has been running for #{@iteration_count} iterations without completion."
|
|
526
|
+
parts << "An automated assessment identified the following:"
|
|
527
|
+
parts << ""
|
|
528
|
+
|
|
529
|
+
if evaluation[:issues]&.any?
|
|
530
|
+
parts << "### Issues Identified"
|
|
531
|
+
evaluation[:issues].each { |i| parts << "- #{i}" }
|
|
532
|
+
parts << ""
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
if evaluation[:suggestions]&.any?
|
|
536
|
+
parts << "### Suggestions for Progress"
|
|
537
|
+
evaluation[:suggestions].each { |s| parts << "- #{s}" }
|
|
538
|
+
parts << ""
|
|
539
|
+
end
|
|
540
|
+
|
|
541
|
+
if evaluation[:recommended_actions]&.any?
|
|
542
|
+
parts << "### Recommended Actions"
|
|
543
|
+
evaluation[:recommended_actions].each do |action|
|
|
544
|
+
parts << "- [#{action[:priority]&.upcase || "MEDIUM"}] #{action[:action]}"
|
|
545
|
+
parts << " Rationale: #{action[:rationale]}" if action[:rationale]
|
|
546
|
+
end
|
|
547
|
+
parts << ""
|
|
334
548
|
end
|
|
549
|
+
|
|
550
|
+
parts << "### Next Steps"
|
|
551
|
+
parts << "Please address the above issues and either:"
|
|
552
|
+
parts << "1. Complete the remaining work and mark STATUS: COMPLETE"
|
|
553
|
+
parts << "2. File tasks for remaining work and complete them systematically"
|
|
554
|
+
parts << "3. If blocked, explain the blocker clearly in your response"
|
|
555
|
+
parts << ""
|
|
556
|
+
|
|
557
|
+
parts.join("\n")
|
|
335
558
|
end
|
|
336
559
|
|
|
337
560
|
def run_decider_agentic_unit(context)
|
|
@@ -802,18 +1025,27 @@ module Aidp
|
|
|
802
1025
|
# CRITICAL: Change to project directory before calling provider
|
|
803
1026
|
# This ensures Claude CLI runs in the correct directory and can create files
|
|
804
1027
|
Dir.chdir(@project_dir) do
|
|
805
|
-
#
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
1028
|
+
# Execute with sanitized environment (secrets stripped) when security is enabled
|
|
1029
|
+
# This ensures agent processes cannot access registered secrets directly
|
|
1030
|
+
execute_block = lambda do
|
|
1031
|
+
@provider_manager.execute_with_provider(
|
|
1032
|
+
provider_name,
|
|
1033
|
+
full_prompt,
|
|
1034
|
+
{
|
|
1035
|
+
step_name: @step_name,
|
|
1036
|
+
iteration: @iteration_count,
|
|
1037
|
+
project_dir: @project_dir,
|
|
1038
|
+
model: model_name,
|
|
1039
|
+
tier: @thinking_depth_manager.current_tier
|
|
1040
|
+
}
|
|
1041
|
+
)
|
|
1042
|
+
end
|
|
1043
|
+
|
|
1044
|
+
if @security_adapter.enabled?
|
|
1045
|
+
@security_adapter.with_sanitized_environment(&execute_block)
|
|
1046
|
+
else
|
|
1047
|
+
execute_block.call
|
|
1048
|
+
end
|
|
817
1049
|
end
|
|
818
1050
|
end
|
|
819
1051
|
|
|
@@ -947,7 +1179,9 @@ module Aidp
|
|
|
947
1179
|
[]
|
|
948
1180
|
end
|
|
949
1181
|
|
|
950
|
-
|
|
1182
|
+
# FIX for issue #391: Added completion_reason and task_completion_reason parameters for better logging
|
|
1183
|
+
def log_iteration_status(status, provider:, model:, prompt_length:, checks: nil, failures: nil, task_status: nil,
|
|
1184
|
+
completion_reason: nil, task_completion_reason: nil)
|
|
951
1185
|
context_labels = iteration_context_labels
|
|
952
1186
|
metadata = {
|
|
953
1187
|
step: @step_name,
|
|
@@ -959,7 +1193,9 @@ module Aidp
|
|
|
959
1193
|
prompt_length: prompt_length,
|
|
960
1194
|
checks: checks,
|
|
961
1195
|
failures: failures,
|
|
962
|
-
task_status: task_status
|
|
1196
|
+
task_status: task_status,
|
|
1197
|
+
completion_reason: completion_reason,
|
|
1198
|
+
task_completion_reason: task_completion_reason
|
|
963
1199
|
}
|
|
964
1200
|
|
|
965
1201
|
metadata.merge!(iteration_context_metadata)
|
|
@@ -975,17 +1211,19 @@ module Aidp
|
|
|
975
1211
|
Aidp.log_warn("work_loop", "failed_to_log_iteration_status", error: e.message)
|
|
976
1212
|
end
|
|
977
1213
|
|
|
1214
|
+
# FIX for issue #391: Enhanced work loop header with upfront task filing requirements
|
|
978
1215
|
def build_work_loop_header(step_name, iteration)
|
|
979
1216
|
parts = []
|
|
980
1217
|
parts << "# Work Loop: #{step_name} (Iteration #{iteration})"
|
|
981
1218
|
parts << ""
|
|
982
1219
|
parts << "## Instructions"
|
|
983
1220
|
parts << "You are working in a work loop. Your responsibilities:"
|
|
984
|
-
parts << "1.
|
|
985
|
-
parts << "2.
|
|
986
|
-
parts << "3.
|
|
987
|
-
parts << "4.
|
|
988
|
-
parts << "5.
|
|
1221
|
+
parts << "1. **FIRST**: File tasks for all work items (see Task Filing section below)"
|
|
1222
|
+
parts << "2. Read the task description below to understand what needs to be done"
|
|
1223
|
+
parts << "3. **Write/edit CODE files** to implement the required changes"
|
|
1224
|
+
parts << "4. Run tests to verify your changes work correctly"
|
|
1225
|
+
parts << "5. Update task status as you complete items"
|
|
1226
|
+
parts << "6. When ALL tasks are complete and tests pass, mark the step COMPLETE"
|
|
989
1227
|
parts << ""
|
|
990
1228
|
parts << "## Important Notes"
|
|
991
1229
|
parts << "- You have full file system access - create and edit files as needed"
|
|
@@ -993,29 +1231,53 @@ module Aidp
|
|
|
993
1231
|
parts << "- After you finish, tests and linters will run automatically"
|
|
994
1232
|
parts << "- If tests/linters fail, you'll see the errors in the next iteration and can fix them"
|
|
995
1233
|
parts << ""
|
|
1234
|
+
parts << "## ⚠️ Code Changes Required"
|
|
1235
|
+
parts << "**IMPORTANT**: This implementation requires actual code changes."
|
|
1236
|
+
parts << "- Documentation-only changes will NOT be accepted as complete"
|
|
1237
|
+
parts << "- Configuration-only changes will NOT be accepted as complete"
|
|
1238
|
+
parts << "- You must modify/create code files (.rb, .py, .js, etc.) to implement the feature/fix"
|
|
1239
|
+
parts << "- Tests should accompany code changes"
|
|
1240
|
+
parts << ""
|
|
996
1241
|
|
|
997
1242
|
if @config.task_completion_required?
|
|
998
|
-
parts << "## Task
|
|
999
|
-
parts << "**CRITICAL**: This work loop requires task tracking
|
|
1243
|
+
parts << "## Task Filing (REQUIRED - DO THIS FIRST)"
|
|
1244
|
+
parts << "**CRITICAL**: This work loop requires task tracking. You MUST file tasks before implementation."
|
|
1245
|
+
parts << ""
|
|
1246
|
+
parts << "### Step 1: File Tasks Immediately"
|
|
1247
|
+
parts << "In your FIRST iteration, analyze the requirements and file tasks for ALL work:"
|
|
1000
1248
|
parts << ""
|
|
1001
|
-
parts << "
|
|
1002
|
-
parts << "
|
|
1003
|
-
parts << "
|
|
1004
|
-
parts << "
|
|
1005
|
-
parts << "
|
|
1006
|
-
parts << "5. **IMPORTANT**: When you write STATUS: COMPLETE, also mark all your tasks as done!"
|
|
1249
|
+
parts << "```text"
|
|
1250
|
+
parts << "File task: \"Implement [feature/fix description]\" priority: high tags: implementation"
|
|
1251
|
+
parts << "File task: \"Add unit tests for [feature]\" priority: high tags: testing"
|
|
1252
|
+
parts << "File task: \"Add integration tests if needed\" priority: medium tags: testing"
|
|
1253
|
+
parts << "```"
|
|
1007
1254
|
parts << ""
|
|
1008
|
-
parts << "
|
|
1009
|
-
parts << "
|
|
1010
|
-
parts << "
|
|
1011
|
-
parts << "
|
|
1255
|
+
parts << "### Step 2: Work Through Tasks"
|
|
1256
|
+
parts << "- Pick the highest priority pending task"
|
|
1257
|
+
parts << "- Implement it completely"
|
|
1258
|
+
parts << "- Mark it done: `Update task: task_id status: done`"
|
|
1259
|
+
parts << "- Repeat until all tasks are complete"
|
|
1012
1260
|
parts << ""
|
|
1013
|
-
parts << "
|
|
1261
|
+
parts << "### Step 3: Complete the Work Loop"
|
|
1262
|
+
parts << "Only after ALL tasks are done:"
|
|
1263
|
+
parts << "- Verify tests pass"
|
|
1264
|
+
parts << "- Add STATUS: COMPLETE to PROMPT.md"
|
|
1265
|
+
parts << ""
|
|
1266
|
+
parts << "### Task Rules"
|
|
1267
|
+
parts << "- **At least ONE task must be filed** - completion blocked without tasks"
|
|
1268
|
+
parts << "- **At least ONE task must be DONE** - completion blocked if all abandoned"
|
|
1269
|
+
parts << "- **Substantive work required** - doc-only changes rejected"
|
|
1270
|
+
parts << ""
|
|
1271
|
+
parts << "**Important**: Tasks exist due to careful planning. Do NOT abandon tasks due to"
|
|
1272
|
+
parts << "perceived complexity - these factors were considered during planning. Only abandon"
|
|
1273
|
+
parts << "when truly obsolete (requirements changed, duplicate, external blockers)."
|
|
1274
|
+
parts << ""
|
|
1275
|
+
parts << "### Task Filing Examples"
|
|
1014
1276
|
parts << "- `File task: \"Implement user authentication\" priority: high tags: security,auth`"
|
|
1015
1277
|
parts << "- `File task: \"Add tests for login flow\" priority: medium tags: testing`"
|
|
1016
1278
|
parts << "- `File task: \"Update documentation\" priority: low tags: docs`"
|
|
1017
1279
|
parts << ""
|
|
1018
|
-
parts << "Task
|
|
1280
|
+
parts << "### Task Status Update Examples"
|
|
1019
1281
|
parts << "- `Update task: task_123_abc status: in_progress`"
|
|
1020
1282
|
parts << "- `Update task: task_456_def status: done`"
|
|
1021
1283
|
parts << "- `Update task: task_789_ghi status: abandoned reason: \"Requirements changed\"`"
|
|
@@ -1454,6 +1716,29 @@ module Aidp
|
|
|
1454
1716
|
display_message("")
|
|
1455
1717
|
end
|
|
1456
1718
|
|
|
1719
|
+
# Display security status for Rule of Two enforcement
|
|
1720
|
+
def display_security_status
|
|
1721
|
+
status = @security_adapter.status
|
|
1722
|
+
return unless status[:enabled]
|
|
1723
|
+
|
|
1724
|
+
display_message("\n🔒 Security (Rule of Two):", type: :info)
|
|
1725
|
+
display_message(" #{status[:status_string]}", type: :info)
|
|
1726
|
+
|
|
1727
|
+
if status[:state]
|
|
1728
|
+
state = status[:state]
|
|
1729
|
+
flags = []
|
|
1730
|
+
flags << "untrusted_input (#{state[:untrusted_input_source]})" if state[:untrusted_input]
|
|
1731
|
+
flags << "private_data (#{state[:private_data_source]})" if state[:private_data]
|
|
1732
|
+
flags << "egress (#{state[:egress_source]})" if state[:egress]
|
|
1733
|
+
|
|
1734
|
+
if flags.any?
|
|
1735
|
+
display_message(" Active flags: #{flags.join(", ")}", type: :info)
|
|
1736
|
+
end
|
|
1737
|
+
end
|
|
1738
|
+
|
|
1739
|
+
display_message("")
|
|
1740
|
+
end
|
|
1741
|
+
|
|
1457
1742
|
# Display pending tasks from persistent tasklist
|
|
1458
1743
|
def display_pending_tasks
|
|
1459
1744
|
pending_tasks = @persistent_tasklist.pending
|
|
@@ -1530,46 +1815,113 @@ module Aidp
|
|
|
1530
1815
|
end
|
|
1531
1816
|
|
|
1532
1817
|
# Check if tasks are required and all are completed or abandoned
|
|
1533
|
-
# Returns {complete: boolean, message: string}
|
|
1818
|
+
# Returns {complete: boolean, message: string, reason: string}
|
|
1534
1819
|
# Note: Tasks are project-scoped, not session-scoped. This allows tasks created
|
|
1535
1820
|
# in planning phases to be completed in build phases.
|
|
1821
|
+
#
|
|
1822
|
+
# FIX for issue #391: Prevent premature completion when tasks haven't been created
|
|
1823
|
+
# The previous logic allowed completion with empty task list, which enabled
|
|
1824
|
+
# the work loop to complete before actually implementing anything.
|
|
1536
1825
|
def check_task_completion
|
|
1537
|
-
|
|
1826
|
+
Aidp.log_debug("work_loop", "check_task_completion_start",
|
|
1827
|
+
task_completion_required: @config.task_completion_required?,
|
|
1828
|
+
iteration: @iteration_count)
|
|
1829
|
+
|
|
1830
|
+
unless @config.task_completion_required?
|
|
1831
|
+
Aidp.log_debug("work_loop", "check_task_completion_skipped",
|
|
1832
|
+
reason: "task_completion_not_required")
|
|
1833
|
+
return {complete: true, message: nil, reason: "task_completion_not_required"}
|
|
1834
|
+
end
|
|
1538
1835
|
|
|
1539
1836
|
all_tasks = @persistent_tasklist.all
|
|
1540
1837
|
|
|
1541
|
-
|
|
1542
|
-
|
|
1838
|
+
Aidp.log_debug("work_loop", "check_task_completion_task_count",
|
|
1839
|
+
total_tasks: all_tasks.size,
|
|
1840
|
+
task_ids: all_tasks.map(&:id))
|
|
1841
|
+
|
|
1842
|
+
# FIX for issue #391: Require at least one task when task_completion is enabled
|
|
1843
|
+
# Empty task list now blocks completion to prevent premature PR creation
|
|
1844
|
+
# This ensures the agent has actually created and completed work items
|
|
1543
1845
|
if all_tasks.empty?
|
|
1544
|
-
|
|
1846
|
+
Aidp.log_debug("work_loop", "check_task_completion_empty_tasks",
|
|
1847
|
+
reason: "no_tasks_filed",
|
|
1848
|
+
iteration: @iteration_count)
|
|
1849
|
+
|
|
1850
|
+
# After multiple iterations, require tasks - agent should have filed some by now
|
|
1851
|
+
if @iteration_count >= 3
|
|
1852
|
+
return {
|
|
1853
|
+
complete: false,
|
|
1854
|
+
message: "No tasks have been filed yet. You must create at least one task using:\n" \
|
|
1855
|
+
" File task: \"description\" priority: high|medium|low tags: tag1,tag2\n\n" \
|
|
1856
|
+
"Tasks help track progress and ensure complete implementation.",
|
|
1857
|
+
reason: "no_tasks_after_iterations"
|
|
1858
|
+
}
|
|
1859
|
+
end
|
|
1860
|
+
|
|
1861
|
+
# In early iterations, allow progress but don't allow completion
|
|
1862
|
+
return {
|
|
1863
|
+
complete: false,
|
|
1864
|
+
message: "Please file tasks to track your implementation work.",
|
|
1865
|
+
reason: "no_tasks_early_iteration"
|
|
1866
|
+
}
|
|
1545
1867
|
end
|
|
1546
1868
|
|
|
1547
1869
|
# Count tasks by status
|
|
1548
1870
|
pending_tasks = all_tasks.select { |t| t.status == :pending }
|
|
1549
1871
|
in_progress_tasks = all_tasks.select { |t| t.status == :in_progress }
|
|
1550
1872
|
abandoned_tasks = all_tasks.select { |t| t.status == :abandoned }
|
|
1551
|
-
all_tasks.select { |t| t.status == :done }
|
|
1873
|
+
done_tasks = all_tasks.select { |t| t.status == :done }
|
|
1874
|
+
|
|
1875
|
+
Aidp.log_debug("work_loop", "check_task_completion_status_counts",
|
|
1876
|
+
pending: pending_tasks.size,
|
|
1877
|
+
in_progress: in_progress_tasks.size,
|
|
1878
|
+
abandoned: abandoned_tasks.size,
|
|
1879
|
+
done: done_tasks.size)
|
|
1552
1880
|
|
|
1553
1881
|
# If tasks exist, all must be done or abandoned before completion
|
|
1554
1882
|
incomplete_tasks = pending_tasks + in_progress_tasks
|
|
1555
1883
|
|
|
1556
1884
|
if incomplete_tasks.any?
|
|
1557
1885
|
task_list = incomplete_tasks.map { |t| "- #{t.description} (#{t.status}, session: #{t.session})" }.join("\n")
|
|
1886
|
+
Aidp.log_debug("work_loop", "check_task_completion_incomplete",
|
|
1887
|
+
incomplete_count: incomplete_tasks.size,
|
|
1888
|
+
incomplete_ids: incomplete_tasks.map(&:id))
|
|
1889
|
+
return {
|
|
1890
|
+
complete: false,
|
|
1891
|
+
message: "Tasks remain incomplete:\n#{task_list}\n\nComplete all tasks or abandon them with reason before marking work complete.",
|
|
1892
|
+
reason: "incomplete_tasks"
|
|
1893
|
+
}
|
|
1894
|
+
end
|
|
1895
|
+
|
|
1896
|
+
# FIX for issue #391: Require at least one done task, not just abandoned
|
|
1897
|
+
# This prevents scenarios where all tasks are abandoned without any work
|
|
1898
|
+
if done_tasks.empty? && abandoned_tasks.any?
|
|
1899
|
+
Aidp.log_debug("work_loop", "check_task_completion_all_abandoned",
|
|
1900
|
+
abandoned_count: abandoned_tasks.size)
|
|
1558
1901
|
return {
|
|
1559
1902
|
complete: false,
|
|
1560
|
-
message: "
|
|
1903
|
+
message: "All tasks have been abandoned with no completed work. " \
|
|
1904
|
+
"At least one task must be completed, or explain why no implementation is needed.",
|
|
1905
|
+
reason: "all_tasks_abandoned"
|
|
1561
1906
|
}
|
|
1562
1907
|
end
|
|
1563
1908
|
|
|
1564
1909
|
# If there are abandoned tasks, confirm with user
|
|
1565
1910
|
if abandoned_tasks.any? && !all_abandoned_tasks_confirmed?(abandoned_tasks)
|
|
1911
|
+
Aidp.log_debug("work_loop", "check_task_completion_unconfirmed_abandoned",
|
|
1912
|
+
abandoned_count: abandoned_tasks.size)
|
|
1566
1913
|
return {
|
|
1567
1914
|
complete: false,
|
|
1568
|
-
message: "Abandoned tasks require user confirmation. Please confirm abandoned tasks."
|
|
1915
|
+
message: "Abandoned tasks require user confirmation. Please confirm abandoned tasks.",
|
|
1916
|
+
reason: "unconfirmed_abandoned_tasks"
|
|
1569
1917
|
}
|
|
1570
1918
|
end
|
|
1571
1919
|
|
|
1572
|
-
|
|
1920
|
+
Aidp.log_debug("work_loop", "check_task_completion_success",
|
|
1921
|
+
done_count: done_tasks.size,
|
|
1922
|
+
abandoned_count: abandoned_tasks.size)
|
|
1923
|
+
|
|
1924
|
+
{complete: true, message: nil, reason: "all_tasks_complete"}
|
|
1573
1925
|
end
|
|
1574
1926
|
|
|
1575
1927
|
# Check if all abandoned tasks have been confirmed
|
|
@@ -18,7 +18,10 @@ module Aidp
|
|
|
18
18
|
error: "ERROR"
|
|
19
19
|
}.freeze
|
|
20
20
|
|
|
21
|
-
attr_reader :
|
|
21
|
+
attr_reader :iteration, :queued_instructions, :last_error
|
|
22
|
+
|
|
23
|
+
# Expose current_state for testability (use state transition methods in production)
|
|
24
|
+
attr_accessor :current_state
|
|
22
25
|
|
|
23
26
|
def initialize
|
|
24
27
|
super # Initialize MonitorMixin
|