aidp 0.33.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README.md +35 -0
  3. data/lib/aidp/analyze/tree_sitter_scan.rb +3 -0
  4. data/lib/aidp/cli/eval_command.rb +399 -0
  5. data/lib/aidp/cli/harness_command.rb +1 -1
  6. data/lib/aidp/cli/security_command.rb +416 -0
  7. data/lib/aidp/cli/tools_command.rb +6 -4
  8. data/lib/aidp/cli.rb +170 -3
  9. data/lib/aidp/concurrency/exec.rb +3 -0
  10. data/lib/aidp/config.rb +113 -0
  11. data/lib/aidp/config_paths.rb +20 -0
  12. data/lib/aidp/daemon/runner.rb +8 -4
  13. data/lib/aidp/errors.rb +134 -0
  14. data/lib/aidp/evaluations/context_capture.rb +205 -0
  15. data/lib/aidp/evaluations/evaluation_record.rb +114 -0
  16. data/lib/aidp/evaluations/evaluation_storage.rb +250 -0
  17. data/lib/aidp/evaluations.rb +23 -0
  18. data/lib/aidp/execute/async_work_loop_runner.rb +4 -1
  19. data/lib/aidp/execute/interactive_repl.rb +6 -2
  20. data/lib/aidp/execute/prompt_evaluator.rb +359 -0
  21. data/lib/aidp/execute/repl_macros.rb +100 -1
  22. data/lib/aidp/execute/work_loop_runner.rb +399 -47
  23. data/lib/aidp/execute/work_loop_state.rb +4 -1
  24. data/lib/aidp/execute/workflow_selector.rb +3 -0
  25. data/lib/aidp/harness/ai_decision_engine.rb +79 -0
  26. data/lib/aidp/harness/capability_registry.rb +2 -0
  27. data/lib/aidp/harness/condition_detector.rb +3 -0
  28. data/lib/aidp/harness/config_loader.rb +3 -0
  29. data/lib/aidp/harness/enhanced_runner.rb +14 -11
  30. data/lib/aidp/harness/error_handler.rb +3 -0
  31. data/lib/aidp/harness/provider_factory.rb +3 -0
  32. data/lib/aidp/harness/provider_manager.rb +6 -0
  33. data/lib/aidp/harness/runner.rb +5 -1
  34. data/lib/aidp/harness/state/persistence.rb +3 -0
  35. data/lib/aidp/harness/state_manager.rb +3 -0
  36. data/lib/aidp/harness/status_display.rb +28 -20
  37. data/lib/aidp/harness/thinking_depth_manager.rb +32 -32
  38. data/lib/aidp/harness/ui/enhanced_tui.rb +4 -0
  39. data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +4 -0
  40. data/lib/aidp/harness/ui/error_handler.rb +3 -0
  41. data/lib/aidp/harness/ui/job_monitor.rb +4 -0
  42. data/lib/aidp/harness/ui/navigation/submenu.rb +2 -0
  43. data/lib/aidp/harness/ui/navigation/workflow_selector.rb +6 -0
  44. data/lib/aidp/harness/ui/spinner_helper.rb +3 -0
  45. data/lib/aidp/harness/ui/workflow_controller.rb +3 -0
  46. data/lib/aidp/harness/user_interface.rb +3 -0
  47. data/lib/aidp/loader.rb +2 -2
  48. data/lib/aidp/logger.rb +3 -0
  49. data/lib/aidp/message_display.rb +31 -0
  50. data/lib/aidp/pr_worktree_manager.rb +18 -6
  51. data/lib/aidp/provider_manager.rb +3 -0
  52. data/lib/aidp/providers/base.rb +2 -0
  53. data/lib/aidp/security/rule_of_two_enforcer.rb +210 -0
  54. data/lib/aidp/security/secrets_proxy.rb +328 -0
  55. data/lib/aidp/security/secrets_registry.rb +227 -0
  56. data/lib/aidp/security/trifecta_state.rb +220 -0
  57. data/lib/aidp/security/watch_mode_handler.rb +306 -0
  58. data/lib/aidp/security/work_loop_adapter.rb +277 -0
  59. data/lib/aidp/security.rb +56 -0
  60. data/lib/aidp/setup/wizard.rb +4 -2
  61. data/lib/aidp/version.rb +1 -1
  62. data/lib/aidp/watch/auto_merger.rb +274 -0
  63. data/lib/aidp/watch/auto_pr_processor.rb +125 -7
  64. data/lib/aidp/watch/build_processor.rb +16 -1
  65. data/lib/aidp/watch/change_request_processor.rb +680 -286
  66. data/lib/aidp/watch/ci_fix_processor.rb +262 -4
  67. data/lib/aidp/watch/feedback_collector.rb +191 -0
  68. data/lib/aidp/watch/hierarchical_pr_strategy.rb +256 -0
  69. data/lib/aidp/watch/implementation_verifier.rb +142 -1
  70. data/lib/aidp/watch/plan_generator.rb +70 -13
  71. data/lib/aidp/watch/plan_processor.rb +12 -5
  72. data/lib/aidp/watch/projects_processor.rb +286 -0
  73. data/lib/aidp/watch/repository_client.rb +861 -53
  74. data/lib/aidp/watch/review_processor.rb +33 -6
  75. data/lib/aidp/watch/runner.rb +51 -11
  76. data/lib/aidp/watch/state_store.rb +233 -0
  77. data/lib/aidp/watch/sub_issue_creator.rb +221 -0
  78. data/lib/aidp/workflows/guided_agent.rb +4 -0
  79. data/lib/aidp/workstream_executor.rb +3 -0
  80. data/lib/aidp/worktree.rb +61 -11
  81. data/lib/aidp/worktree_branch_manager.rb +347 -101
  82. data/templates/implementation/iterative_implementation.md +46 -3
  83. metadata +20 -1
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "prompt_manager"
4
+ require_relative "prompt_evaluator"
4
5
  require_relative "checkpoint"
5
6
  require_relative "checkpoint_display"
6
7
  require_relative "guard_policy"
@@ -11,6 +12,7 @@ require_relative "steps"
11
12
  require_relative "../harness/test_runner"
12
13
  require_relative "../errors"
13
14
  require_relative "../style_guide/selector"
15
+ require_relative "../security"
14
16
 
15
17
  module Aidp
16
18
  module Execute
@@ -39,7 +41,10 @@ module Aidp
39
41
  }.freeze
40
42
  include Aidp::MessageDisplay
41
43
 
42
- attr_reader :iteration_count, :project_dir, :current_state
44
+ # Expose state for testability
45
+ attr_accessor :iteration_count, :step_name, :options, :persistent_tasklist
46
+ attr_reader :project_dir, :current_state, :state_history, :test_runner, :prompt_manager, :checkpoint
47
+ attr_writer :guard_policy, :prompt_manager, :style_guide_selector
43
48
 
44
49
  MAX_ITERATIONS = 50 # Safety limit
45
50
  CHECKPOINT_INTERVAL = 5 # Record checkpoint every N iterations
@@ -67,12 +72,18 @@ module Aidp
67
72
 
68
73
  # Initialize thinking depth manager for intelligent model selection
69
74
  require_relative "../harness/thinking_depth_manager"
70
- @thinking_depth_manager = options[:thinking_depth_manager] || Aidp::Harness::ThinkingDepthManager.new(config)
75
+ @thinking_depth_manager = options[:thinking_depth_manager] || Aidp::Harness::ThinkingDepthManager.new(config, root_dir: @project_dir)
71
76
  @consecutive_failures = 0
72
77
  @last_tier = nil
73
78
 
74
79
  # Initialize style guide selector for intelligent section selection
75
80
  @style_guide_selector = options[:style_guide_selector] || Aidp::StyleGuide::Selector.new(project_dir: project_dir)
81
+
82
+ # FIX for issue #391: Initialize prompt evaluator for iteration threshold assessment
83
+ @prompt_evaluator = options[:prompt_evaluator] || PromptEvaluator.new(config)
84
+
85
+ # Initialize security adapter for Rule of Two enforcement
86
+ @security_adapter = options[:security_adapter] || Aidp::Security::WorkLoopAdapter.new(project_dir: project_dir)
76
87
  end
77
88
 
78
89
  # Execute a step using fix-forward work loop pattern
@@ -143,6 +154,11 @@ module Aidp
143
154
  @current_state = :ready
144
155
  @state_history.clear
145
156
 
157
+ # Begin security tracking for this agentic work unit
158
+ work_unit_id = "agentic_#{@step_name}_#{SecureRandom.hex(4)}"
159
+ @security_adapter.begin_work_unit(work_unit_id: work_unit_id, context: context)
160
+ display_security_status
161
+
146
162
  create_initial_prompt(step_spec, context)
147
163
 
148
164
  loop do
@@ -154,6 +170,10 @@ module Aidp
154
170
  display_message("⚠️ Max iterations (#{MAX_ITERATIONS}) reached for #{@step_name}", type: :warning)
155
171
  display_state_summary
156
172
  archive_and_cleanup
173
+
174
+ # End security tracking for this work unit
175
+ @security_adapter.end_work_unit
176
+
157
177
  return build_agentic_payload(
158
178
  agent_result: nil,
159
179
  response: build_max_iterations_result,
@@ -178,6 +198,30 @@ module Aidp
178
198
  prompt_length: prompt_length,
179
199
  checks: checks_summary)
180
200
 
201
+ # Check security policy before agent call (Rule of Two enforcement)
202
+ # Agent calls enable egress capability
203
+ begin
204
+ @security_adapter.check_agent_call_allowed!(operation: :agent_execution)
205
+ rescue Aidp::Security::PolicyViolation => e
206
+ # Security policy violation - cannot proceed with agent call
207
+ Aidp.logger.error("work_loop", "Security policy violation",
208
+ step: @step_name,
209
+ iteration: @iteration_count,
210
+ error: e.message)
211
+ display_message(" 🛡️ Security policy violation: #{e.message}", type: :error)
212
+ display_message(" Cannot proceed - Rule of Two would be violated", type: :error)
213
+
214
+ # End security tracking and return error
215
+ @security_adapter.end_work_unit
216
+ return build_agentic_payload(
217
+ agent_result: nil,
218
+ response: {status: "error", message: "Security policy violation: #{e.message}"},
219
+ summary: nil,
220
+ completed: false,
221
+ terminate: true
222
+ )
223
+ end
224
+
181
225
  # Wrap agent call in exception handling for true fix-forward
182
226
  begin
183
227
  agent_result = apply_patch(preview_provider, preview_model)
@@ -185,6 +229,15 @@ module Aidp
185
229
  # Configuration errors should crash immediately (crash-early principle)
186
230
  # Re-raise without catching
187
231
  raise
232
+ rescue Aidp::Security::PolicyViolation => e
233
+ # Security violations should not continue - they are policy failures
234
+ Aidp.logger.error("work_loop", "Security policy violation during agent call",
235
+ step: @step_name,
236
+ iteration: @iteration_count,
237
+ error: e.message)
238
+ display_message(" 🛡️ Security violation: #{e.message}", type: :error)
239
+ @security_adapter.end_work_unit
240
+ raise
188
241
  rescue => e
189
242
  # Convert exception to error result for fix-forward handling
190
243
  Aidp.logger.error("work_loop", "Exception during agent call",
@@ -263,13 +316,31 @@ module Aidp
263
316
 
264
317
  # Check task completion status
265
318
  task_completion_result = check_task_completion
319
+ agent_completed = agent_marked_complete?(agent_result)
320
+
321
+ # FIX for issue #391: Comprehensive logging at completion decision point
322
+ Aidp.log_debug("work_loop", "completion_decision_point",
323
+ iteration: @iteration_count,
324
+ all_checks_pass: all_checks_pass,
325
+ agent_marked_complete: agent_completed,
326
+ task_completion_complete: task_completion_result[:complete],
327
+ task_completion_reason: task_completion_result[:reason],
328
+ test_success: test_results[:success],
329
+ lint_success: lint_results[:success],
330
+ formatter_success: formatter_results[:success],
331
+ build_success: build_results[:success],
332
+ doc_success: doc_results[:success])
266
333
 
267
334
  if all_checks_pass
268
335
  transition_to(:pass)
269
336
 
270
- if agent_marked_complete?(agent_result)
337
+ if agent_completed
271
338
  # Check if tasks are complete
272
339
  if task_completion_result[:complete]
340
+ Aidp.log_debug("work_loop", "completion_approved",
341
+ iteration: @iteration_count,
342
+ reason: task_completion_result[:reason])
343
+
273
344
  transition_to(:done)
274
345
  record_final_checkpoint(all_results)
275
346
  display_task_summary
@@ -280,9 +351,13 @@ module Aidp
280
351
  model: preview_model,
281
352
  prompt_length: prompt_length,
282
353
  checks: checks_summary,
283
- task_status: "complete")
354
+ task_status: "complete",
355
+ completion_reason: task_completion_result[:reason])
284
356
  archive_and_cleanup
285
357
 
358
+ # End security tracking for this work unit
359
+ @security_adapter.end_work_unit
360
+
286
361
  return build_agentic_payload(
287
362
  agent_result: agent_result,
288
363
  response: build_success_result(agent_result),
@@ -292,6 +367,11 @@ module Aidp
292
367
  )
293
368
  else
294
369
  # All checks passed but tasks not complete
370
+ Aidp.log_debug("work_loop", "completion_blocked_tasks_incomplete",
371
+ iteration: @iteration_count,
372
+ reason: task_completion_result[:reason],
373
+ message: task_completion_result[:message])
374
+
295
375
  display_message(" All checks passed but tasks not complete", type: :warning)
296
376
  display_message(" #{task_completion_result[:message]}", type: :warning)
297
377
  display_task_summary
@@ -300,13 +380,17 @@ module Aidp
300
380
  model: preview_model,
301
381
  prompt_length: prompt_length,
302
382
  checks: checks_summary,
303
- task_status: "incomplete")
383
+ task_status: "incomplete",
384
+ task_completion_reason: task_completion_result[:reason])
304
385
  transition_to(:next_patch)
305
386
 
306
387
  # Append task completion requirement to PROMPT.md
307
388
  append_task_requirement_to_prompt(task_completion_result[:message])
308
389
  end
309
390
  else
391
+ Aidp.log_debug("work_loop", "completion_blocked_agent_not_complete",
392
+ iteration: @iteration_count)
393
+
310
394
  display_message(" All checks passed but work not marked complete", type: :info)
311
395
  log_iteration_status("checks_passed_waiting_agent_completion",
312
396
  provider: preview_provider,
@@ -331,7 +415,146 @@ module Aidp
331
415
  failures: failure_summary_for_log(all_results))
332
416
  prepare_next_iteration(all_results, diagnostic)
333
417
  end
418
+
419
+ # FIX for issue #391: Evaluate prompt effectiveness at iteration thresholds
420
+ # After 10+ iterations, assess whether the prompt is leading to progress
421
+ evaluate_prompt_effectiveness(all_results)
422
+ end
423
+ end
424
+
425
+ # Evaluate prompt effectiveness at iteration thresholds
426
+ # FIX for issue #391: Provides feedback when work loop is stuck
427
+ # Note: Errors during evaluation are logged but don't fail the work loop
428
+ def evaluate_prompt_effectiveness(all_results)
429
+ return unless @prompt_evaluator.should_evaluate?(@iteration_count)
430
+
431
+ Aidp.log_debug("work_loop", "evaluating_prompt_effectiveness",
432
+ iteration: @iteration_count)
433
+
434
+ display_message("📊 Evaluating prompt effectiveness (iteration #{@iteration_count})...", type: :info)
435
+
436
+ task_summary = build_task_summary_for_evaluation
437
+ prompt_content = @prompt_manager.read
438
+
439
+ evaluation = @prompt_evaluator.evaluate(
440
+ prompt_content: prompt_content,
441
+ iteration_count: @iteration_count,
442
+ task_summary: task_summary,
443
+ recent_failures: all_results,
444
+ step_name: @step_name
445
+ )
446
+
447
+ display_prompt_evaluation_results(evaluation)
448
+
449
+ # If prompt is deemed ineffective, append suggestions to PROMPT.md
450
+ unless evaluation[:effective]
451
+ append_evaluation_feedback_to_prompt(evaluation)
452
+ end
453
+
454
+ Aidp.log_info("work_loop", "prompt_evaluation_complete",
455
+ iteration: @iteration_count,
456
+ effective: evaluation[:effective],
457
+ confidence: evaluation[:confidence])
458
+ rescue => e
459
+ # Don't let evaluation errors break the work loop
460
+ Aidp.log_warn("work_loop", "prompt_evaluation_error",
461
+ iteration: @iteration_count,
462
+ error: e.message,
463
+ error_class: e.class.name)
464
+ display_message(" ⚠️ Prompt evaluation skipped due to error: #{e.message}", type: :muted)
465
+ end
466
+
467
+ def build_task_summary_for_evaluation
468
+ all_tasks = @persistent_tasklist.all
469
+ return {} if all_tasks.empty?
470
+
471
+ {
472
+ total: all_tasks.size,
473
+ done: all_tasks.count { |t| t.status == :done },
474
+ in_progress: all_tasks.count { |t| t.status == :in_progress },
475
+ pending: all_tasks.count { |t| t.status == :pending },
476
+ abandoned: all_tasks.count { |t| t.status == :abandoned }
477
+ }
478
+ end
479
+
480
+ def display_prompt_evaluation_results(evaluation)
481
+ # Skip display if evaluation was skipped
482
+ if evaluation[:skipped]
483
+ display_message(" ℹ️ Prompt evaluation skipped: #{evaluation[:skip_reason]}", type: :muted)
484
+ return
485
+ end
486
+
487
+ if evaluation[:effective]
488
+ display_message(" ✅ Prompt appears effective, continuing...", type: :success)
489
+ else
490
+ display_message(" ⚠️ Prompt may need improvement:", type: :warning)
491
+
492
+ if evaluation[:issues]&.any?
493
+ display_message(" Issues identified:", type: :info)
494
+ evaluation[:issues].each { |issue| display_message(" - #{issue}", type: :warning) }
495
+ end
496
+
497
+ if evaluation[:suggestions]&.any?
498
+ display_message(" Suggestions:", type: :info)
499
+ evaluation[:suggestions].take(3).each { |s| display_message(" - #{s}", type: :info) }
500
+ end
501
+
502
+ if evaluation[:likely_blockers]&.any?
503
+ display_message(" Likely blockers:", type: :warning)
504
+ evaluation[:likely_blockers].each { |b| display_message(" - #{b}", type: :error) }
505
+ end
506
+ end
507
+
508
+ display_message(" Confidence: #{(evaluation[:confidence] * 100).round}%", type: :muted)
509
+ end
510
+
511
+ def append_evaluation_feedback_to_prompt(evaluation)
512
+ feedback_section = build_evaluation_feedback_section(evaluation)
513
+
514
+ @prompt_manager.append(feedback_section)
515
+
516
+ Aidp.log_debug("work_loop", "appended_evaluation_feedback",
517
+ iteration: @iteration_count,
518
+ feedback_size: feedback_section.length)
519
+ end
520
+
521
+ def build_evaluation_feedback_section(evaluation)
522
+ parts = []
523
+ parts << "\n\n## ⚠️ Work Loop Progress Assessment (Iteration #{@iteration_count})"
524
+ parts << ""
525
+ parts << "The work loop has been running for #{@iteration_count} iterations without completion."
526
+ parts << "An automated assessment identified the following:"
527
+ parts << ""
528
+
529
+ if evaluation[:issues]&.any?
530
+ parts << "### Issues Identified"
531
+ evaluation[:issues].each { |i| parts << "- #{i}" }
532
+ parts << ""
533
+ end
534
+
535
+ if evaluation[:suggestions]&.any?
536
+ parts << "### Suggestions for Progress"
537
+ evaluation[:suggestions].each { |s| parts << "- #{s}" }
538
+ parts << ""
539
+ end
540
+
541
+ if evaluation[:recommended_actions]&.any?
542
+ parts << "### Recommended Actions"
543
+ evaluation[:recommended_actions].each do |action|
544
+ parts << "- [#{action[:priority]&.upcase || "MEDIUM"}] #{action[:action]}"
545
+ parts << " Rationale: #{action[:rationale]}" if action[:rationale]
546
+ end
547
+ parts << ""
334
548
  end
549
+
550
+ parts << "### Next Steps"
551
+ parts << "Please address the above issues and either:"
552
+ parts << "1. Complete the remaining work and mark STATUS: COMPLETE"
553
+ parts << "2. File tasks for remaining work and complete them systematically"
554
+ parts << "3. If blocked, explain the blocker clearly in your response"
555
+ parts << ""
556
+
557
+ parts.join("\n")
335
558
  end
336
559
 
337
560
  def run_decider_agentic_unit(context)
@@ -802,18 +1025,27 @@ module Aidp
802
1025
  # CRITICAL: Change to project directory before calling provider
803
1026
  # This ensures Claude CLI runs in the correct directory and can create files
804
1027
  Dir.chdir(@project_dir) do
805
- # Send to provider via provider_manager with selected model
806
- @provider_manager.execute_with_provider(
807
- provider_name,
808
- full_prompt,
809
- {
810
- step_name: @step_name,
811
- iteration: @iteration_count,
812
- project_dir: @project_dir,
813
- model: model_name,
814
- tier: @thinking_depth_manager.current_tier
815
- }
816
- )
1028
+ # Execute with sanitized environment (secrets stripped) when security is enabled
1029
+ # This ensures agent processes cannot access registered secrets directly
1030
+ execute_block = lambda do
1031
+ @provider_manager.execute_with_provider(
1032
+ provider_name,
1033
+ full_prompt,
1034
+ {
1035
+ step_name: @step_name,
1036
+ iteration: @iteration_count,
1037
+ project_dir: @project_dir,
1038
+ model: model_name,
1039
+ tier: @thinking_depth_manager.current_tier
1040
+ }
1041
+ )
1042
+ end
1043
+
1044
+ if @security_adapter.enabled?
1045
+ @security_adapter.with_sanitized_environment(&execute_block)
1046
+ else
1047
+ execute_block.call
1048
+ end
817
1049
  end
818
1050
  end
819
1051
 
@@ -947,7 +1179,9 @@ module Aidp
947
1179
  []
948
1180
  end
949
1181
 
950
- def log_iteration_status(status, provider:, model:, prompt_length:, checks: nil, failures: nil, task_status: nil)
1182
+ # FIX for issue #391: Added completion_reason and task_completion_reason parameters for better logging
1183
+ def log_iteration_status(status, provider:, model:, prompt_length:, checks: nil, failures: nil, task_status: nil,
1184
+ completion_reason: nil, task_completion_reason: nil)
951
1185
  context_labels = iteration_context_labels
952
1186
  metadata = {
953
1187
  step: @step_name,
@@ -959,7 +1193,9 @@ module Aidp
959
1193
  prompt_length: prompt_length,
960
1194
  checks: checks,
961
1195
  failures: failures,
962
- task_status: task_status
1196
+ task_status: task_status,
1197
+ completion_reason: completion_reason,
1198
+ task_completion_reason: task_completion_reason
963
1199
  }
964
1200
 
965
1201
  metadata.merge!(iteration_context_metadata)
@@ -975,17 +1211,19 @@ module Aidp
975
1211
  Aidp.log_warn("work_loop", "failed_to_log_iteration_status", error: e.message)
976
1212
  end
977
1213
 
1214
+ # FIX for issue #391: Enhanced work loop header with upfront task filing requirements
978
1215
  def build_work_loop_header(step_name, iteration)
979
1216
  parts = []
980
1217
  parts << "# Work Loop: #{step_name} (Iteration #{iteration})"
981
1218
  parts << ""
982
1219
  parts << "## Instructions"
983
1220
  parts << "You are working in a work loop. Your responsibilities:"
984
- parts << "1. Read the task description below to understand what needs to be done"
985
- parts << "2. **Write/edit code files** to implement the required changes"
986
- parts << "3. Run tests to verify your changes work correctly"
987
- parts << "4. Update the task list in PROMPT.md as you complete items"
988
- parts << "5. When ALL tasks are complete and tests pass, mark the step COMPLETE"
1221
+ parts << "1. **FIRST**: File tasks for all work items (see Task Filing section below)"
1222
+ parts << "2. Read the task description below to understand what needs to be done"
1223
+ parts << "3. **Write/edit CODE files** to implement the required changes"
1224
+ parts << "4. Run tests to verify your changes work correctly"
1225
+ parts << "5. Update task status as you complete items"
1226
+ parts << "6. When ALL tasks are complete and tests pass, mark the step COMPLETE"
989
1227
  parts << ""
990
1228
  parts << "## Important Notes"
991
1229
  parts << "- You have full file system access - create and edit files as needed"
@@ -993,29 +1231,53 @@ module Aidp
993
1231
  parts << "- After you finish, tests and linters will run automatically"
994
1232
  parts << "- If tests/linters fail, you'll see the errors in the next iteration and can fix them"
995
1233
  parts << ""
1234
+ parts << "## ⚠️ Code Changes Required"
1235
+ parts << "**IMPORTANT**: This implementation requires actual code changes."
1236
+ parts << "- Documentation-only changes will NOT be accepted as complete"
1237
+ parts << "- Configuration-only changes will NOT be accepted as complete"
1238
+ parts << "- You must modify/create code files (.rb, .py, .js, etc.) to implement the feature/fix"
1239
+ parts << "- Tests should accompany code changes"
1240
+ parts << ""
996
1241
 
997
1242
  if @config.task_completion_required?
998
- parts << "## Task Tracking (REQUIRED)"
999
- parts << "**CRITICAL**: This work loop requires task tracking for completion."
1243
+ parts << "## Task Filing (REQUIRED - DO THIS FIRST)"
1244
+ parts << "**CRITICAL**: This work loop requires task tracking. You MUST file tasks before implementation."
1245
+ parts << ""
1246
+ parts << "### Step 1: File Tasks Immediately"
1247
+ parts << "In your FIRST iteration, analyze the requirements and file tasks for ALL work:"
1000
1248
  parts << ""
1001
- parts << "You must:"
1002
- parts << "1. Create at least one task for this session using: `File task: \"description\"`"
1003
- parts << "2. Track all work items as tasks"
1004
- parts << "3. Update task status as you progress"
1005
- parts << "4. All tasks must be DONE or ABANDONED (with reason) before completion"
1006
- parts << "5. **IMPORTANT**: When you write STATUS: COMPLETE, also mark all your tasks as done!"
1249
+ parts << "```text"
1250
+ parts << "File task: \"Implement [feature/fix description]\" priority: high tags: implementation"
1251
+ parts << "File task: \"Add unit tests for [feature]\" priority: high tags: testing"
1252
+ parts << "File task: \"Add integration tests if needed\" priority: medium tags: testing"
1253
+ parts << "```"
1007
1254
  parts << ""
1008
- parts << "**Important**: Tasks in the list exist due to careful planning and requirements analysis."
1009
- parts << "Do NOT abandon tasks due to perceived complexity or scope concerns - these factors were"
1010
- parts << "considered during planning. Only abandon tasks when truly obsolete (requirements changed,"
1011
- parts << "duplicate work, external blockers). When in doubt, mark in_progress and implement."
1255
+ parts << "### Step 2: Work Through Tasks"
1256
+ parts << "- Pick the highest priority pending task"
1257
+ parts << "- Implement it completely"
1258
+ parts << "- Mark it done: `Update task: task_id status: done`"
1259
+ parts << "- Repeat until all tasks are complete"
1012
1260
  parts << ""
1013
- parts << "Task filing examples:"
1261
+ parts << "### Step 3: Complete the Work Loop"
1262
+ parts << "Only after ALL tasks are done:"
1263
+ parts << "- Verify tests pass"
1264
+ parts << "- Add STATUS: COMPLETE to PROMPT.md"
1265
+ parts << ""
1266
+ parts << "### Task Rules"
1267
+ parts << "- **At least ONE task must be filed** - completion blocked without tasks"
1268
+ parts << "- **At least ONE task must be DONE** - completion blocked if all abandoned"
1269
+ parts << "- **Substantive work required** - doc-only changes rejected"
1270
+ parts << ""
1271
+ parts << "**Important**: Tasks exist due to careful planning. Do NOT abandon tasks due to"
1272
+ parts << "perceived complexity - these factors were considered during planning. Only abandon"
1273
+ parts << "when truly obsolete (requirements changed, duplicate, external blockers)."
1274
+ parts << ""
1275
+ parts << "### Task Filing Examples"
1014
1276
  parts << "- `File task: \"Implement user authentication\" priority: high tags: security,auth`"
1015
1277
  parts << "- `File task: \"Add tests for login flow\" priority: medium tags: testing`"
1016
1278
  parts << "- `File task: \"Update documentation\" priority: low tags: docs`"
1017
1279
  parts << ""
1018
- parts << "Task status update examples:"
1280
+ parts << "### Task Status Update Examples"
1019
1281
  parts << "- `Update task: task_123_abc status: in_progress`"
1020
1282
  parts << "- `Update task: task_456_def status: done`"
1021
1283
  parts << "- `Update task: task_789_ghi status: abandoned reason: \"Requirements changed\"`"
@@ -1454,6 +1716,29 @@ module Aidp
1454
1716
  display_message("")
1455
1717
  end
1456
1718
 
1719
+ # Display security status for Rule of Two enforcement
1720
+ def display_security_status
1721
+ status = @security_adapter.status
1722
+ return unless status[:enabled]
1723
+
1724
+ display_message("\n🔒 Security (Rule of Two):", type: :info)
1725
+ display_message(" #{status[:status_string]}", type: :info)
1726
+
1727
+ if status[:state]
1728
+ state = status[:state]
1729
+ flags = []
1730
+ flags << "untrusted_input (#{state[:untrusted_input_source]})" if state[:untrusted_input]
1731
+ flags << "private_data (#{state[:private_data_source]})" if state[:private_data]
1732
+ flags << "egress (#{state[:egress_source]})" if state[:egress]
1733
+
1734
+ if flags.any?
1735
+ display_message(" Active flags: #{flags.join(", ")}", type: :info)
1736
+ end
1737
+ end
1738
+
1739
+ display_message("")
1740
+ end
1741
+
1457
1742
  # Display pending tasks from persistent tasklist
1458
1743
  def display_pending_tasks
1459
1744
  pending_tasks = @persistent_tasklist.pending
@@ -1530,46 +1815,113 @@ module Aidp
1530
1815
  end
1531
1816
 
1532
1817
  # Check if tasks are required and all are completed or abandoned
1533
- # Returns {complete: boolean, message: string}
1818
+ # Returns {complete: boolean, message: string, reason: string}
1534
1819
  # Note: Tasks are project-scoped, not session-scoped. This allows tasks created
1535
1820
  # in planning phases to be completed in build phases.
1821
+ #
1822
+ # FIX for issue #391: Prevent premature completion when tasks haven't been created
1823
+ # The previous logic allowed completion with empty task list, which enabled
1824
+ # the work loop to complete before actually implementing anything.
1536
1825
  def check_task_completion
1537
- return {complete: true, message: nil} unless @config.task_completion_required?
1826
+ Aidp.log_debug("work_loop", "check_task_completion_start",
1827
+ task_completion_required: @config.task_completion_required?,
1828
+ iteration: @iteration_count)
1829
+
1830
+ unless @config.task_completion_required?
1831
+ Aidp.log_debug("work_loop", "check_task_completion_skipped",
1832
+ reason: "task_completion_not_required")
1833
+ return {complete: true, message: nil, reason: "task_completion_not_required"}
1834
+ end
1538
1835
 
1539
1836
  all_tasks = @persistent_tasklist.all
1540
1837
 
1541
- # If no tasks exist yet, allow completion - agent can work without tasks initially
1542
- # This supports workflows where no planning phase created tasks
1838
+ Aidp.log_debug("work_loop", "check_task_completion_task_count",
1839
+ total_tasks: all_tasks.size,
1840
+ task_ids: all_tasks.map(&:id))
1841
+
1842
+ # FIX for issue #391: Require at least one task when task_completion is enabled
1843
+ # Empty task list now blocks completion to prevent premature PR creation
1844
+ # This ensures the agent has actually created and completed work items
1543
1845
  if all_tasks.empty?
1544
- return {complete: true, message: nil}
1846
+ Aidp.log_debug("work_loop", "check_task_completion_empty_tasks",
1847
+ reason: "no_tasks_filed",
1848
+ iteration: @iteration_count)
1849
+
1850
+ # After multiple iterations, require tasks - agent should have filed some by now
1851
+ if @iteration_count >= 3
1852
+ return {
1853
+ complete: false,
1854
+ message: "No tasks have been filed yet. You must create at least one task using:\n" \
1855
+ " File task: \"description\" priority: high|medium|low tags: tag1,tag2\n\n" \
1856
+ "Tasks help track progress and ensure complete implementation.",
1857
+ reason: "no_tasks_after_iterations"
1858
+ }
1859
+ end
1860
+
1861
+ # In early iterations, allow progress but don't allow completion
1862
+ return {
1863
+ complete: false,
1864
+ message: "Please file tasks to track your implementation work.",
1865
+ reason: "no_tasks_early_iteration"
1866
+ }
1545
1867
  end
1546
1868
 
1547
1869
  # Count tasks by status
1548
1870
  pending_tasks = all_tasks.select { |t| t.status == :pending }
1549
1871
  in_progress_tasks = all_tasks.select { |t| t.status == :in_progress }
1550
1872
  abandoned_tasks = all_tasks.select { |t| t.status == :abandoned }
1551
- all_tasks.select { |t| t.status == :done }
1873
+ done_tasks = all_tasks.select { |t| t.status == :done }
1874
+
1875
+ Aidp.log_debug("work_loop", "check_task_completion_status_counts",
1876
+ pending: pending_tasks.size,
1877
+ in_progress: in_progress_tasks.size,
1878
+ abandoned: abandoned_tasks.size,
1879
+ done: done_tasks.size)
1552
1880
 
1553
1881
  # If tasks exist, all must be done or abandoned before completion
1554
1882
  incomplete_tasks = pending_tasks + in_progress_tasks
1555
1883
 
1556
1884
  if incomplete_tasks.any?
1557
1885
  task_list = incomplete_tasks.map { |t| "- #{t.description} (#{t.status}, session: #{t.session})" }.join("\n")
1886
+ Aidp.log_debug("work_loop", "check_task_completion_incomplete",
1887
+ incomplete_count: incomplete_tasks.size,
1888
+ incomplete_ids: incomplete_tasks.map(&:id))
1889
+ return {
1890
+ complete: false,
1891
+ message: "Tasks remain incomplete:\n#{task_list}\n\nComplete all tasks or abandon them with reason before marking work complete.",
1892
+ reason: "incomplete_tasks"
1893
+ }
1894
+ end
1895
+
1896
+ # FIX for issue #391: Require at least one done task, not just abandoned
1897
+ # This prevents scenarios where all tasks are abandoned without any work
1898
+ if done_tasks.empty? && abandoned_tasks.any?
1899
+ Aidp.log_debug("work_loop", "check_task_completion_all_abandoned",
1900
+ abandoned_count: abandoned_tasks.size)
1558
1901
  return {
1559
1902
  complete: false,
1560
- message: "Tasks remain incomplete:\n#{task_list}\n\nComplete all tasks or abandon them with reason before marking work complete."
1903
+ message: "All tasks have been abandoned with no completed work. " \
1904
+ "At least one task must be completed, or explain why no implementation is needed.",
1905
+ reason: "all_tasks_abandoned"
1561
1906
  }
1562
1907
  end
1563
1908
 
1564
1909
  # If there are abandoned tasks, confirm with user
1565
1910
  if abandoned_tasks.any? && !all_abandoned_tasks_confirmed?(abandoned_tasks)
1911
+ Aidp.log_debug("work_loop", "check_task_completion_unconfirmed_abandoned",
1912
+ abandoned_count: abandoned_tasks.size)
1566
1913
  return {
1567
1914
  complete: false,
1568
- message: "Abandoned tasks require user confirmation. Please confirm abandoned tasks."
1915
+ message: "Abandoned tasks require user confirmation. Please confirm abandoned tasks.",
1916
+ reason: "unconfirmed_abandoned_tasks"
1569
1917
  }
1570
1918
  end
1571
1919
 
1572
- {complete: true, message: nil}
1920
+ Aidp.log_debug("work_loop", "check_task_completion_success",
1921
+ done_count: done_tasks.size,
1922
+ abandoned_count: abandoned_tasks.size)
1923
+
1924
+ {complete: true, message: nil, reason: "all_tasks_complete"}
1573
1925
  end
1574
1926
 
1575
1927
  # Check if all abandoned tasks have been confirmed
@@ -18,7 +18,10 @@ module Aidp
18
18
  error: "ERROR"
19
19
  }.freeze
20
20
 
21
- attr_reader :current_state, :iteration, :queued_instructions, :last_error
21
+ attr_reader :iteration, :queued_instructions, :last_error
22
+
23
+ # Expose current_state for testability (use state transition methods in production)
24
+ attr_accessor :current_state
22
25
 
23
26
  def initialize
24
27
  super # Initialize MonitorMixin
@@ -10,6 +10,9 @@ module Aidp
10
10
  class WorkflowSelector
11
11
  include Aidp::MessageDisplay
12
12
 
13
+ # Expose for testability
14
+ attr_reader :user_input
15
+
13
16
  def initialize(prompt: TTY::Prompt.new, workflow_selector: nil)
14
17
  @user_input = {}
15
18
  @prompt = prompt