aidp 0.33.0 → 0.34.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +35 -0
- data/lib/aidp/analyze/tree_sitter_scan.rb +3 -0
- data/lib/aidp/cli/eval_command.rb +399 -0
- data/lib/aidp/cli/harness_command.rb +1 -1
- data/lib/aidp/cli/security_command.rb +416 -0
- data/lib/aidp/cli/tools_command.rb +6 -4
- data/lib/aidp/cli.rb +170 -3
- data/lib/aidp/concurrency/exec.rb +3 -0
- data/lib/aidp/config.rb +113 -0
- data/lib/aidp/config_paths.rb +20 -0
- data/lib/aidp/daemon/runner.rb +8 -4
- data/lib/aidp/errors.rb +134 -0
- data/lib/aidp/evaluations/context_capture.rb +205 -0
- data/lib/aidp/evaluations/evaluation_record.rb +114 -0
- data/lib/aidp/evaluations/evaluation_storage.rb +250 -0
- data/lib/aidp/evaluations.rb +23 -0
- data/lib/aidp/execute/async_work_loop_runner.rb +4 -1
- data/lib/aidp/execute/interactive_repl.rb +6 -2
- data/lib/aidp/execute/prompt_evaluator.rb +359 -0
- data/lib/aidp/execute/repl_macros.rb +100 -1
- data/lib/aidp/execute/work_loop_runner.rb +399 -47
- data/lib/aidp/execute/work_loop_state.rb +4 -1
- data/lib/aidp/execute/workflow_selector.rb +3 -0
- data/lib/aidp/harness/ai_decision_engine.rb +79 -0
- data/lib/aidp/harness/capability_registry.rb +2 -0
- data/lib/aidp/harness/condition_detector.rb +3 -0
- data/lib/aidp/harness/config_loader.rb +3 -0
- data/lib/aidp/harness/enhanced_runner.rb +14 -11
- data/lib/aidp/harness/error_handler.rb +3 -0
- data/lib/aidp/harness/provider_factory.rb +3 -0
- data/lib/aidp/harness/provider_manager.rb +6 -0
- data/lib/aidp/harness/runner.rb +5 -1
- data/lib/aidp/harness/state/persistence.rb +3 -0
- data/lib/aidp/harness/state_manager.rb +3 -0
- data/lib/aidp/harness/status_display.rb +28 -20
- data/lib/aidp/harness/thinking_depth_manager.rb +32 -32
- data/lib/aidp/harness/ui/enhanced_tui.rb +4 -0
- data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +4 -0
- data/lib/aidp/harness/ui/error_handler.rb +3 -0
- data/lib/aidp/harness/ui/job_monitor.rb +4 -0
- data/lib/aidp/harness/ui/navigation/submenu.rb +2 -0
- data/lib/aidp/harness/ui/navigation/workflow_selector.rb +6 -0
- data/lib/aidp/harness/ui/spinner_helper.rb +3 -0
- data/lib/aidp/harness/ui/workflow_controller.rb +3 -0
- data/lib/aidp/harness/ui.rb +11 -0
- data/lib/aidp/harness/user_interface.rb +3 -0
- data/lib/aidp/loader.rb +2 -2
- data/lib/aidp/logger.rb +3 -0
- data/lib/aidp/message_display.rb +31 -0
- data/lib/aidp/pr_worktree_manager.rb +18 -6
- data/lib/aidp/provider_manager.rb +3 -0
- data/lib/aidp/providers/base.rb +2 -0
- data/lib/aidp/security/rule_of_two_enforcer.rb +210 -0
- data/lib/aidp/security/secrets_proxy.rb +328 -0
- data/lib/aidp/security/secrets_registry.rb +227 -0
- data/lib/aidp/security/trifecta_state.rb +220 -0
- data/lib/aidp/security/watch_mode_handler.rb +306 -0
- data/lib/aidp/security/work_loop_adapter.rb +277 -0
- data/lib/aidp/security.rb +56 -0
- data/lib/aidp/setup/wizard.rb +4 -2
- data/lib/aidp/version.rb +1 -1
- data/lib/aidp/watch/auto_merger.rb +274 -0
- data/lib/aidp/watch/auto_pr_processor.rb +125 -7
- data/lib/aidp/watch/build_processor.rb +16 -1
- data/lib/aidp/watch/change_request_processor.rb +680 -286
- data/lib/aidp/watch/ci_fix_processor.rb +262 -4
- data/lib/aidp/watch/feedback_collector.rb +191 -0
- data/lib/aidp/watch/hierarchical_pr_strategy.rb +256 -0
- data/lib/aidp/watch/implementation_verifier.rb +142 -1
- data/lib/aidp/watch/plan_generator.rb +70 -13
- data/lib/aidp/watch/plan_processor.rb +12 -5
- data/lib/aidp/watch/projects_processor.rb +286 -0
- data/lib/aidp/watch/repository_client.rb +861 -53
- data/lib/aidp/watch/review_processor.rb +33 -6
- data/lib/aidp/watch/runner.rb +51 -11
- data/lib/aidp/watch/state_store.rb +233 -0
- data/lib/aidp/watch/sub_issue_creator.rb +221 -0
- data/lib/aidp/workflows/guided_agent.rb +4 -0
- data/lib/aidp/workstream_executor.rb +3 -0
- data/lib/aidp/worktree.rb +61 -11
- data/lib/aidp/worktree_branch_manager.rb +347 -101
- data/templates/implementation/iterative_implementation.md +46 -3
- metadata +21 -1
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../config_paths"
|
|
4
|
+
|
|
5
|
+
module Aidp
|
|
6
|
+
module Evaluations
|
|
7
|
+
# Captures context for evaluation records.
#
# Gathers information about:
# - Prompt state (whether `.aidp/PROMPT.md` exists and how long it is)
# - Work-loop data (step name, iteration, checkpoint status and metrics)
# - Environment details (Ruby version, platform, git branch/commit,
#   devcontainer status, AIDP version)
# - Watch-mode state for a given issue or pull request
#
# Every capture is best-effort: a piece of context that cannot be read is
# reported as nil (or an empty hash) instead of raising.
#
# @example Capturing context
#   context = ContextCapture.new(project_dir: Dir.pwd)
#   data = context.capture(step_name: "01_INIT", iteration: 3)
class ContextCapture
  # Maps a watch processor type to the top-level section of the watch state
  # file that holds its per-item state.
  WATCH_STATE_SECTIONS = {
    "plan" => "issues",
    "build" => "issues",
    "review" => "pull_requests",
    "ci_fix" => "pull_requests",
    "change_request" => "pull_requests"
  }.freeze
  private_constant :WATCH_STATE_SECTIONS

  # @param project_dir [String] Root of the project whose `.aidp` data is read
  # @param config [Object, nil] Stored on the instance; not read by this class
  def initialize(project_dir: Dir.pwd, config: nil)
    @project_dir = project_dir
    @config = config

    Aidp.log_debug("context_capture", "initialize", project_dir: project_dir)
  end

  # Capture full context for an evaluation
  #
  # @param step_name [String, nil] Current work loop step
  # @param iteration [Integer, nil] Current iteration number
  # @param provider [String, nil] AI provider being used
  # @param model [String, nil] AI model being used
  # @param additional [Hash, nil] Extra entries merged over the captured
  #   context (nil is treated as empty)
  # @return [Hash] Captured context
  def capture(step_name: nil, iteration: nil, provider: nil, model: nil, additional: {})
    Aidp.log_debug("context_capture", "capture",
      step_name: step_name, iteration: iteration, provider: provider)

    {
      prompt: capture_prompt_context(step_name),
      work_loop: capture_work_loop_context(step_name, iteration),
      environment: capture_environment_context,
      provider: {name: provider, model: model},
      timestamp: current_timestamp
    }.merge(additional || {})
  end

  # Capture minimal context (for quick evaluations)
  #
  # @return [Hash] Minimal context with timestamp and environment basics
  def capture_minimal
    Aidp.log_debug("context_capture", "capture_minimal")

    {
      environment: {
        ruby_version: RUBY_VERSION,
        branch: current_git_branch,
        aidp_version: aidp_version
      },
      timestamp: current_timestamp
    }
  end

  # Capture watch mode context for evaluating watch outputs
  #
  # @param repo [String] Repository in owner/repo format
  # @param number [Integer] Issue or PR number
  # @param processor_type [String, Symbol] Type of processor
  #   (plan, review, build, ci_fix, change_request)
  # @return [Hash] Watch mode context
  def capture_watch(repo:, number:, processor_type:)
    Aidp.log_debug("context_capture", "capture_watch",
      repo: repo, number: number, processor_type: processor_type)

    {
      watch: {
        repo: repo,
        number: number,
        processor_type: processor_type,
        state: load_watch_state(repo, number, processor_type)
      },
      environment: capture_environment_context,
      timestamp: current_timestamp
    }
  end

  private

  # Looks up the stored watch state for one issue/PR and processor type.
  # String keys are tried first, then symbol keys, since the state file may
  # have been written either way.
  #
  # @return [Object, nil] The stored state, or nil when the processor type is
  #   unknown, the state file is missing, or it cannot be read
  def load_watch_state(repo, number, processor_type)
    type = processor_type.to_s
    section = WATCH_STATE_SECTIONS[type]
    return nil unless section

    state_file = find_watch_state_file(repo)
    return nil unless state_file

    state = load_yaml_file(state_file)
    return nil unless state.is_a?(Hash)

    state.dig(section, number.to_s, type) ||
      state.dig(section.to_sym, number, type.to_sym)
  rescue => e
    Aidp.log_error("context_capture", "load_watch_state failed", error: e.message)
    nil
  end

  # Resolves the watch state file for a repository.
  #
  # @return [String, nil] Path to the state file, or nil when it does not exist
  def find_watch_state_file(repo)
    # NOTE(review): intended to mirror StateStore's file naming — confirm
    # against that class, which is not visible here.
    sanitized = repo.to_s.tr("/", "_").gsub(/[^a-zA-Z0-9_-]/, "")
    state_file = File.join(@project_dir, ".aidp", "watch", "#{sanitized}.yml")

    File.file?(state_file) ? state_file : nil
  end

  # Safely loads a YAML file, allowing only Time, Date and Symbol objects.
  def load_yaml_file(path)
    require "yaml"
    # Date must be loaded before it can be named below; without this the
    # NameError would be swallowed by the callers' rescue clauses.
    require "date"

    YAML.safe_load_file(path, permitted_classes: [Time, Date, Symbol])
  end

  # Describes the current prompt file, if any.
  #
  # @return [Hash] Empty when `.aidp/PROMPT.md` is missing or unreadable
  def capture_prompt_context(step_name)
    prompt_file = File.join(@project_dir, ".aidp", "PROMPT.md")
    return {} unless File.exist?(prompt_file)

    {
      step_name: step_name,
      prompt_length: File.read(prompt_file).length,
      has_prompt: true
    }
  rescue => e
    Aidp.log_error("context_capture", "capture_prompt_context failed", error: e.message)
    {}
  end

  # Describes the work loop position plus the checkpoint's status and metrics.
  #
  # @return [Hash] Always has :step_name and :iteration; :checkpoint is nil
  #   when no usable checkpoint exists and absent when reading it failed
  def capture_work_loop_context(step_name, iteration)
    base = {step_name: step_name, iteration: iteration}

    checkpoint_file = ConfigPaths.checkpoint_file(@project_dir)
    checkpoint_data = load_yaml_file(checkpoint_file) if File.exist?(checkpoint_file)

    summary = if checkpoint_data.is_a?(Hash)
      {
        status: checkpoint_data["status"] || checkpoint_data[:status],
        metrics: checkpoint_data["metrics"] || checkpoint_data[:metrics]
      }
    end

    base.merge(checkpoint: summary)
  rescue => e
    Aidp.log_error("context_capture", "capture_work_loop_context failed", error: e.message)
    base || {step_name: step_name, iteration: iteration}
  end

  # @return [Hash] Runtime, git and container details of the current process
  def capture_environment_context
    {
      ruby_version: RUBY_VERSION,
      platform: RUBY_PLATFORM,
      branch: current_git_branch,
      commit: current_git_commit,
      devcontainer: in_devcontainer?,
      aidp_version: aidp_version
    }
  end

  # @return [String, nil] Checked-out branch name, or nil when unavailable
  def current_git_branch
    git_output("rev-parse", "--abbrev-ref", "HEAD")
  end

  # @return [String, nil] Abbreviated HEAD commit, or nil when unavailable
  def current_git_commit
    git_output("rev-parse", "--short", "HEAD")
  end

  # Runs git against the project directory and returns its trimmed stdout.
  #
  # Uses `git -C` instead of Dir.chdir so the process-wide working directory
  # is never changed (block-form chdir conflicts when used from several
  # threads), and passes arguments as an array so no shell is involved.
  #
  # @return [String, nil] nil when git fails, is missing, or prints nothing
  def git_output(*args)
    require "open3"

    stdout, status = Open3.capture2("git", "-C", @project_dir.to_s, *args, err: File::NULL)
    return nil unless status.success?

    output = stdout.strip
    output.empty? ? nil : output
  rescue
    nil
  end

  # @return [Boolean] true when a Docker marker file or the REMOTE_CONTAINERS
  #   environment variable indicates a container
  def in_devcontainer?
    File.exist?("/.dockerenv") || ENV["REMOTE_CONTAINERS"] == "true"
  end

  # @return [String, nil] The AIDP version when the constant is loaded
  def aidp_version
    defined?(Aidp::VERSION) ? Aidp::VERSION : nil
  end

  # @return [String] Current time in ISO 8601 format
  def current_timestamp
    # On Rubies where Time#iso8601 is not built in, it comes from "time".
    require "time"

    Time.now.iso8601
  end
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module Aidp
|
|
6
|
+
module Evaluations
|
|
7
|
+
# Represents a single evaluation record
#
# An evaluation captures user feedback (good/neutral/bad) for AIDP outputs
# such as prompts, work units, or full work loops, along with a context hash.
# Ratings and target types are normalized to lowercase strings and validated
# against VALID_RATINGS / VALID_TARGET_TYPES.
#
# @example Creating an evaluation
#   record = EvaluationRecord.new(
#     rating: "good",
#     comment: "Generated code was clean and well-structured",
#     target_type: "work_unit",
#     target_id: "01_INIT"
#   )
class EvaluationRecord
  VALID_RATINGS = %w[good neutral bad].freeze
  VALID_TARGET_TYPES = %w[prompt work_unit work_loop step plan review build ci_fix change_request].freeze

  attr_reader :id, :rating, :comment, :target_type, :target_id,
    :context, :created_at

  class << self
    # Create record from stored hash
    #
    # Only top-level keys are symbolized; the nested context hash is passed
    # through unchanged.
    #
    # @param hash [Hash] Attributes with String or Symbol keys
    # @return [EvaluationRecord]
    # @raise [ArgumentError] when hash is not a Hash, or holds an invalid
    #   rating or target type
    def from_h(hash)
      raise ArgumentError, "Expected a Hash, got #{hash.class}" unless hash.is_a?(Hash)

      attrs = symbolize_keys(hash)
      new(
        id: attrs[:id],
        rating: attrs[:rating],
        comment: attrs[:comment],
        target_type: attrs[:target_type],
        target_id: attrs[:target_id],
        context: attrs[:context] || {},
        created_at: attrs[:created_at]
      )
    end

    private

    # Converts String keys to Symbols, leaving other keys untouched.
    def symbolize_keys(hash)
      return hash unless hash.is_a?(Hash)
      hash.transform_keys { |k| k.is_a?(String) ? k.to_sym : k }
    end
  end

  # @param rating [String, Symbol] One of VALID_RATINGS (case-insensitive)
  # @param comment [String, nil] Free-form feedback
  # @param target_type [String, Symbol, nil] One of VALID_TARGET_TYPES
  #   (case-insensitive), or nil
  # @param target_id [String, nil] Identifier of the rated output
  # @param context [Hash, nil] Captured context (nil becomes an empty hash)
  # @param id [String, nil] Existing ID; a new one is generated when nil
  # @param created_at [String, Time, nil] Creation time; defaults to now.
  #   Time-like values are converted to ISO 8601 strings.
  # @raise [ArgumentError] when rating or target_type is invalid
  def initialize(rating:, comment: nil, target_type: nil, target_id: nil, context: {}, id: nil, created_at: nil)
    @id = id || generate_id
    @rating = validate_rating(rating)
    @comment = comment
    @target_type = validate_target_type(target_type)
    @target_id = target_id
    @context = context || {}
    @created_at = normalize_timestamp(created_at)

    Aidp.log_debug("evaluation_record", "create",
      id: @id, rating: @rating, target_type: @target_type, target_id: @target_id)
  end

  # Convert to hash for storage
  #
  # @return [Hash] Symbol-keyed attributes
  def to_h
    {
      id: @id,
      rating: @rating,
      comment: @comment,
      target_type: @target_type,
      target_id: @target_id,
      context: @context,
      created_at: @created_at
    }
  end

  # Check if rating is positive
  def good?
    @rating == "good"
  end

  # Check if rating is negative
  def bad?
    @rating == "bad"
  end

  # Check if rating is neutral
  def neutral?
    @rating == "neutral"
  end

  private

  # Builds an ID of the form eval_YYYYMMDD_HHMMSS_<8 hex chars>.
  def generate_id
    "eval_#{Time.now.strftime("%Y%m%d_%H%M%S")}_#{SecureRandom.hex(4)}"
  end

  # Keeps created_at a String so records compare and serialize uniformly:
  # nil becomes the current time and Time-like values are formatted as
  # ISO 8601, while Strings are kept as given.
  def normalize_timestamp(value)
    # On Rubies where Time#iso8601 is not built in, it comes from "time".
    require "time"

    value ||= Time.now
    value.respond_to?(:iso8601) ? value.iso8601 : value
  end

  # @return [String] The lowercase rating
  # @raise [ArgumentError] when the rating is not in VALID_RATINGS
  def validate_rating(rating)
    rating_str = rating.to_s.downcase
    unless VALID_RATINGS.include?(rating_str)
      raise ArgumentError, "Invalid rating '#{rating}'. Must be one of: #{VALID_RATINGS.join(", ")}"
    end
    rating_str
  end

  # @return [String, nil] The lowercase target type, or nil when none given
  # @raise [ArgumentError] when the type is not in VALID_TARGET_TYPES
  def validate_target_type(target_type)
    return nil if target_type.nil?

    type_str = target_type.to_s.downcase
    unless VALID_TARGET_TYPES.include?(type_str)
      raise ArgumentError, "Invalid target_type '#{target_type}'. Must be one of: #{VALID_TARGET_TYPES.join(", ")}"
    end
    type_str
  end
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "evaluation_record"
|
|
6
|
+
require_relative "../config_paths"
|
|
7
|
+
require_relative "../rescue_logging"
|
|
8
|
+
|
|
9
|
+
module Aidp
|
|
10
|
+
module Evaluations
|
|
11
|
+
# Storage manager for evaluation records
#
# Stores evaluations in the project's evaluations directory:
# - Individual evaluations stored as JSON files: `<id>.json`
#   (generated IDs look like `eval_YYYYMMDD_HHMMSS_xxxx`)
# - Indexed summary file for efficient lookups (one entry per evaluation ID)
#
# Public methods never raise: failures are logged through `log_rescue` and
# reported via the return value.
#
# @example Storing an evaluation
#   storage = EvaluationStorage.new(project_dir: Dir.pwd)
#   storage.store(record)
#
# @example Listing evaluations
#   storage.list(limit: 10)
#   storage.list(rating: "bad")
class EvaluationStorage
  include Aidp::RescueLogging

  # Evaluation IDs become file names, so only characters that cannot form a
  # path (no separators) are accepted.
  SAFE_ID_PATTERN = /\A[A-Za-z0-9_.-]+\z/
  private_constant :SAFE_ID_PATTERN

  # @param project_dir [String] Project root used to resolve storage paths
  def initialize(project_dir: Dir.pwd)
    @project_dir = project_dir
    @evaluations_dir = ConfigPaths.evaluations_dir(project_dir)
    @index_file = ConfigPaths.evaluations_index_file(project_dir)

    Aidp.log_debug("evaluation_storage", "initialize",
      project_dir: project_dir, evaluations_dir: @evaluations_dir)
  end

  # Store a new evaluation record
  #
  # Storing a record whose ID already exists overwrites its file and replaces
  # its index entry.
  #
  # @param record [EvaluationRecord] The evaluation to store
  # @return [Hash] Result with :success and :id keys, plus :file_path on
  #   success or :error on failure
  def store(record)
    ensure_directory
    file_path = record_path(record.id)

    Aidp.log_debug("evaluation_storage", "store",
      id: record.id, rating: record.rating, file_path: file_path)

    File.write(file_path, JSON.pretty_generate(record.to_h))
    update_index(record)

    {success: true, id: record.id, file_path: file_path}
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "store",
      fallback: {success: false},
      id: record.id)
    {success: false, error: error.message, id: record.id}
  end

  # Load a specific evaluation by ID
  #
  # @param id [String] The evaluation ID
  # @return [EvaluationRecord, nil] The record, or nil when it is not found,
  #   the ID is not a valid file name, or the file cannot be parsed
  def load(id)
    file_path = record_path(id)
    return nil unless File.exist?(file_path)

    Aidp.log_debug("evaluation_storage", "load", id: id)

    data = JSON.parse(File.read(file_path))
    EvaluationRecord.from_h(data)
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "load",
      fallback: nil,
      id: id)
    nil
  end

  # List evaluations with optional filtering
  #
  # @param limit [Integer, nil] Maximum number of records to return
  #   (nil returns all)
  # @param rating [String, Symbol, nil] Filter by rating (good/neutral/bad),
  #   case-insensitive
  # @param target_type [String, Symbol, nil] Filter by target type,
  #   case-insensitive
  # @return [Array<EvaluationRecord>] Matching records, newest first
  def list(limit: 50, rating: nil, target_type: nil)
    Aidp.log_debug("evaluation_storage", "list",
      limit: limit, rating: rating, target_type: target_type)

    entries = load_index[:entries]
    entries = filter_entries(entries, :rating, rating)
    entries = filter_entries(entries, :target_type, target_type)
    newest_first = entries.sort_by { |e| e[:created_at].to_s }.reverse

    # Load lazily so index entries whose file has gone missing do not count
    # against the limit.
    records = newest_first.lazy.filter_map { |entry| load(entry[:id]) }
    limit ? records.first(limit) : records.to_a
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "list",
      fallback: [],
      limit: limit)
    []
  end

  # Get statistics about evaluations
  #
  # @return [Hash] :total, :by_rating, :by_target_type, :first_evaluation and
  #   :last_evaluation; zeroed/nil when the index cannot be read
  def stats
    Aidp.log_debug("evaluation_storage", "stats")

    entries = load_index[:entries]
    ratings = entries.map { |e| e[:rating] }.tally
    first_at, last_at = entries.filter_map { |e| e[:created_at] }.minmax

    {
      total: entries.size,
      by_rating: {
        good: ratings.fetch("good", 0),
        neutral: ratings.fetch("neutral", 0),
        bad: ratings.fetch("bad", 0)
      },
      by_target_type: entries.map { |e| e[:target_type] }.tally,
      first_evaluation: first_at,
      last_evaluation: last_at
    }
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "stats",
      fallback: {total: 0, by_rating: {good: 0, neutral: 0, bad: 0}})
    empty_stats
  end

  # Delete an evaluation by ID
  #
  # A missing evaluation is reported as success with an explanatory message.
  #
  # @param id [String] The evaluation ID
  # @return [Hash] Result with :success key
  def delete(id)
    file_path = record_path(id)
    return {success: true, message: "Evaluation not found"} unless File.exist?(file_path)

    Aidp.log_debug("evaluation_storage", "delete", id: id)

    File.delete(file_path)
    remove_from_index(id)

    {success: true, id: id}
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "delete",
      fallback: {success: false},
      id: id)
    {success: false, error: error.message}
  end

  # Clear all evaluations
  #
  # Removes the whole evaluations directory.
  #
  # @return [Hash] Result with :success and :count keys
  def clear
    Aidp.log_debug("evaluation_storage", "clear")

    return {success: true, count: 0} unless Dir.exist?(@evaluations_dir)

    count = Dir.glob(File.join(@evaluations_dir, "eval_*.json")).size
    FileUtils.rm_rf(@evaluations_dir)

    {success: true, count: count}
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "clear",
      fallback: {success: false})
    {success: false, error: error.message}
  end

  # Check if evaluations directory exists and has evaluations
  def any?
    Dir.exist?(@evaluations_dir) && Dir.glob(File.join(@evaluations_dir, "eval_*.json")).any?
  end

  private

  def ensure_directory
    ConfigPaths.ensure_evaluations_dir(@project_dir)
  end

  # Builds the JSON file path for an evaluation ID.
  #
  # @raise [ArgumentError] when the ID contains characters that could escape
  #   the evaluations directory (the public callers rescue this)
  def record_path(id)
    id_str = id.to_s
    raise ArgumentError, "Invalid evaluation id #{id.inspect}" unless SAFE_ID_PATTERN.match?(id_str)

    File.join(@evaluations_dir, "#{id_str}.json")
  end

  # Keeps only entries whose value under key matches wanted, compared as
  # lowercase strings; a nil filter keeps everything.
  def filter_entries(entries, key, wanted)
    return entries if wanted.nil?

    normalized = wanted.to_s.downcase
    entries.select { |e| e[key].to_s == normalized }
  end

  # Result returned by #stats when the index cannot be summarized.
  def empty_stats
    {
      total: 0,
      by_rating: {good: 0, neutral: 0, bad: 0},
      by_target_type: {},
      first_evaluation: nil,
      last_evaluation: nil
    }
  end

  # Reads the index with symbolized keys.
  #
  # @return [Hash] Always a Hash whose :entries is an Array; an unreadable or
  #   malformed index is logged and treated as empty
  def load_index
    return {entries: []} unless File.exist?(@index_file)

    data = JSON.parse(File.read(@index_file), symbolize_names: true)
    return {entries: []} unless data.is_a?(Hash)

    data[:entries] = [] unless data[:entries].is_a?(Array)
    data
  rescue => error
    log_rescue(error,
      component: "evaluation_storage",
      action: "load_index",
      fallback: {entries: []})
    {entries: []}
  end

  # Adds (or replaces) the index entry for a record. Only minimal data is
  # kept in the index for quick lookups.
  def update_index(record)
    index = load_index
    index[:entries].reject! { |e| e[:id] == record.id }
    index[:entries] << {
      id: record.id,
      rating: record.rating,
      target_type: record.target_type,
      target_id: record.target_id,
      created_at: record.created_at
    }

    write_index(index)
  end

  def remove_from_index(id)
    index = load_index
    index[:entries].reject! { |e| e[:id] == id }

    write_index(index)
  end

  # Stamps and persists the index. Writes to a temporary file first and then
  # renames it so a crash mid-write does not leave a truncated index behind.
  def write_index(index)
    # On Rubies where Time#iso8601 is not built in, it comes from "time".
    require "time"

    index[:updated_at] = Time.now.iso8601

    temp_file = "#{@index_file}.tmp"
    File.write(temp_file, JSON.pretty_generate(index))
    File.rename(temp_file, @index_file)
  end
end
|
|
249
|
+
end
|
|
250
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "evaluations/evaluation_record"
|
|
4
|
+
require_relative "evaluations/evaluation_storage"
|
|
5
|
+
require_relative "evaluations/context_capture"
|
|
6
|
+
|
|
7
|
+
module Aidp
|
|
8
|
+
# Evaluation and feedback system for AIDP outputs
|
|
9
|
+
#
|
|
10
|
+
# Enables users to rate generated outputs (prompts, work units, work loops)
|
|
11
|
+
# as good, neutral, or bad while capturing rich execution context.
|
|
12
|
+
#
|
|
13
|
+
# @example Creating and storing an evaluation
|
|
14
|
+
# record = Aidp::Evaluations::EvaluationRecord.new(
|
|
15
|
+
# rating: "good",
|
|
16
|
+
# comment: "Clean code generated",
|
|
17
|
+
# target_type: "work_unit"
|
|
18
|
+
# )
|
|
19
|
+
# storage = Aidp::Evaluations::EvaluationStorage.new
|
|
20
|
+
# storage.store(record)
|
|
21
|
+
module Evaluations
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -21,6 +21,9 @@ module Aidp
|
|
|
21
21
|
|
|
22
22
|
attr_reader :state, :instruction_queue, :work_thread
|
|
23
23
|
|
|
24
|
+
# Expose sync_runner for testability
|
|
25
|
+
attr_accessor :sync_runner
|
|
26
|
+
|
|
24
27
|
def initialize(project_dir, provider_manager, config, options = {})
|
|
25
28
|
@project_dir = project_dir
|
|
26
29
|
@provider_manager = provider_manager
|
|
@@ -175,7 +178,7 @@ module Aidp
|
|
|
175
178
|
def save_cancellation_checkpoint
|
|
176
179
|
return unless @sync_runner
|
|
177
180
|
|
|
178
|
-
checkpoint = @sync_runner.
|
|
181
|
+
checkpoint = @sync_runner.checkpoint
|
|
179
182
|
return unless checkpoint
|
|
180
183
|
|
|
181
184
|
checkpoint.record_checkpoint(
|
|
@@ -23,6 +23,10 @@ module Aidp
|
|
|
23
23
|
class InteractiveRepl
|
|
24
24
|
include Aidp::RescueLogging
|
|
25
25
|
|
|
26
|
+
# Expose running state and repl_macros for testability
|
|
27
|
+
attr_accessor :running
|
|
28
|
+
attr_reader :repl_macros, :async_runner, :completion_setup_needed, :output_display_thread
|
|
29
|
+
|
|
26
30
|
def initialize(project_dir, provider_manager, config, options = {})
|
|
27
31
|
@project_dir = project_dir
|
|
28
32
|
@provider_manager = provider_manager
|
|
@@ -30,8 +34,8 @@ module Aidp
|
|
|
30
34
|
@options = options
|
|
31
35
|
@prompt = options[:prompt] || TTY::Prompt.new
|
|
32
36
|
@async_runner_class = options[:async_runner_class] || AsyncWorkLoopRunner
|
|
33
|
-
@async_runner =
|
|
34
|
-
@repl_macros = ReplMacros.new
|
|
37
|
+
@async_runner = options[:async_runner]
|
|
38
|
+
@repl_macros = options[:repl_macros] || ReplMacros.new
|
|
35
39
|
@output_display_thread = nil
|
|
36
40
|
@running = false
|
|
37
41
|
@completion_setup_needed = true
|