agentic 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.agentic.yml +2 -0
- data/.architecture/decisions/ArchitecturalFeatureBuilder.md +136 -0
- data/.architecture/decisions/ArchitectureConsiderations.md +200 -0
- data/.architecture/decisions/adr_001_observer_pattern_implementation.md +196 -0
- data/.architecture/decisions/adr_002_plan_orchestrator.md +320 -0
- data/.architecture/decisions/adr_003_plan_orchestrator_interface.md +179 -0
- data/.architecture/decisions/adrs/ADR-001-dependency-management.md +147 -0
- data/.architecture/decisions/adrs/ADR-002-system-boundaries.md +162 -0
- data/.architecture/decisions/adrs/ADR-003-content-safety.md +158 -0
- data/.architecture/decisions/adrs/ADR-004-agent-permissions.md +161 -0
- data/.architecture/decisions/adrs/ADR-005-adaptation-engine.md +127 -0
- data/.architecture/decisions/adrs/ADR-006-extension-system.md +273 -0
- data/.architecture/decisions/adrs/ADR-007-learning-system.md +156 -0
- data/.architecture/decisions/adrs/ADR-008-prompt-generation.md +325 -0
- data/.architecture/decisions/adrs/ADR-009-task-failure-handling.md +353 -0
- data/.architecture/decisions/adrs/ADR-010-task-input-handling.md +251 -0
- data/.architecture/decisions/adrs/ADR-011-task-observable-pattern.md +391 -0
- data/.architecture/decisions/adrs/ADR-012-task-output-handling.md +205 -0
- data/.architecture/decisions/adrs/ADR-013-architecture-alignment.md +211 -0
- data/.architecture/decisions/adrs/ADR-014-agent-capability-registry.md +80 -0
- data/.architecture/decisions/adrs/ADR-015-persistent-agent-store.md +100 -0
- data/.architecture/decisions/adrs/ADR-016-agent-assembly-engine.md +117 -0
- data/.architecture/decisions/adrs/ADR-017-streaming-observability.md +171 -0
- data/.architecture/decisions/capability_tools_distinction.md +150 -0
- data/.architecture/decisions/cli_command_structure.md +61 -0
- data/.architecture/implementation/agent_self_assembly_implementation.md +267 -0
- data/.architecture/implementation/agent_self_assembly_summary.md +138 -0
- data/.architecture/members.yml +187 -0
- data/.architecture/planning/self_implementation_exercise.md +295 -0
- data/.architecture/planning/session_compaction_rule.md +43 -0
- data/.architecture/planning/streaming_observability_feature.md +223 -0
- data/.architecture/principles.md +151 -0
- data/.architecture/recalibration/0-2-0.md +92 -0
- data/.architecture/recalibration/agent_self_assembly.md +238 -0
- data/.architecture/recalibration/cli_command_structure.md +91 -0
- data/.architecture/recalibration/implementation_roadmap_0-2-0.md +301 -0
- data/.architecture/recalibration/progress_tracking_0-2-0.md +114 -0
- data/.architecture/recalibration_process.md +127 -0
- data/.architecture/reviews/0-2-0.md +181 -0
- data/.architecture/reviews/cli_command_duplication.md +98 -0
- data/.architecture/templates/adr.md +105 -0
- data/.architecture/templates/implementation_roadmap.md +125 -0
- data/.architecture/templates/progress_tracking.md +89 -0
- data/.architecture/templates/recalibration_plan.md +70 -0
- data/.architecture/templates/version_comparison.md +124 -0
- data/.claude/settings.local.json +13 -0
- data/.claude-sessions/001-task-class-architecture-implementation.md +129 -0
- data/.claude-sessions/002-plan-orchestrator-interface-review.md +105 -0
- data/.claude-sessions/architecture-governance-implementation.md +37 -0
- data/.claude-sessions/architecture-review-session.md +27 -0
- data/ArchitecturalFeatureBuilder.md +136 -0
- data/ArchitectureConsiderations.md +229 -0
- data/CHANGELOG.md +57 -2
- data/CLAUDE.md +111 -0
- data/CONTRIBUTING.md +286 -0
- data/MAINTAINING.md +301 -0
- data/README.md +582 -28
- data/docs/agent_capabilities_api.md +259 -0
- data/docs/artifact_extension_points.md +757 -0
- data/docs/artifact_generation_architecture.md +323 -0
- data/docs/artifact_implementation_plan.md +596 -0
- data/docs/artifact_integration_points.md +345 -0
- data/docs/artifact_verification_strategies.md +581 -0
- data/docs/streaming_observability_architecture.md +510 -0
- data/exe/agentic +6 -1
- data/lefthook.yml +5 -0
- data/lib/agentic/adaptation_engine.rb +124 -0
- data/lib/agentic/agent.rb +181 -4
- data/lib/agentic/agent_assembly_engine.rb +442 -0
- data/lib/agentic/agent_capability_registry.rb +260 -0
- data/lib/agentic/agent_config.rb +63 -0
- data/lib/agentic/agent_specification.rb +46 -0
- data/lib/agentic/capabilities/examples.rb +530 -0
- data/lib/agentic/capabilities.rb +14 -0
- data/lib/agentic/capability_provider.rb +146 -0
- data/lib/agentic/capability_specification.rb +118 -0
- data/lib/agentic/cli/agent.rb +31 -0
- data/lib/agentic/cli/capabilities.rb +191 -0
- data/lib/agentic/cli/config.rb +134 -0
- data/lib/agentic/cli/execution_observer.rb +796 -0
- data/lib/agentic/cli.rb +1068 -0
- data/lib/agentic/default_agent_provider.rb +35 -0
- data/lib/agentic/errors/llm_error.rb +184 -0
- data/lib/agentic/execution_plan.rb +53 -0
- data/lib/agentic/execution_result.rb +91 -0
- data/lib/agentic/expected_answer_format.rb +46 -0
- data/lib/agentic/extension/domain_adapter.rb +109 -0
- data/lib/agentic/extension/plugin_manager.rb +163 -0
- data/lib/agentic/extension/protocol_handler.rb +116 -0
- data/lib/agentic/extension.rb +45 -0
- data/lib/agentic/factory_methods.rb +9 -1
- data/lib/agentic/generation_stats.rb +61 -0
- data/lib/agentic/learning/README.md +84 -0
- data/lib/agentic/learning/capability_optimizer.rb +613 -0
- data/lib/agentic/learning/execution_history_store.rb +251 -0
- data/lib/agentic/learning/pattern_recognizer.rb +500 -0
- data/lib/agentic/learning/strategy_optimizer.rb +706 -0
- data/lib/agentic/learning.rb +131 -0
- data/lib/agentic/llm_assisted_composition_strategy.rb +188 -0
- data/lib/agentic/llm_client.rb +215 -15
- data/lib/agentic/llm_config.rb +65 -1
- data/lib/agentic/llm_response.rb +163 -0
- data/lib/agentic/logger.rb +1 -1
- data/lib/agentic/observable.rb +51 -0
- data/lib/agentic/persistent_agent_store.rb +385 -0
- data/lib/agentic/plan_execution_result.rb +129 -0
- data/lib/agentic/plan_orchestrator.rb +464 -0
- data/lib/agentic/plan_orchestrator_config.rb +57 -0
- data/lib/agentic/retry_config.rb +63 -0
- data/lib/agentic/retry_handler.rb +125 -0
- data/lib/agentic/structured_outputs.rb +1 -1
- data/lib/agentic/task.rb +193 -0
- data/lib/agentic/task_definition.rb +39 -0
- data/lib/agentic/task_execution_result.rb +92 -0
- data/lib/agentic/task_failure.rb +66 -0
- data/lib/agentic/task_output_schemas.rb +112 -0
- data/lib/agentic/task_planner.rb +54 -19
- data/lib/agentic/task_result.rb +48 -0
- data/lib/agentic/ui.rb +244 -0
- data/lib/agentic/verification/critic_framework.rb +116 -0
- data/lib/agentic/verification/llm_verification_strategy.rb +60 -0
- data/lib/agentic/verification/schema_verification_strategy.rb +47 -0
- data/lib/agentic/verification/verification_hub.rb +62 -0
- data/lib/agentic/verification/verification_result.rb +50 -0
- data/lib/agentic/verification/verification_strategy.rb +26 -0
- data/lib/agentic/version.rb +1 -1
- data/lib/agentic.rb +74 -2
- data/plugins/README.md +41 -0
- metadata +245 -6
data/lib/agentic/task_planner.rb
CHANGED
@@ -1,15 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "execution_plan"
|
4
|
+
require_relative "agent_specification"
|
5
|
+
require_relative "task_definition"
|
6
|
+
require_relative "expected_answer_format"
|
7
|
+
|
3
8
|
module Agentic
|
4
9
|
# Handles the task planning process for Agentic using LLM
|
10
|
+
#
|
11
|
+
# This class follows separation of concerns by:
|
12
|
+
# 1. Focusing on core planning logic and data generation
|
13
|
+
# 2. Returning structured data (ExecutionPlan) instead of formatted strings
|
14
|
+
# 3. Delegating presentation concerns to the ExecutionPlan class
|
5
15
|
class TaskPlanner
|
6
16
|
# @return [String] The goal to be accomplished
|
7
17
|
attr_reader :goal
|
8
18
|
|
9
|
-
# @return [Array<
|
19
|
+
# @return [Array<TaskDefinition>] The list of tasks to accomplish the goal
|
10
20
|
attr_reader :tasks
|
11
21
|
|
12
|
-
# @return [
|
22
|
+
# @return [ExpectedAnswerFormat] The expected answer format
|
13
23
|
attr_reader :expected_answer
|
14
24
|
|
15
25
|
# @return [LlmConfig] The configuration for the LLM
|
@@ -21,7 +31,11 @@ module Agentic
|
|
21
31
|
def initialize(goal, llm_config = LlmConfig.new)
|
22
32
|
@goal = goal
|
23
33
|
@tasks = []
|
24
|
-
@expected_answer =
|
34
|
+
@expected_answer = ExpectedAnswerFormat.new(
|
35
|
+
format: "Undetermined",
|
36
|
+
sections: [],
|
37
|
+
length: "Undetermined"
|
38
|
+
)
|
25
39
|
@llm_config = llm_config
|
26
40
|
end
|
27
41
|
|
@@ -51,7 +65,22 @@ module Agentic
|
|
51
65
|
end
|
52
66
|
|
53
67
|
response = llm_request(system_message, user_message, schema)
|
54
|
-
|
68
|
+
|
69
|
+
if response.successful?
|
70
|
+
@tasks = response.content["tasks"].map do |task_data|
|
71
|
+
TaskDefinition.new(
|
72
|
+
description: task_data["description"],
|
73
|
+
agent: AgentSpecification.new(
|
74
|
+
name: task_data["agent"]["name"],
|
75
|
+
description: task_data["agent"]["description"],
|
76
|
+
instructions: task_data["agent"]["instructions"]
|
77
|
+
)
|
78
|
+
)
|
79
|
+
end
|
80
|
+
else
|
81
|
+
Agentic.logger.error("Failed to analyze goal: #{response.error&.message || response.refusal}")
|
82
|
+
@tasks = []
|
83
|
+
end
|
55
84
|
end
|
56
85
|
|
57
86
|
# Determines the expected answer format using LLM
|
@@ -67,29 +96,35 @@ module Agentic
|
|
67
96
|
end
|
68
97
|
|
69
98
|
response = llm_request(system_message, user_message, schema)
|
70
|
-
@expected_answer = response[:content]
|
71
|
-
end
|
72
99
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
100
|
+
if response.successful?
|
101
|
+
@expected_answer = ExpectedAnswerFormat.new(
|
102
|
+
format: response.content["format"],
|
103
|
+
sections: response.content["sections"],
|
104
|
+
length: response.content["length"]
|
105
|
+
)
|
106
|
+
else
|
107
|
+
Agentic.logger.error("Failed to determine expected answer format: #{response.error&.message || response.refusal}")
|
108
|
+
@expected_answer = ExpectedAnswerFormat.new(
|
109
|
+
format: "Undetermined",
|
110
|
+
sections: [],
|
111
|
+
length: "Undetermined"
|
112
|
+
)
|
79
113
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns an ExecutionPlan object representing the execution plan
|
117
|
+
# @return [ExecutionPlan] The structured execution plan
|
118
|
+
def execution_plan
|
119
|
+
ExecutionPlan.new(@tasks, @expected_answer)
|
85
120
|
end
|
86
121
|
|
87
122
|
# Executes the entire planning process
|
88
|
-
# @return [
|
123
|
+
# @return [ExecutionPlan] The structured execution plan
|
89
124
|
def plan
|
90
125
|
analyze_goal
|
91
126
|
determine_expected_answer
|
92
|
-
|
127
|
+
execution_plan
|
93
128
|
end
|
94
129
|
|
95
130
|
private
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Agentic
|
4
|
+
# Immutable-style record describing how a single task execution ended.
#
# @attr_reader [String] task_id The ID of the task that produced this result
# @attr_reader [Boolean] success Whether the task execution was successful
# @attr_reader [Hash, nil] output The output produced by the task, nil if unsuccessful
# @attr_reader [TaskFailure, nil] failure The failure information, nil if successful
class TaskResult
  attr_reader :task_id, :success, :output, :failure

  # Builds a result from its four components.
  #
  # @param task_id [String] The ID of the task that produced this result
  # @param success [Boolean] Whether the task execution was successful
  # @param output [Hash, nil] The output produced by the task
  # @param failure [TaskFailure, nil] The failure information
  # @return [TaskResult] A new task result instance
  def initialize(task_id:, success:, output: nil, failure: nil)
    @task_id, @success = task_id, success
    @output, @failure = output, failure
  end

  # @return [Boolean] The success flag exactly as it was supplied
  def successful?
    @success
  end

  # @return [Boolean] The negation of the success flag
  def failed?
    !@success
  end

  # Serializes the result; the failure (when present) is converted via its own #to_h.
  #
  # @return [Hash] The result as a hash with :task_id, :success, :output and :failure keys
  def to_h
    serialized_failure = @failure&.to_h

    {task_id: @task_id, success: @success, output: @output, failure: serialized_failure}
  end
end
|
48
|
+
end
|
data/lib/agentic/ui.rb
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "tty-spinner"
|
4
|
+
require "tty-progressbar"
|
5
|
+
require "tty-box"
|
6
|
+
require "tty-table"
|
7
|
+
require "tty-cursor"
|
8
|
+
require "pastel"
|
9
|
+
|
10
|
+
module Agentic
|
11
|
+
# UI helpers for the CLI: spinners, progress bars, boxes, colorized text and
# the live task table. All helpers are module-level (singleton) methods.
module UI
  # Creates and returns a new spinner
  # @param message [String] The message to display with the spinner
  # @param format [Symbol] The spinner format
  # @return [TTY::Spinner] The spinner object
  def self.spinner(message, format: :dots)
    TTY::Spinner.new("[:spinner] #{message}", format: format)
  end

  # Creates and returns a new progress bar
  # @param title [String] The progress bar title
  # @param total [Integer] The total number of steps
  # @param options [Hash] Additional options for the progress bar (override the defaults)
  # @return [TTY::ProgressBar] The progress bar object
  def self.progress_bar(title, total, options = {})
    TTY::ProgressBar.new("[:bar] #{title} :percent",
      total: total,
      width: 40,
      **options)
  end

  # Creates a colored text box
  # @param title [String] The box title
  # @param content [String] The box content
  # @param options [Hash] Additional options for the box (override the defaults)
  # @return [String] The formatted box
  def self.box(title, content, options = {})
    # Calculate width based on visible characters (strip ANSI codes)
    visible_lines = content.lines.map { |line| line.gsub(/\e\[[0-9;]*m/, "") }
    max_line_length = visible_lines.map(&:length).max || 0

    TTY::Box.frame(
      title: {top_left: title},
      # Cap the box at 100 columns; +4 leaves room around the longest line
      width: [100, max_line_length + 4].min,
      padding: 1,
      **options
    ) { content }
  end

  # Returns a memoized pastel instance for colorizing text
  # @return [Pastel] The pastel instance
  def self.pastel
    @pastel ||= Pastel.new
  end

  # Returns colored text
  # @param text [String] The text to colorize
  # @param color [Symbol] The color to use (dispatched as a method name on the pastel instance)
  # @return [String] The colorized text
  def self.colorize(text, color)
    pastel.send(color, text)
  end

  # Returns a text with a colored status indicator
  # @param text [String] The text to display
  # @param status [Symbol] The status; unknown statuses are rendered white
  # @return [String] The text with colored status
  def self.status_text(text, status)
    status_color = case status
    when :success, :completed
      :green
    when :failure, :failed, :error
      :red
    when :warning, :pending
      :yellow
    when :info, :in_progress
      :blue
    else
      :white
    end

    pastel.send(status_color, text)
  end

  # Formats a duration in seconds to a human-readable string
  #
  # Durations under a second are shown in milliseconds, under a minute in
  # seconds with two decimals, under an hour as "Xm Ys" and otherwise "Xh Ym".
  #
  # @param seconds [Numeric] The duration in seconds
  # @return [String] The formatted duration
  def self.format_duration(seconds)
    if seconds < 1
      "#{(seconds * 1000).round}ms"
    elsif seconds < 60
      "#{seconds.round(2)}s"
    else
      # Reduce to whole seconds BEFORE splitting into units. Rounding the
      # remainder after flooring the minutes produced "1m 60s" for 119.6.
      # Sub-hour values round to the nearest second; hour-plus values keep
      # the original floor semantics for the minute field.
      whole_seconds = (seconds < 3600) ? seconds.round : seconds.floor

      if whole_seconds < 3600
        minutes, remaining_seconds = whole_seconds.divmod(60)
        "#{minutes}m #{remaining_seconds}s"
      else
        hours, remainder = whole_seconds.divmod(3600)
        "#{hours}h #{remainder / 60}m"
      end
    end
  end

  # Handles a long-running operation with a spinner
  # @param message [String] The message to display
  # @param quiet [Boolean] Whether to suppress output (the block still runs)
  # @yield The block to execute
  # @return [Object] The return value of the block
  # @raise [StandardError] Re-raises whatever the block raised after marking the spinner as failed
  def self.with_spinner(message, quiet: false)
    return yield if quiet

    spinner = self.spinner(message)
    spinner.auto_spin

    begin
      result = yield
      spinner.success("(#{colorize("✓", :green)}) #{message}")
      result
    rescue => e
      spinner.error("(#{colorize("✗", :red)}) #{message}: #{e.message}")
      raise
    end
  end

  # Returns a task status indicator
  # @param status [Symbol] The status
  # @return [String] The colorized single-character status indicator ("?" for unknown statuses)
  def self.task_status_indicator(status)
    case status
    when :completed
      colorize("✓", :green)
    when :failed
      colorize("✗", :red)
    when :in_progress
      colorize("↻", :blue)
    when :building_agent, :agent_ready
      colorize("○", :yellow) # Pending task execution
    when :pending
      colorize("○", :yellow)
    when :canceled
      colorize("⨯", :yellow)
    else
      colorize("?", :white)
    end
  end

  # Creates a holistic task display table
  # @param tasks [Array<Hash>] Array of task data with status, description, etc.
  # @param options [Hash] Display options; :show_agent_column (default true) toggles
  #   the "Agent" column, and the whole hash is also forwarded to the table renderer
  # @return [String] The formatted table, or "" when there are no tasks
  def self.task_display_table(tasks, options = {})
    return "" if tasks.empty?

    # Use simplified headers when show_agent_column is false
    show_agent_column = options.fetch(:show_agent_column, true)
    headers = if show_agent_column
      ["Status", "Task", "Agent", "Duration", "Output"]
    else
      ["Status", "Task", "Duration", "Output"]
    end

    table = TTY::Table.new(
      header: headers,
      rows: tasks.map { |task| format_task_row(task, show_agent_column: show_agent_column) }
    )

    table.render(:unicode,
      padding: [0, 1],
      **options)
  end

  # Returns a cursor helper for terminal positioning
  # @return [TTY::Cursor] The cursor module
  def self.cursor
    @cursor ||= TTY::Cursor
  end

  # Clears lines and repositions cursor for table updates
  # @param line_count [Integer] Number of lines to clear
  # @return [void]
  def self.clear_and_reposition(line_count)
    print cursor.up(line_count) + cursor.clear_lines(line_count, :down)
  end

  # Formats a single task row for the display table
  # @param task [Hash] Task data including status, description, duration, output
  # @param show_agent_column [Boolean] Whether to include agent information in the row
  # @return [Array] Formatted row data, matching the header order of task_display_table
  def self.format_task_row(task, show_agent_column: true)
    status_indicator = task_status_indicator(task[:status])

    # Truncate description for display
    description = truncate_text(task[:description] || "Unknown task", 35)

    # Format agent information
    agent_info = if task[:status] == :building_agent
      colorize("🤖 Building...", :blue)
    elsif task[:agent_role]
      if task[:agent_duration]
        colorize("✓ #{task[:agent_role]} (#{format_duration(task[:agent_duration])})", :green)
      else
        colorize("✓ #{task[:agent_role]}", :green)
      end
    else
      "-"
    end

    # Format duration: a recorded duration wins; otherwise show elapsed time
    # for tasks that are still running and have a start time.
    duration = if task[:duration]
      format_duration(task[:duration])
    elsif [:in_progress, :building_agent, :agent_ready].include?(task[:status]) && task[:start_time]
      format_duration(Time.now - task[:start_time])
    else
      "-"
    end

    # Format output preview
    output = if task[:output] && !task[:output].to_s.empty?
      truncate_text(task[:output].to_s.strip, 25)
    elsif task[:status] == :failed && task[:error]
      # The error may be an Exception rather than a String; stringify it the
      # same way the output branch does so truncation cannot raise.
      colorize(truncate_text(task[:error].to_s, 25), :red)
    else
      "-"
    end

    if show_agent_column
      [status_indicator, description, agent_info, duration, output]
    else
      [status_indicator, description, duration, output]
    end
  end

  # Truncates text to specified length with ellipsis
  # @param text [String] Text to truncate
  # @param max_length [Integer] Maximum length of the returned string, ellipsis included
  # @return [String] The text unchanged when it fits, otherwise a prefix followed by "..."
  def self.truncate_text(text, max_length)
    return text if text.length <= max_length
    "#{text[0..max_length - 4]}..."
  end

  private_class_method :format_task_row, :truncate_text
end
|
244
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Agentic
|
4
|
+
module Verification
|
5
|
+
# Aggregates the opinions of several critics into one pass/fail evaluation
# of a task result.
class CriticFramework
  # @return [Array<Critic>] The critics registered with this framework
  attr_reader :critics

  # @return [Hash] Configuration options for the framework
  attr_reader :config

  # @param critics [Array<Critic>] The critics to register
  # @param config [Hash] Configuration options for the framework
  def initialize(critics: [], config: {})
    @critics, @config = critics, config
  end

  # Registers one more critic.
  # @param critic [Critic] The critic to add
  # @return [void]
  def add_critic(critic)
    @critics.push(critic)
  end

  # Runs every registered critic and combines their verdicts.
  #
  # Confidence is the share of positive critiques (0.5 when no critics are
  # registered); the combined verdict passes at a confidence of 0.7 or more.
  #
  # @param task [Task] The task to evaluate
  # @param result [TaskResult] The result to evaluate
  # @return [CriticResult] The combined evaluation result
  def evaluate(task, result)
    critiques = @critics.map { |critic| critic.critique(task, result) }
    approval = approval_ratio(critiques)
    all_comments = critiques.flat_map(&:comments)

    CriticResult.new(
      task_id: task.id,
      confidence: approval,
      verdict: approval >= 0.7,
      comments: all_comments
    )
  end

  private

  # Fraction of critiques that are positive, or 0.5 when there are none.
  def approval_ratio(critiques)
    approvals = critiques.count(&:positive?)
    return 0.5 unless critiques.size > 0

    approvals.to_f / critiques.size
  end
end
|
50
|
+
|
51
|
+
# Value object holding the outcome of a critic's (or the framework's combined)
# evaluation of one task.
class CriticResult
  # @return [String] The ID of the task that was evaluated
  attr_reader :task_id

  # @return [Float] The confidence of the evaluation (0.0-1.0)
  attr_reader :confidence

  # @return [Boolean] The verdict of the evaluation (true = pass, false = fail)
  attr_reader :verdict

  # @return [Array<String>] Comments from the critic
  attr_reader :comments

  # @param task_id [String] The ID of the task that was evaluated
  # @param confidence [Float] The confidence of the evaluation (0.0-1.0)
  # @param verdict [Boolean] The verdict of the evaluation (true = pass, false = fail)
  # @param comments [Array<String>] Comments from the critic
  def initialize(task_id:, confidence:, verdict:, comments: [])
    @task_id, @confidence = task_id, confidence
    @verdict, @comments = verdict, comments
  end

  # @return [Boolean] The stored verdict
  def positive?
    @verdict
  end

  # @return [Hash] The critic result as a hash keyed by :task_id, :confidence, :verdict and :comments
  def to_h
    {task_id: @task_id, confidence: @confidence, verdict: @verdict, comments: @comments}
  end
end
|
94
|
+
|
95
|
+
# Abstract base for critics; concrete critics override #critique.
class Critic
  # @return [Hash] Configuration options for the critic
  attr_reader :config

  # @param config [Hash] Configuration options for the critic
  def initialize(config = {})
    @config = config
  end

  # Critiques a task result. The base implementation always raises.
  #
  # @param task [Task] The task to critique
  # @param result [TaskResult] The result to critique
  # @return [CriticResult] The critique result
  # @raise [NotImplementedError] This method must be implemented by subclasses
  def critique(task, result)
    raise(NotImplementedError, "Subclasses must implement critique")
  end
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "verification_strategy"
|
4
|
+
require_relative "verification_result"
|
5
|
+
|
6
|
+
module Agentic
|
7
|
+
module Verification
|
8
|
+
# Verification strategy intended to ask an LLM whether a result satisfies its task.
#
# NOTE: the current implementation is a stub. It does not call the LLM client;
# it returns a randomly simulated verdict instead.
class LlmVerificationStrategy < VerificationStrategy
  # @param llm_client [LlmClient] The LLM client to use for verification
  # @param config [Hash] Configuration options for the strategy
  def initialize(llm_client, config = {})
    super(config)
    @llm_client = llm_client
  end

  # Verifies a task result using an LLM (simulated for now).
  #
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    return skipped_result(task) unless result.successful?

    # A real implementation would send a prompt along these lines to the LLM
    # and parse its assessment:
    #
    #   Task Description: <task.description>
    #   Task Input: <task.input.inspect>
    #   Task Result: <result.output.inspect>
    #
    #   Verify if the result satisfies the task requirements.
    #   Consider correctness, completeness, and alignment with the task description.
    #   Provide your assessment with a boolean verdict (verified: true/false)
    #   and a confidence score (0.0-1.0).
    #
    # Until then the outcome is simulated: 90% chance of success.
    verified = rand > 0.1

    if verified
      confidence = 0.8 + rand * 0.2
      message = "Result meets task requirements"
    else
      confidence = 0.3 + rand * 0.3
      message = "Result does not fully satisfy task requirements"
    end

    VerificationResult.new(
      task_id: task.id,
      verified: verified,
      confidence: confidence,
      messages: [message]
    )
  end

  private

  # Result returned for tasks that already failed; no verification is attempted.
  def skipped_result(task)
    VerificationResult.new(
      task_id: task.id,
      verified: false,
      confidence: 0.0,
      messages: ["Task failed, skipping LLM verification"]
    )
  end
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "verification_strategy"
|
4
|
+
require_relative "verification_result"
|
5
|
+
|
6
|
+
module Agentic
|
7
|
+
module Verification
|
8
|
+
# Verification strategy that checks a task's output against an expected schema.
#
# NOTE: actual schema validation is not implemented yet; when a schema is
# present the result is assumed to match it.
class SchemaVerificationStrategy < VerificationStrategy
  # Verifies a task result against a schema.
  #
  # The schema is read from the "output_schema" key of the task input when
  # that input is a Hash; without a schema the result passes by default.
  #
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    unless result.successful?
      return outcome(task, false, 0.0, "Task failed, skipping schema verification")
    end

    # Extracting schema from task if available
    schema = task.input.is_a?(Hash) ? task.input["output_schema"] : nil

    if schema
      # In a real implementation, we would validate the output against the schema
      # For this stub, we'll assume validation passes
      outcome(task, true, 0.9, "Output matches expected schema")
    else
      outcome(task, true, 0.5, "No schema specified for verification, passing by default")
    end
  end

  private

  # Builds a single-message VerificationResult for the given task.
  def outcome(task, verified, confidence, message)
    VerificationResult.new(
      task_id: task.id,
      verified: verified,
      confidence: confidence,
      messages: [message]
    )
  end
end
|
46
|
+
end
|
47
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Agentic
|
4
|
+
module Verification
|
5
|
+
# Coordinates verification strategies and manages the verification process
class VerificationHub
  # @return [Array<VerificationStrategy>] The registered verification strategies
  attr_reader :strategies

  # @return [Hash] Configuration options for the verification hub
  attr_reader :config

  # Initializes a new VerificationHub
  # @param strategies [Array<VerificationStrategy>] The verification strategies to use
  # @param config [Hash] Configuration options for the verification hub
  def initialize(strategies: [], config: {})
    @strategies = strategies
    @config = config
  end

  # Adds a verification strategy
  # @param strategy [VerificationStrategy] The strategy to add
  # @return [void]
  def add_strategy(strategy)
    @strategies << strategy
  end

  # Verifies a task result using the registered strategies
  #
  # The combined result is verified only if every strategy verified it; the
  # combined confidence is the arithmetic mean of the strategy confidences
  # and the messages of all strategies are concatenated in order.
  #
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    # Skip verification for failed tasks
    if result.failed?
      return VerificationResult.new(
        task_id: task.id,
        verified: false,
        confidence: 0.0,
        messages: ["Task failed, skipping verification"]
      )
    end

    # Without strategies the mean below would be 0 / 0.0 (NaN). Keep the
    # result passing, as before, but report a real, neutral confidence and
    # say why nothing was checked.
    if @strategies.empty?
      return VerificationResult.new(
        task_id: task.id,
        verified: true,
        confidence: 0.5,
        messages: ["No verification strategies registered, passing by default"]
      )
    end

    # Apply all strategies
    strategy_results = @strategies.map do |strategy|
      strategy.verify(task, result)
    end

    # Combine results
    verified = strategy_results.all?(&:verified)
    confidence = strategy_results.map(&:confidence).sum / strategy_results.size.to_f
    messages = strategy_results.flat_map(&:messages)

    VerificationResult.new(
      task_id: task.id,
      verified: verified,
      confidence: confidence,
      messages: messages
    )
  end
end
|
61
|
+
end
|
62
|
+
end
|