agentic 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/.agentic.yml +2 -0
  3. data/.architecture/decisions/ArchitecturalFeatureBuilder.md +136 -0
  4. data/.architecture/decisions/ArchitectureConsiderations.md +200 -0
  5. data/.architecture/decisions/adr_001_observer_pattern_implementation.md +196 -0
  6. data/.architecture/decisions/adr_002_plan_orchestrator.md +320 -0
  7. data/.architecture/decisions/adr_003_plan_orchestrator_interface.md +179 -0
  8. data/.architecture/decisions/adrs/ADR-001-dependency-management.md +147 -0
  9. data/.architecture/decisions/adrs/ADR-002-system-boundaries.md +162 -0
  10. data/.architecture/decisions/adrs/ADR-003-content-safety.md +158 -0
  11. data/.architecture/decisions/adrs/ADR-004-agent-permissions.md +161 -0
  12. data/.architecture/decisions/adrs/ADR-005-adaptation-engine.md +127 -0
  13. data/.architecture/decisions/adrs/ADR-006-extension-system.md +273 -0
  14. data/.architecture/decisions/adrs/ADR-007-learning-system.md +156 -0
  15. data/.architecture/decisions/adrs/ADR-008-prompt-generation.md +325 -0
  16. data/.architecture/decisions/adrs/ADR-009-task-failure-handling.md +353 -0
  17. data/.architecture/decisions/adrs/ADR-010-task-input-handling.md +251 -0
  18. data/.architecture/decisions/adrs/ADR-011-task-observable-pattern.md +391 -0
  19. data/.architecture/decisions/adrs/ADR-012-task-output-handling.md +205 -0
  20. data/.architecture/decisions/adrs/ADR-013-architecture-alignment.md +211 -0
  21. data/.architecture/decisions/adrs/ADR-014-agent-capability-registry.md +80 -0
  22. data/.architecture/decisions/adrs/ADR-015-persistent-agent-store.md +100 -0
  23. data/.architecture/decisions/adrs/ADR-016-agent-assembly-engine.md +117 -0
  24. data/.architecture/decisions/adrs/ADR-017-streaming-observability.md +171 -0
  25. data/.architecture/decisions/capability_tools_distinction.md +150 -0
  26. data/.architecture/decisions/cli_command_structure.md +61 -0
  27. data/.architecture/implementation/agent_self_assembly_implementation.md +267 -0
  28. data/.architecture/implementation/agent_self_assembly_summary.md +138 -0
  29. data/.architecture/members.yml +187 -0
  30. data/.architecture/planning/self_implementation_exercise.md +295 -0
  31. data/.architecture/planning/session_compaction_rule.md +43 -0
  32. data/.architecture/planning/streaming_observability_feature.md +223 -0
  33. data/.architecture/principles.md +151 -0
  34. data/.architecture/recalibration/0-2-0.md +92 -0
  35. data/.architecture/recalibration/agent_self_assembly.md +238 -0
  36. data/.architecture/recalibration/cli_command_structure.md +91 -0
  37. data/.architecture/recalibration/implementation_roadmap_0-2-0.md +301 -0
  38. data/.architecture/recalibration/progress_tracking_0-2-0.md +114 -0
  39. data/.architecture/recalibration_process.md +127 -0
  40. data/.architecture/reviews/0-2-0.md +181 -0
  41. data/.architecture/reviews/cli_command_duplication.md +98 -0
  42. data/.architecture/templates/adr.md +105 -0
  43. data/.architecture/templates/implementation_roadmap.md +125 -0
  44. data/.architecture/templates/progress_tracking.md +89 -0
  45. data/.architecture/templates/recalibration_plan.md +70 -0
  46. data/.architecture/templates/version_comparison.md +124 -0
  47. data/.claude/settings.local.json +13 -0
  48. data/.claude-sessions/001-task-class-architecture-implementation.md +129 -0
  49. data/.claude-sessions/002-plan-orchestrator-interface-review.md +105 -0
  50. data/.claude-sessions/architecture-governance-implementation.md +37 -0
  51. data/.claude-sessions/architecture-review-session.md +27 -0
  52. data/ArchitecturalFeatureBuilder.md +136 -0
  53. data/ArchitectureConsiderations.md +229 -0
  54. data/CHANGELOG.md +57 -2
  55. data/CLAUDE.md +111 -0
  56. data/CONTRIBUTING.md +286 -0
  57. data/MAINTAINING.md +301 -0
  58. data/README.md +582 -28
  59. data/docs/agent_capabilities_api.md +259 -0
  60. data/docs/artifact_extension_points.md +757 -0
  61. data/docs/artifact_generation_architecture.md +323 -0
  62. data/docs/artifact_implementation_plan.md +596 -0
  63. data/docs/artifact_integration_points.md +345 -0
  64. data/docs/artifact_verification_strategies.md +581 -0
  65. data/docs/streaming_observability_architecture.md +510 -0
  66. data/exe/agentic +6 -1
  67. data/lefthook.yml +5 -0
  68. data/lib/agentic/adaptation_engine.rb +124 -0
  69. data/lib/agentic/agent.rb +181 -4
  70. data/lib/agentic/agent_assembly_engine.rb +442 -0
  71. data/lib/agentic/agent_capability_registry.rb +260 -0
  72. data/lib/agentic/agent_config.rb +63 -0
  73. data/lib/agentic/agent_specification.rb +46 -0
  74. data/lib/agentic/capabilities/examples.rb +530 -0
  75. data/lib/agentic/capabilities.rb +14 -0
  76. data/lib/agentic/capability_provider.rb +146 -0
  77. data/lib/agentic/capability_specification.rb +118 -0
  78. data/lib/agentic/cli/agent.rb +31 -0
  79. data/lib/agentic/cli/capabilities.rb +191 -0
  80. data/lib/agentic/cli/config.rb +134 -0
  81. data/lib/agentic/cli/execution_observer.rb +796 -0
  82. data/lib/agentic/cli.rb +1068 -0
  83. data/lib/agentic/default_agent_provider.rb +35 -0
  84. data/lib/agentic/errors/llm_error.rb +184 -0
  85. data/lib/agentic/execution_plan.rb +53 -0
  86. data/lib/agentic/execution_result.rb +91 -0
  87. data/lib/agentic/expected_answer_format.rb +46 -0
  88. data/lib/agentic/extension/domain_adapter.rb +109 -0
  89. data/lib/agentic/extension/plugin_manager.rb +163 -0
  90. data/lib/agentic/extension/protocol_handler.rb +116 -0
  91. data/lib/agentic/extension.rb +45 -0
  92. data/lib/agentic/factory_methods.rb +9 -1
  93. data/lib/agentic/generation_stats.rb +61 -0
  94. data/lib/agentic/learning/README.md +84 -0
  95. data/lib/agentic/learning/capability_optimizer.rb +613 -0
  96. data/lib/agentic/learning/execution_history_store.rb +251 -0
  97. data/lib/agentic/learning/pattern_recognizer.rb +500 -0
  98. data/lib/agentic/learning/strategy_optimizer.rb +706 -0
  99. data/lib/agentic/learning.rb +131 -0
  100. data/lib/agentic/llm_assisted_composition_strategy.rb +188 -0
  101. data/lib/agentic/llm_client.rb +215 -15
  102. data/lib/agentic/llm_config.rb +65 -1
  103. data/lib/agentic/llm_response.rb +163 -0
  104. data/lib/agentic/logger.rb +1 -1
  105. data/lib/agentic/observable.rb +51 -0
  106. data/lib/agentic/persistent_agent_store.rb +385 -0
  107. data/lib/agentic/plan_execution_result.rb +129 -0
  108. data/lib/agentic/plan_orchestrator.rb +464 -0
  109. data/lib/agentic/plan_orchestrator_config.rb +57 -0
  110. data/lib/agentic/retry_config.rb +63 -0
  111. data/lib/agentic/retry_handler.rb +125 -0
  112. data/lib/agentic/structured_outputs.rb +1 -1
  113. data/lib/agentic/task.rb +193 -0
  114. data/lib/agentic/task_definition.rb +39 -0
  115. data/lib/agentic/task_execution_result.rb +92 -0
  116. data/lib/agentic/task_failure.rb +66 -0
  117. data/lib/agentic/task_output_schemas.rb +112 -0
  118. data/lib/agentic/task_planner.rb +54 -19
  119. data/lib/agentic/task_result.rb +48 -0
  120. data/lib/agentic/ui.rb +244 -0
  121. data/lib/agentic/verification/critic_framework.rb +116 -0
  122. data/lib/agentic/verification/llm_verification_strategy.rb +60 -0
  123. data/lib/agentic/verification/schema_verification_strategy.rb +47 -0
  124. data/lib/agentic/verification/verification_hub.rb +62 -0
  125. data/lib/agentic/verification/verification_result.rb +50 -0
  126. data/lib/agentic/verification/verification_strategy.rb +26 -0
  127. data/lib/agentic/version.rb +1 -1
  128. data/lib/agentic.rb +74 -2
  129. data/plugins/README.md +41 -0
  130. metadata +245 -6
@@ -1,15 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "execution_plan"
4
+ require_relative "agent_specification"
5
+ require_relative "task_definition"
6
+ require_relative "expected_answer_format"
7
+
3
8
  module Agentic
4
9
  # Handles the task planning process for Agentic using LLM
10
+ #
11
+ # This class follows separation of concerns by:
12
+ # 1. Focusing on core planning logic and data generation
13
+ # 2. Returning structured data (ExecutionPlan) instead of formatted strings
14
+ # 3. Delegating presentation concerns to the ExecutionPlan class
5
15
  class TaskPlanner
6
16
  # @return [String] The goal to be accomplished
7
17
  attr_reader :goal
8
18
 
9
- # @return [Array<Hash>] The list of tasks to accomplish the goal
19
+ # @return [Array<TaskDefinition>] The list of tasks to accomplish the goal
10
20
  attr_reader :tasks
11
21
 
12
- # @return [Hash] The expected answer format
22
+ # @return [ExpectedAnswerFormat] The expected answer format
13
23
  attr_reader :expected_answer
14
24
 
15
25
  # @return [LlmConfig] The configuration for the LLM
@@ -21,7 +31,11 @@ module Agentic
21
31
  def initialize(goal, llm_config = LlmConfig.new)
22
32
  @goal = goal
23
33
  @tasks = []
24
- @expected_answer = {}
34
+ @expected_answer = ExpectedAnswerFormat.new(
35
+ format: "Undetermined",
36
+ sections: [],
37
+ length: "Undetermined"
38
+ )
25
39
  @llm_config = llm_config
26
40
  end
27
41
 
@@ -51,7 +65,22 @@ module Agentic
51
65
  end
52
66
 
53
67
  response = llm_request(system_message, user_message, schema)
54
- @tasks = response[:content]["tasks"]
68
+
69
+ if response.successful?
70
+ @tasks = response.content["tasks"].map do |task_data|
71
+ TaskDefinition.new(
72
+ description: task_data["description"],
73
+ agent: AgentSpecification.new(
74
+ name: task_data["agent"]["name"],
75
+ description: task_data["agent"]["description"],
76
+ instructions: task_data["agent"]["instructions"]
77
+ )
78
+ )
79
+ end
80
+ else
81
+ Agentic.logger.error("Failed to analyze goal: #{response.error&.message || response.refusal}")
82
+ @tasks = []
83
+ end
55
84
  end
56
85
 
57
86
  # Determines the expected answer format using LLM
@@ -67,29 +96,35 @@ module Agentic
67
96
  end
68
97
 
69
98
  response = llm_request(system_message, user_message, schema)
70
- @expected_answer = response[:content]
71
- end
72
99
 
73
- # Displays the execution plan
74
- # @return [String] The formatted execution plan
75
- def display_plan
76
- plan = "Execution Plan:\n\n"
77
- @tasks.each_with_index do |task, index|
78
- plan += "#{index + 1}. #{task["description"]} (Agent: #{task["agent"].inspect})\n"
100
+ if response.successful?
101
+ @expected_answer = ExpectedAnswerFormat.new(
102
+ format: response.content["format"],
103
+ sections: response.content["sections"],
104
+ length: response.content["length"]
105
+ )
106
+ else
107
+ Agentic.logger.error("Failed to determine expected answer format: #{response.error&.message || response.refusal}")
108
+ @expected_answer = ExpectedAnswerFormat.new(
109
+ format: "Undetermined",
110
+ sections: [],
111
+ length: "Undetermined"
112
+ )
79
113
  end
80
- plan += "\nExpected Answer:\n"
81
- plan += "Format: #{@expected_answer["format"]}\n"
82
- plan += "Sections: #{@expected_answer["sections"].join(", ")}\n"
83
- plan += "Length: #{@expected_answer["length"]}\n"
84
- plan
114
+ end
115
+
116
+ # Returns an ExecutionPlan object representing the execution plan
117
+ # @return [ExecutionPlan] The structured execution plan
118
+ def execution_plan
119
+ ExecutionPlan.new(@tasks, @expected_answer)
85
120
  end
86
121
 
87
122
  # Executes the entire planning process
88
- # @return [String] The formatted execution plan
123
+ # @return [ExecutionPlan] The structured execution plan
89
124
  def plan
90
125
  analyze_goal
91
126
  determine_expected_answer
92
- display_plan
127
+ execution_plan
93
128
  end
94
129
 
95
130
  private
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Agentic
4
# Value object describing how a single task execution ended.
#
# @attr_reader [String] task_id Identifier of the task this result belongs to
# @attr_reader [Boolean] success Success flag exactly as given to the constructor
# @attr_reader [Hash, nil] output Payload produced by the task (nil when none was given)
# @attr_reader [TaskFailure, nil] failure Failure details (nil when none were given)
class TaskResult
  attr_reader :task_id, :success, :output, :failure

  # Builds a result from its components; nothing is validated or copied.
  # @param task_id [String] Identifier of the task this result belongs to
  # @param success [Boolean] Whether the execution succeeded
  # @param output [Hash, nil] Payload produced by the task
  # @param failure [TaskFailure, nil] Failure details
  # @return [TaskResult] the new instance
  def initialize(task_id:, success:, output: nil, failure: nil)
    @task_id, @success = task_id, success
    @output, @failure = output, failure
  end

  # @return [Boolean] the stored success flag, returned as-is
  def successful?
    success
  end

  # @return [Boolean] negation of the stored success flag
  def failed?
    !success
  end

  # Hash form of the result. A non-nil failure is converted through its own
  # #to_h; a nil failure stays nil.
  # @return [Hash] with keys :task_id, :success, :output and :failure
  def to_h
    serialized_failure = failure&.to_h

    {task_id: task_id, success: success, output: output, failure: serialized_failure}
  end
end
48
+ end
data/lib/agentic/ui.rb ADDED
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tty-spinner"
4
+ require "tty-progressbar"
5
+ require "tty-box"
6
+ require "tty-table"
7
+ require "tty-cursor"
8
+ require "pastel"
9
+
10
+ module Agentic
11
+ # UI helpers for the CLI
12
+ module UI
13
# Builds a TTY::Spinner whose line reads "[:spinner] <message>".
# The spinner is only constructed here; it is not started.
# @param message [String] Text shown next to the spinner
# @param format [Symbol] Spinner format passed through to TTY::Spinner
# @return [TTY::Spinner] The new spinner
def self.spinner(message, format: :dots)
  template = "[:spinner] #{message}"
  TTY::Spinner.new(template, format: format)
end
20
+
21
# Builds a TTY::ProgressBar rendered as "[:bar] <title> :percent".
# @param title [String] Text shown after the bar
# @param total [Integer] Total number of steps
# @param options [Hash] Extra TTY::ProgressBar options; entries here take
#   precedence over the :total and :width (40) values set by this method
# @return [TTY::ProgressBar] The new progress bar
def self.progress_bar(title, total, options = {})
  settings = {total: total, width: 40}.merge(options)
  layout = "[:bar] #{title} :percent"

  TTY::ProgressBar.new(layout, **settings)
end
32
+
33
# Frames content in a TTY::Box with the title in the top-left corner.
#
# The frame width is the longest content line plus 4, capped at 100. Line
# lengths are measured after removing ANSI colour sequences.
# NOTE(review): each measured line still includes its trailing newline, so
# multi-line content may come out one column wider than strictly needed.
# @param title [String] Title placed at the top-left of the frame
# @param content [String] Text placed inside the frame
# @param options [Hash] Extra TTY::Box.frame options; these override the
#   title, width and padding chosen here
# @return [String] The rendered box
def self.box(title, content, options = {})
  widest = content.lines.map { |line| line.gsub(/\e\[[0-9;]*m/, "").length }.max || 0

  frame_options = {
    title: {top_left: title},
    width: [100, widest + 4].min,
    padding: 1
  }.merge(options)

  TTY::Box.frame(**frame_options) { content }
end
50
+
51
# Lazily creates a Pastel instance and reuses it on later calls.
# @return [Pastel] The shared pastel instance
def self.pastel
  @pastel = Pastel.new unless @pastel
  @pastel
end
56
+
57
# Returns text wrapped in the given colour.
#
# The colour name is dispatched with public_send rather than send, so a
# caller-supplied name can never reach private methods such as Kernel#system
# or Kernel#exec on the pastel object.
# @param text [String] The text to colorize
# @param color [Symbol] Name of the pastel colour/style method to apply
# @return [String] The colorized text
def self.colorize(text, color)
  pastel.public_send(color, text)
end
64
+
65
# Colours text according to a status symbol.
#
# :success/:completed are green, :failure/:failed/:error red,
# :warning/:pending yellow, :info/:in_progress blue; anything else is white.
# @param text [String] The text to display
# @param status [Symbol] The status deciding the colour
# @return [String] The text in the status colour
def self.status_text(text, status)
  palette = {
    success: :green, completed: :green,
    failure: :red, failed: :red, error: :red,
    warning: :yellow, pending: :yellow,
    info: :blue, in_progress: :blue
  }

  pastel.send(palette.fetch(status, :white), text)
end
85
+
86
# Formats a duration in seconds as a short human-readable string.
#
# Output forms: "<n>ms" below one second, "<n>s" (two decimals at most)
# below one minute, "<m>m <s>s" below one hour, "<h>h <m>m" otherwise.
#
# The unit is chosen from the *rounded* value, so a value that rounds up to
# the next unit is shown in that unit: 0.9996 gives "1.0s" rather than
# "1000ms", and 119.6 gives "2m 0s" rather than "1m 60s".
# @param seconds [Float] The duration in seconds
# @return [String] The formatted duration
def self.format_duration(seconds)
  millis = (seconds * 1000).round
  return "#{millis}ms" if millis < 1000

  rounded = seconds.round(2)
  return "#{rounded}s" if rounded < 60

  whole_seconds = seconds.round
  if whole_seconds < 3600
    minutes, remaining_seconds = whole_seconds.divmod(60)
    return "#{minutes}m #{remaining_seconds}s"
  end

  # Hours keep truncating to whole minutes. The max guards inputs just under
  # an hour that only reach this branch because they rounded up to 3600.
  hours, remainder = [seconds.floor, 3600].max.divmod(3600)
  "#{hours}h #{remainder / 60}m"
end
104
+
105
# Runs the given block while a spinner is shown.
#
# When quiet is true the block is simply yielded to and no spinner is made.
# Otherwise the spinner is started, marked successful once the block
# returns, or marked as errored (with the exception message) before the
# exception is re-raised.
# @param message [String] The message to display
# @param quiet [Boolean] Whether to suppress output
# @yield The block to execute
# @return [Object] The return value of the block
def self.with_spinner(message, quiet: false)
  return yield if quiet

  indicator = spinner(message)
  indicator.auto_spin

  begin
    yield.tap { indicator.success("(#{colorize("✓", :green)}) #{message}") }
  rescue => e
    indicator.error("(#{colorize("✗", :red)}) #{message}: #{e.message}")
    raise
  end
end
125
+
126
# Maps a task status to a single coloured glyph.
#
# :building_agent, :agent_ready and :pending share the same yellow circle
# (the task itself has not started executing yet); unknown statuses yield a
# white question mark.
# @param status [Symbol] The status
# @return [String] The coloured status indicator
def self.task_status_indicator(status)
  waiting = ["○", :yellow]
  glyphs = {
    completed: ["✓", :green],
    failed: ["✗", :red],
    in_progress: ["↻", :blue],
    building_agent: waiting,
    agent_ready: waiting,
    pending: waiting,
    canceled: ["⨯", :yellow]
  }

  glyph, color = glyphs.fetch(status, ["?", :white])
  colorize(glyph, color)
end
147
+
148
# Renders a list of tasks as a unicode table.
#
# The "Agent" column is included unless options[:show_agent_column] is
# false. The whole options hash (including :show_agent_column) is also
# forwarded to TTY::Table#render, where it may override the default padding.
# @param tasks [Array<Hash>] Task data, one hash per row
# @param options [Hash] Display options
# @return [String] The rendered table, or "" when there are no tasks
def self.task_display_table(tasks, options = {})
  return "" if tasks.empty?

  with_agent = options.fetch(:show_agent_column, true)
  columns = ["Status", "Task", ("Agent" if with_agent), "Duration", "Output"].compact
  body = tasks.map { |task| format_task_row(task, show_agent_column: with_agent) }
  render_options = {padding: [0, 1]}.merge(options)

  TTY::Table.new(header: columns, rows: body).render(:unicode, **render_options)
end
172
+
173
# Returns the TTY::Cursor module used for terminal positioning; the
# reference is cached after the first call.
# @return [TTY::Cursor] The cursor helper
def self.cursor
  @cursor = TTY::Cursor unless @cursor
  @cursor
end
178
+
179
# Prints the cursor sequences that move up line_count lines and then clear
# line_count lines downwards, so a table can be redrawn in place.
# @param line_count [Integer] Number of lines to move up and clear
def self.clear_and_reposition(line_count)
  terminal = cursor
  move_up = terminal.up(line_count)
  wipe = terminal.clear_lines(line_count, :down)

  print(move_up + wipe)
end
184
+
185
# Formats a single task as a row for the display table.
#
# Cells, in order: status indicator, description (max 35 chars), agent info
# (only when show_agent_column is true), duration, output preview (max 25
# chars). Missing values are shown as "-".
#
# The error is converted with #to_s before truncation so an Exception (or
# any other non-String) in task[:error] no longer crashes the row, matching
# how the output value is already handled. Output that is blank after
# stripping is treated as absent, so the cell shows the error or "-" instead
# of an empty string.
# @param task [Hash] Task data; keys read here are :status, :description,
#   :agent_role, :agent_duration, :duration, :start_time, :output and :error
# @param show_agent_column [Boolean] Whether to include agent information in the row
# @return [Array] Formatted row data
def self.format_task_row(task, show_agent_column: true)
  status = task[:status]
  status_indicator = task_status_indicator(status)

  # Truncate description for display
  description = truncate_text(task[:description] || "Unknown task", 35)

  # Format agent information
  agent_info =
    if status == :building_agent
      colorize("🤖 Building...", :blue)
    elsif task[:agent_role]
      agent_label = "✓ #{task[:agent_role]}"
      agent_label += " (#{format_duration(task[:agent_duration])})" if task[:agent_duration]
      colorize(agent_label, :green)
    else
      "-"
    end

  # Format duration: a recorded duration wins; otherwise show elapsed time
  # for tasks that are still under way and have a start time.
  still_running = %i[in_progress building_agent agent_ready].include?(status)
  duration =
    if task[:duration]
      format_duration(task[:duration])
    elsif still_running && task[:start_time]
      format_duration(Time.now - task[:start_time])
    else
      "-"
    end

  # Format output preview
  output_text = task[:output] ? task[:output].to_s.strip : ""
  output =
    if !output_text.empty?
      truncate_text(output_text, 25)
    elsif status == :failed && task[:error]
      colorize(truncate_text(task[:error].to_s, 25), :red)
    else
      "-"
    end

  if show_agent_column
    [status_indicator, description, agent_info, duration, output]
  else
    [status_indicator, description, duration, output]
  end
end
232
+
233
# Truncates text to at most max_length characters, ending in "..." when cut.
#
# When max_length leaves no room for the ellipsis plus at least one
# character (max_length <= 3), the text is hard-cut to max_length instead,
# so the result never exceeds the requested length.
# @param text [String] Text to truncate
# @param max_length [Integer] Maximum length of the returned string
# @return [String] The original text if it fits, otherwise the shortened text
def self.truncate_text(text, max_length)
  return text if text.length <= max_length

  ellipsis = "..."
  return text[0, [max_length, 0].max] if max_length <= ellipsis.length

  "#{text[0, max_length - ellipsis.length]}#{ellipsis}"
end
241
+
242
+ private_class_method :format_task_row, :truncate_text
243
+ end
244
+ end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Agentic
4
+ module Verification
5
# Framework for multi-perspective evaluation of task results.
#
# Each registered critic produces an evaluation; the framework reports the
# share of positive evaluations as its confidence and passes the result when
# that share reaches PASS_THRESHOLD.
class CriticFramework
  # Minimum share of positive critiques required for a passing verdict
  PASS_THRESHOLD = 0.7

  # Confidence reported when there are no critiques to aggregate
  NEUTRAL_CONFIDENCE = 0.5

  # @return [Array<Critic>] The critics registered with this framework
  attr_reader :critics

  # @return [Hash] Configuration options for the framework
  attr_reader :config

  # Initializes a new CriticFramework
  # @param critics [Array<Critic>] The critics to register; the array is
  #   copied, so later #add_critic calls neither mutate the caller's array
  #   nor fail when it is frozen
  # @param config [Hash] Configuration options for the framework
  def initialize(critics: [], config: {})
    @critics = critics.dup
    @config = config
  end

  # Adds a critic to the framework
  # @param critic [Critic] The critic to add
  # @return [void]
  def add_critic(critic)
    @critics << critic
  end

  # Evaluates a task result using all registered critics
  # @param task [Task] The task to evaluate
  # @param result [TaskResult] The result to evaluate
  # @return [CriticResult] The combined evaluation result; with no critics
  #   the confidence is NEUTRAL_CONFIDENCE, which is below the pass threshold
  def evaluate(task, result)
    evaluations = @critics.map { |critic| critic.critique(task, result) }

    confidence =
      if evaluations.empty?
        NEUTRAL_CONFIDENCE
      else
        evaluations.count(&:positive?).fdiv(evaluations.size)
      end

    CriticResult.new(
      task_id: task.id,
      confidence: confidence,
      verdict: confidence >= PASS_THRESHOLD,
      comments: evaluations.flat_map(&:comments)
    )
  end
end
50
+
51
# Outcome of a critic evaluation: which task was judged, with what
# confidence, the pass/fail verdict, and any comments.
class CriticResult
  # @return [String] The ID of the task that was evaluated
  # @return [Float] The confidence of the evaluation (0.0-1.0)
  # @return [Boolean] The verdict of the evaluation (true = pass, false = fail)
  # @return [Array<String>] Comments from the critic
  attr_reader :task_id, :confidence, :verdict, :comments

  # Stores the given values as-is.
  # @param task_id [String] The ID of the task that was evaluated
  # @param confidence [Float] The confidence of the evaluation (0.0-1.0)
  # @param verdict [Boolean] The verdict of the evaluation (true = pass, false = fail)
  # @param comments [Array<String>] Comments from the critic
  def initialize(task_id:, confidence:, verdict:, comments: [])
    @task_id, @confidence = task_id, confidence
    @verdict, @comments = verdict, comments
  end

  # @return [Boolean] The stored verdict
  def positive?
    verdict
  end

  # @return [Hash] The result with keys :task_id, :confidence, :verdict, :comments
  def to_h
    {task_id: task_id, confidence: confidence, verdict: verdict, comments: comments}
  end
end
94
+
95
# Abstract base for critics: holds a configuration hash and declares the
# #critique hook that concrete critics must provide.
class Critic
  # @return [Hash] Configuration options for the critic
  attr_reader :config

  # @param config [Hash] Configuration options for the critic
  def initialize(config = {})
    @config = config
  end

  # Critiques a task result. The base implementation always raises.
  # @param task [Task] The task to critique
  # @param result [TaskResult] The result to critique
  # @return [CriticResult] The critique result (in subclasses)
  # @raise [NotImplementedError] Always, until a subclass overrides it.
  #   NotImplementedError is a ScriptError, so a bare `rescue` will not
  #   catch it.
  def critique(task, result)
    message = "Subclasses must implement critique"
    raise NotImplementedError, message
  end
end
115
+ end
116
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "verification_strategy"
4
+ require_relative "verification_result"
5
+
6
+ module Agentic
7
+ module Verification
8
# Verification strategy intended to judge task results with an LLM.
#
# The current implementation is a stub: it never calls the LLM client and
# instead produces a randomly simulated verdict.
class LlmVerificationStrategy < VerificationStrategy
  # Initializes a new LlmVerificationStrategy
  # @param llm_client [LlmClient] The LLM client; stored but not used by the stub
  # @param config [Hash] Configuration options, passed to the superclass
  def initialize(llm_client, config = {})
    super(config)
    @llm_client = llm_client
  end

  # Produces a verification result for a task result.
  #
  # Failed results are rejected with confidence 0.0 without any simulation.
  # For successful results a real implementation would send the task
  # description, the task input and the result output to the LLM and ask for
  # a boolean verdict plus a 0.0-1.0 confidence, judging correctness,
  # completeness and alignment with the task description. The stub instead
  # passes about 90% of the time, with confidence in 0.8...1.0 when it
  # passes and 0.3...0.6 when it does not.
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    verified, confidence, message =
      if result.successful?
        passed = rand > 0.1 # 90% chance of success for simulation purposes
        if passed
          [true, 0.8 + rand * 0.2, "Result meets task requirements"]
        else
          [false, 0.3 + rand * 0.3, "Result does not fully satisfy task requirements"]
        end
      else
        [false, 0.0, "Task failed, skipping LLM verification"]
      end

    VerificationResult.new(
      task_id: task.id,
      verified: verified,
      confidence: confidence,
      messages: [message]
    )
  end
end
59
+ end
60
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "verification_strategy"
4
+ require_relative "verification_result"
5
+
6
+ module Agentic
7
+ module Verification
8
# Verification strategy intended to check task output against a schema.
#
# The current implementation is a stub: it only checks whether a schema was
# supplied and never validates the output itself.
class SchemaVerificationStrategy < VerificationStrategy
  # Produces a verification result for a task result.
  #
  # - Failed results are rejected with confidence 0.0.
  # - When the task input is not a Hash, or has no "output_schema" entry,
  #   the result passes by default with confidence 0.5.
  # - Otherwise validation is assumed to pass, with confidence 0.9.
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    verified, confidence, message =
      if !result.successful?
        [false, 0.0, "Task failed, skipping schema verification"]
      else
        # Extracting schema from task if available
        input = task.input
        schema = input["output_schema"] if input.is_a?(Hash)

        if schema
          # A real implementation would validate the output here
          [true, 0.9, "Output matches expected schema"]
        else
          [true, 0.5, "No schema specified for verification, passing by default"]
        end
      end

    VerificationResult.new(
      task_id: task.id,
      verified: verified,
      confidence: confidence,
      messages: [message]
    )
  end
end
46
+ end
47
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Agentic
4
+ module Verification
5
# Coordinates verification strategies and manages the verification process.
#
# A result is verified only if every registered strategy verifies it; the
# reported confidence is the mean of the strategies' confidences.
class VerificationHub
  # Confidence reported when no strategy is registered and the result
  # therefore passes by default
  DEFAULT_CONFIDENCE = 0.5

  # @return [Array<VerificationStrategy>] The registered verification strategies
  attr_reader :strategies

  # @return [Hash] Configuration options for the verification hub
  attr_reader :config

  # Initializes a new VerificationHub
  # @param strategies [Array<VerificationStrategy>] The verification
  #   strategies to use; the array is copied, so later #add_strategy calls
  #   neither mutate the caller's array nor fail when it is frozen
  # @param config [Hash] Configuration options for the verification hub
  def initialize(strategies: [], config: {})
    @strategies = strategies.dup
    @config = config
  end

  # Adds a verification strategy
  # @param strategy [VerificationStrategy] The strategy to add
  # @return [void]
  def add_strategy(strategy)
    @strategies << strategy
  end

  # Verifies a task result using the registered strategies
  # @param task [Task] The task to verify
  # @param result [TaskResult] The result to verify
  # @return [VerificationResult] The verification result
  def verify(task, result)
    # Skip verification for failed tasks
    if result.failed?
      return VerificationResult.new(
        task_id: task.id,
        verified: false,
        confidence: 0.0,
        messages: ["Task failed, skipping verification"]
      )
    end

    # Without strategies the mean confidence would be 0 / 0.0 (NaN); report
    # an explicit pass-by-default instead.
    if @strategies.empty?
      return VerificationResult.new(
        task_id: task.id,
        verified: true,
        confidence: DEFAULT_CONFIDENCE,
        messages: ["No verification strategies registered, passing by default"]
      )
    end

    # Apply all strategies
    strategy_results = @strategies.map { |strategy| strategy.verify(task, result) }

    # Combine results
    VerificationResult.new(
      task_id: task.id,
      verified: strategy_results.all?(&:verified),
      confidence: strategy_results.sum(&:confidence).fdiv(strategy_results.size),
      messages: strategy_results.flat_map(&:messages)
    )
  end
end
61
+ end
62
+ end