igniter 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +217 -0
  3. data/docs/APPLICATION_V1.md +253 -0
  4. data/docs/CAPABILITIES_V1.md +207 -0
  5. data/docs/CONSENSUS_V1.md +477 -0
  6. data/docs/CONTENT_ADDRESSING_V1.md +221 -0
  7. data/docs/DATAFLOW_V1.md +274 -0
  8. data/docs/MESH_V1.md +732 -0
  9. data/docs/NODE_CACHE_V1.md +324 -0
  10. data/docs/PROACTIVE_AGENTS_V1.md +293 -0
  11. data/docs/SERVER_V1.md +200 -1
  12. data/docs/SKILLS_V1.md +213 -0
  13. data/docs/STORE_ADAPTERS.md +41 -13
  14. data/docs/TEMPORAL_V1.md +174 -0
  15. data/docs/TOOLS_V1.md +347 -0
  16. data/docs/TRANSCRIPTION_V1.md +403 -0
  17. data/examples/README.md +37 -0
  18. data/examples/consensus.rb +239 -0
  19. data/examples/dataflow.rb +308 -0
  20. data/examples/elocal_webhook.rb +1 -0
  21. data/examples/incremental.rb +142 -0
  22. data/examples/llm_tools.rb +237 -0
  23. data/examples/mesh.rb +239 -0
  24. data/examples/mesh_discovery.rb +267 -0
  25. data/examples/mesh_gossip.rb +162 -0
  26. data/examples/ringcentral_routing.rb +1 -1
  27. data/lib/igniter/agents/ai/alert_agent.rb +111 -0
  28. data/lib/igniter/agents/ai/chain_agent.rb +127 -0
  29. data/lib/igniter/agents/ai/critic_agent.rb +163 -0
  30. data/lib/igniter/agents/ai/evaluator_agent.rb +193 -0
  31. data/lib/igniter/agents/ai/evolution_agent.rb +286 -0
  32. data/lib/igniter/agents/ai/health_check_agent.rb +122 -0
  33. data/lib/igniter/agents/ai/observer_agent.rb +184 -0
  34. data/lib/igniter/agents/ai/planner_agent.rb +210 -0
  35. data/lib/igniter/agents/ai/router_agent.rb +131 -0
  36. data/lib/igniter/agents/ai/self_reflection_agent.rb +175 -0
  37. data/lib/igniter/agents/observability/metrics_agent.rb +130 -0
  38. data/lib/igniter/agents/pipeline/batch_processor_agent.rb +131 -0
  39. data/lib/igniter/agents/proactive_agent.rb +208 -0
  40. data/lib/igniter/agents/reliability/retry_agent.rb +99 -0
  41. data/lib/igniter/agents/scheduling/cron_agent.rb +110 -0
  42. data/lib/igniter/agents.rb +56 -0
  43. data/lib/igniter/application/app_config.rb +32 -0
  44. data/lib/igniter/application/autoloader.rb +18 -0
  45. data/lib/igniter/application/generator.rb +157 -0
  46. data/lib/igniter/application/scheduler.rb +109 -0
  47. data/lib/igniter/application/yml_loader.rb +39 -0
  48. data/lib/igniter/application.rb +174 -0
  49. data/lib/igniter/capabilities.rb +68 -0
  50. data/lib/igniter/compiler/validators/dependencies_validator.rb +50 -2
  51. data/lib/igniter/compiler/validators/remote_validator.rb +2 -0
  52. data/lib/igniter/consensus/cluster.rb +183 -0
  53. data/lib/igniter/consensus/errors.rb +14 -0
  54. data/lib/igniter/consensus/executors.rb +43 -0
  55. data/lib/igniter/consensus/node.rb +320 -0
  56. data/lib/igniter/consensus/read_query.rb +30 -0
  57. data/lib/igniter/consensus/state_machine.rb +58 -0
  58. data/lib/igniter/consensus.rb +58 -0
  59. data/lib/igniter/content_addressing.rb +133 -0
  60. data/lib/igniter/contract.rb +12 -0
  61. data/lib/igniter/dataflow/aggregate_operators.rb +147 -0
  62. data/lib/igniter/dataflow/aggregate_state.rb +77 -0
  63. data/lib/igniter/dataflow/diff.rb +37 -0
  64. data/lib/igniter/dataflow/diff_state.rb +81 -0
  65. data/lib/igniter/dataflow/incremental_collection_result.rb +39 -0
  66. data/lib/igniter/dataflow/window_filter.rb +48 -0
  67. data/lib/igniter/dataflow.rb +65 -0
  68. data/lib/igniter/dsl/contract_builder.rb +71 -7
  69. data/lib/igniter/executor.rb +60 -0
  70. data/lib/igniter/extensions/capabilities.rb +39 -0
  71. data/lib/igniter/extensions/content_addressing.rb +5 -0
  72. data/lib/igniter/extensions/dataflow.rb +117 -0
  73. data/lib/igniter/extensions/incremental.rb +50 -0
  74. data/lib/igniter/extensions/mesh.rb +31 -0
  75. data/lib/igniter/fingerprint.rb +43 -0
  76. data/lib/igniter/incremental/formatter.rb +81 -0
  77. data/lib/igniter/incremental/result.rb +69 -0
  78. data/lib/igniter/incremental/tracker.rb +108 -0
  79. data/lib/igniter/incremental.rb +50 -0
  80. data/lib/igniter/integrations/llm/config.rb +48 -4
  81. data/lib/igniter/integrations/llm/executor.rb +221 -28
  82. data/lib/igniter/integrations/llm/providers/anthropic.rb +37 -4
  83. data/lib/igniter/integrations/llm/providers/openai.rb +34 -5
  84. data/lib/igniter/integrations/llm/transcription/providers/assemblyai.rb +200 -0
  85. data/lib/igniter/integrations/llm/transcription/providers/base.rb +122 -0
  86. data/lib/igniter/integrations/llm/transcription/providers/deepgram.rb +162 -0
  87. data/lib/igniter/integrations/llm/transcription/providers/openai.rb +102 -0
  88. data/lib/igniter/integrations/llm/transcription/transcriber.rb +145 -0
  89. data/lib/igniter/integrations/llm/transcription/transcript_result.rb +29 -0
  90. data/lib/igniter/integrations/llm.rb +37 -1
  91. data/lib/igniter/memory/agent_memory.rb +104 -0
  92. data/lib/igniter/memory/episode.rb +29 -0
  93. data/lib/igniter/memory/fact.rb +27 -0
  94. data/lib/igniter/memory/memorable.rb +90 -0
  95. data/lib/igniter/memory/reflection_cycle.rb +96 -0
  96. data/lib/igniter/memory/reflection_record.rb +28 -0
  97. data/lib/igniter/memory/store.rb +115 -0
  98. data/lib/igniter/memory/stores/in_memory.rb +136 -0
  99. data/lib/igniter/memory/stores/sqlite.rb +284 -0
  100. data/lib/igniter/memory.rb +80 -0
  101. data/lib/igniter/mesh/announcer.rb +55 -0
  102. data/lib/igniter/mesh/config.rb +45 -0
  103. data/lib/igniter/mesh/discovery.rb +39 -0
  104. data/lib/igniter/mesh/errors.rb +31 -0
  105. data/lib/igniter/mesh/gossip.rb +47 -0
  106. data/lib/igniter/mesh/peer.rb +21 -0
  107. data/lib/igniter/mesh/peer_registry.rb +51 -0
  108. data/lib/igniter/mesh/poller.rb +77 -0
  109. data/lib/igniter/mesh/router.rb +109 -0
  110. data/lib/igniter/mesh.rb +85 -0
  111. data/lib/igniter/metrics/collector.rb +131 -0
  112. data/lib/igniter/metrics/prometheus_exporter.rb +104 -0
  113. data/lib/igniter/metrics/snapshot.rb +8 -0
  114. data/lib/igniter/metrics.rb +37 -0
  115. data/lib/igniter/model/aggregate_node.rb +34 -0
  116. data/lib/igniter/model/collection_node.rb +3 -2
  117. data/lib/igniter/model/compute_node.rb +13 -0
  118. data/lib/igniter/model/remote_node.rb +18 -2
  119. data/lib/igniter/node_cache.rb +231 -0
  120. data/lib/igniter/replication/bootstrapper.rb +61 -0
  121. data/lib/igniter/replication/bootstrappers/gem.rb +32 -0
  122. data/lib/igniter/replication/bootstrappers/git.rb +39 -0
  123. data/lib/igniter/replication/bootstrappers/tarball.rb +56 -0
  124. data/lib/igniter/replication/expansion_plan.rb +38 -0
  125. data/lib/igniter/replication/expansion_planner.rb +142 -0
  126. data/lib/igniter/replication/manifest.rb +45 -0
  127. data/lib/igniter/replication/network_topology.rb +123 -0
  128. data/lib/igniter/replication/node_role.rb +42 -0
  129. data/lib/igniter/replication/reflective_replication_agent.rb +238 -0
  130. data/lib/igniter/replication/replication_agent.rb +87 -0
  131. data/lib/igniter/replication/role_registry.rb +73 -0
  132. data/lib/igniter/replication/ssh_session.rb +77 -0
  133. data/lib/igniter/replication.rb +54 -0
  134. data/lib/igniter/runtime/cache.rb +35 -6
  135. data/lib/igniter/runtime/execution.rb +26 -2
  136. data/lib/igniter/runtime/input_validator.rb +6 -2
  137. data/lib/igniter/runtime/node_state.rb +7 -2
  138. data/lib/igniter/runtime/resolver.rb +323 -31
  139. data/lib/igniter/runtime/stores/redis_store.rb +41 -4
  140. data/lib/igniter/server/client.rb +44 -1
  141. data/lib/igniter/server/config.rb +13 -6
  142. data/lib/igniter/server/handlers/event_handler.rb +4 -0
  143. data/lib/igniter/server/handlers/execute_handler.rb +6 -0
  144. data/lib/igniter/server/handlers/liveness_handler.rb +20 -0
  145. data/lib/igniter/server/handlers/manifest_handler.rb +34 -0
  146. data/lib/igniter/server/handlers/metrics_handler.rb +51 -0
  147. data/lib/igniter/server/handlers/peers_handler.rb +115 -0
  148. data/lib/igniter/server/handlers/readiness_handler.rb +47 -0
  149. data/lib/igniter/server/http_server.rb +54 -17
  150. data/lib/igniter/server/router.rb +54 -21
  151. data/lib/igniter/server/server_logger.rb +52 -0
  152. data/lib/igniter/server.rb +6 -0
  153. data/lib/igniter/skill/feedback.rb +116 -0
  154. data/lib/igniter/skill/output_schema.rb +110 -0
  155. data/lib/igniter/skill.rb +218 -0
  156. data/lib/igniter/temporal.rb +84 -0
  157. data/lib/igniter/tool/discoverable.rb +151 -0
  158. data/lib/igniter/tool.rb +52 -0
  159. data/lib/igniter/tool_registry.rb +144 -0
  160. data/lib/igniter/version.rb +1 -1
  161. data/lib/igniter.rb +17 -0
  162. metadata +128 -1
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../proactive_agent"
4
+
5
+ module Igniter
6
+ module Agents
7
+ # Threshold-based alerting agent.
8
+ #
9
+ # AlertAgent extends ProactiveAgent with an opinionated DSL for declaring
10
+ # numeric thresholds. A single +monitor+ / +threshold+ pair registers both
11
+ # the watcher and the trigger automatically.
12
+ #
13
+ # @example
14
+ # class ApiAlerts < Igniter::Agents::AlertAgent
15
+ # intent "Alert when API error rate or latency spikes"
16
+ # scan_interval 15.0
17
+ #
18
+ # monitor :error_rate, source: -> { Metrics.error_rate }
19
+ # monitor :p99_latency, source: -> { Metrics.p99 }
20
+ #
21
+ # threshold :error_rate, above: 0.05
22
+ # threshold :p99_latency, above: 500
23
+ # end
24
+ #
25
+ # ref = ApiAlerts.start
26
+ # alerts = ref.call(:alerts) # => Array<AlertRecord>
27
+ class AlertAgent < ProactiveAgent
28
+ # Immutable record created when a threshold is breached.
29
+ AlertRecord = Struct.new(:metric, :value, :kind, :threshold,
30
+ :fired_at, keyword_init: true)
31
+
32
+ proactive_initial_state alerts: [], silenced: false
33
+
34
+ class << self
35
# Registers a polled watcher for a named metric.
#
# The metric name is converted with +to_sym+ and handed to +watch+, with the
# callable forwarded as the +poll:+ option. Whatever +watch+ returns is
# returned unchanged.
#
# @param metric [Symbol, String] metric name
# @param source [#call] polling callable (documented by the author as a
#   zero-argument callable returning a Numeric; not enforced here)
def monitor(metric, source:)
  metric_key = metric.to_sym
  watch(metric_key, poll: source)
end
43
+
44
# Declares a numeric threshold for a watched metric and registers the
# trigger +:"threshold_<metric>"+ that enforces it.
#
# The trigger's condition looks the metric up in the context it is given and
# is true when the reading is above +above+ or below +below+ (both sides are
# compared as Floats). The action appends an AlertRecord to +state[:alerts]+,
# keeping only the 200 most recent records, unless +state[:silenced]+ is set.
#
# The action re-classifies the reading itself, so it never records an alert
# for a reading that is missing or not actually in breach, and it tolerates
# a state hash that has no +:alerts+ key yet.
#
# @param metric [Symbol, String] metric name (converted with +to_sym+)
# @param above [Numeric, nil] breach if value > above
# @param below [Numeric, nil] breach if value < below
def threshold(metric, above: nil, below: nil)
  name = metric.to_sym

  # Classifies a reading as :above, :below, or nil (no breach / no reading).
  breach_kind = lambda do |reading|
    next nil if reading.nil?

    numeric = reading.to_f
    if above && numeric > above.to_f
      :above
    elsif below && numeric < below.to_f
      :below
    end
  end

  condition = ->(ctx) { !breach_kind.call(ctx[name]).nil? }

  action = lambda do |state:, context:|
    next state if state[:silenced]

    val  = context[name]
    kind = breach_kind.call(val)
    # Nothing to record when the reading is absent or within bounds.
    next state if kind.nil?

    rec = AlertRecord.new(
      metric: name,
      value: val,
      kind: kind,
      threshold: kind == :above ? above : below,
      fired_at: Time.now
    )
    # fetch mirrors the :alerts query, which also treats a missing key as [].
    state.merge(alerts: (state.fetch(:alerts, []) + [rec]).last(200))
  end

  trigger(:"threshold_#{name}", condition: condition, action: action)
end
77
+ end
78
+
79
# ── Built-in message handlers ──────────────────────────────────────────
# Registers the AlertAgent message handlers on +klass+ through its +on+ DSL:
#   :silence      → state merged with silenced: true
#   :unsilence    → state merged with silenced: false
#   :alerts       → shallow copy of state[:alerts] ([] when the key is absent)
#   :clear_alerts → state merged with alerts: []
#
# While silenced, the threshold actions skip alert creation; scanning itself
# is not affected by these handlers.
private_class_method def self.inject_alert_handlers!(klass)
  { silence: true, unsilence: false }.each do |message, flag|
    klass.on(message) { |state:, **| state.merge(silenced: flag) }
  end

  klass.on(:alerts)       { |state:, **| state.fetch(:alerts, []).dup }
  klass.on(:clear_alerts) { |state:, **| state.merge(alerts: []) }
end

# ── Inheritance ────────────────────────────────────────────────────────
# Runs the parent hook first, then installs the handlers above on every new
# subclass, including anonymous ones such as Class.new(AlertAgent).
# NOTE(review): the original author states that ProactiveAgent.inherited
# resets the handler table, which is why re-injection is needed; that class
# is not visible here — confirm.
def self.inherited(subclass)
  super
  inject_alert_handlers!(subclass)
end

# AlertAgent itself gets the same four handlers as its subclasses.
inject_alert_handlers!(self)
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Igniter
4
+ module Agents
5
+ # Executes a sequence of callables where each step's output becomes the
6
+ # next step's input.
7
+ #
8
+ # Each step callable receives:
9
+ # input: [Object] — output from the previous step (or initial input)
10
+ # context: [Hash] — shared context unchanged throughout the run
11
+ # results: [Array<StepResult>] — results from all preceding steps
12
+ #
13
+ # On error the chain stops immediately by default. Pass +stop_on_error: false+
14
+ # to continue with the error message as the next step's input.
15
+ #
16
+ # @example Summarise → translate → format
17
+ # ref = ChainAgent.start
18
+ # ref.send(:set_chain, steps: [
19
+ # { name: :summarise, callable: ->(input:, **) { SummariseSkill.call(text: input) } },
20
+ # { name: :translate, callable: ->(input:, **) { TranslateSkill.call(text: input) } },
21
+ # { name: :format, callable: ->(input:, **) { FormatSkill.call(content: input) } }
22
+ # ])
23
+ # ref.send(:run, input: long_article, context: { target_lang: "es" })
24
+ # steps = ref.call(:results)
25
+ class ChainAgent < Igniter::Agent
26
+ # Immutable record for one completed step.
27
+ StepResult = Struct.new(:name, :input, :output, :status, keyword_init: true)
28
+
29
+ initial_state chain: [], results: [], context: {}
30
+
31
# Appends one step to the end of the chain.
#
# Payload keys (both required):
#   name     [String, Symbol] — step identifier, stored as a String
#   callable [#call]          — invoked with (input:, context:, results:)
on :add_step do |state:, payload:|
  appended = {
    name: payload.fetch(:name).to_s,
    callable: payload.fetch(:callable)
  }
  state.merge(chain: state[:chain] + [appended])
end

# Replaces the whole chain.
#
# Payload keys:
#   steps [Array<Hash>] — required; every element must provide :name and
#                         :callable (names are stored as Strings)
on :set_chain do |state:, payload:|
  normalized = Array(payload.fetch(:steps)).map do |spec|
    { name: spec.fetch(:name).to_s, callable: spec.fetch(:callable) }
  end
  state.merge(chain: normalized)
end

# Removes every step whose name matches.
#
# Payload keys:
#   name [String, Symbol] — required; compared after to_s
on :remove_step do |state:, payload:|
  target = payload.fetch(:name).to_s
  remaining = state[:chain].select { |step| step[:name] != target }
  state.merge(chain: remaining)
end

# Executes the chain, delegating to the private #run_chain of a fresh
# instance.
#
# Payload keys:
#   input         [Object]  — required; starting value for the first step
#   context       [Hash]    — shared context passed to every step; defaults
#                             to state[:context] (initially {}, otherwise the
#                             context stored by the previous run)
#   stop_on_error [Boolean] — halt on the first failing step (default: true)
on :run do |state:, payload:|
  new.send(:run_chain, state, payload)
end

# Query — step results stored by the most recent run.
#
# @return [Array<StepResult>]
on(:results) { |state:, **| state[:results] }

# Query — names of the registered steps, in chain order.
#
# @return [Array<String>]
on(:steps) { |state:, **| state[:chain].map { |step| step[:name] } }

# Clears results and context from the last run; the chain is kept.
on(:reset) { |state:, **| state.merge(results: [], context: {}) }
90
+
91
+ private
92
+
93
# Runs every step in +state[:chain]+ in order, feeding each step's output
# into the next step as its input.
#
# Each step is executed through #invoke_step and recorded as a StepResult.
# When a step reports +:error+ and +stop_on_error+ is true the loop ends
# after recording that step; otherwise the step's output (the error message,
# for a failed step) becomes the next input.
#
# Steps receive a snapshot of the results recorded so far rather than the
# live accumulator, so a step that mutates its +results:+ argument cannot
# corrupt the history returned from this method.
#
# @param state [Hash] agent state; reads :chain and :context
# @param payload [Hash] :input (required), :context (defaults to
#   state[:context]), :stop_on_error (defaults to true)
# @return [Hash] state merged with the new :results and the :context used
# @raise [KeyError] when payload has no :input
def run_chain(state, payload)
  input         = payload.fetch(:input)
  context       = payload.fetch(:context, state[:context])
  stop_on_error = payload.fetch(:stop_on_error, true)
  results       = []
  current       = input

  state[:chain].each do |step|
    # dup: hand out a copy so user callables cannot edit the real history.
    output, status = invoke_step(step[:callable], current, context, results.dup)

    results << StepResult.new(
      name: step[:name],
      input: current,
      output: output,
      status: status
    )

    break if status == :error && stop_on_error

    current = output
  end

  state.merge(results: results, context: context)
end
117
+
118
# Calls one step callable with the keyword arguments +input:+, +context:+
# and +results:+.
#
# @param callable [#call] the step to run
# @param input [Object] value handed to the step as +input:+
# @param context [Object] value handed to the step as +context:+
# @param results [Array] value handed to the step as +results:+
# @return [Array] +[output, :ok]+ on success, or +[message, :error]+ with
#   the exception message when the step raises a StandardError
def invoke_step(callable, input, context, results)
  begin
    value = callable.call(input: input, context: context, results: results)
  rescue StandardError => failure
    return [failure.message, :error]
  end

  [value, :ok]
end
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Igniter
4
+ module Agents
5
+ # Evaluates output quality and optionally retries generation until a score
6
+ # threshold is met.
7
+ #
8
+ # Two evaluation modes:
9
+ # * **Rule-based** (default) — heuristics based on length and emptiness;
10
+ # no external dependencies.
11
+ # * **LLM-assisted** — delegates to any callable that accepts +output:+ and
12
+ # +criteria:+ and returns +{ score: Float, feedback: String }+ or a
13
+ # String that will be parsed for a numeric score.
14
+ #
15
+ # Scores are on a 0–10 scale. The default passing threshold is 7.0.
16
+ #
17
+ # @example Rule-based gate
18
+ # ref = CriticAgent.start
19
+ # ref.send(:evaluate, output: "Short answer", criteria: "completeness")
20
+ # ev = ref.call(:last_evaluation)
21
+ # puts ev.passed # => false (too short)
22
+ #
23
+ # @example LLM-assisted with retry
24
+ # evaluator = ->(output:, criteria:) {
25
+ # result = MyGraderSkill.call(output: output, criteria: criteria)
26
+ # { score: result.score, feedback: result.feedback }
27
+ # }
28
+ # ref = CriticAgent.start(initial_state: { evaluator: evaluator, threshold: 8.0 })
29
+ # ref.send(:evaluate_and_retry,
30
+ # output: first_draft,
31
+ # criteria: "accuracy, completeness",
32
+ # max_retries: 2,
33
+ # generator: ->(draft:) { improve_draft(draft) },
34
+ # generator_args: { draft: first_draft }
35
+ # )
36
+ class CriticAgent < Igniter::Agent
37
+ # Immutable evaluation result.
38
+ Evaluation = Struct.new(:score, :feedback, :passed, :criteria, keyword_init: true)
39
+
40
+ initial_state evaluator: nil, threshold: 7.0, evaluations: []
41
+
42
# Evaluates a single output and appends the resulting Evaluation to
# state[:evaluations]. The work is done by the private #run_evaluation of a
# fresh instance.
#
# Payload keys:
#   output    [String, Object] — required; the artifact to evaluate
#   criteria  [String]         — evaluation criteria (default: "quality, relevance")
#   evaluator [#call, nil]     — overrides the state evaluator for this call
#   threshold [Float, nil]     — overrides the state threshold for this call
on :evaluate do |state:, payload:|
  verdict = new.send(:run_evaluation, payload, state)
  state.merge(evaluations: state[:evaluations] + [verdict])
end

# Evaluates, then re-generates and re-evaluates until an evaluation passes
# or the retries are used up. Delegates to the private
# #run_evaluate_and_retry of a fresh instance.
#
# Payload keys:
#   output         [String]  — required; initial output to evaluate
#   criteria       [String]  — evaluation criteria
#   generator      [#call]   — required; called with **generator_args to
#                              produce a new output
#   generator_args [Hash]    — arguments forwarded to generator (default: {})
#   max_retries    [Integer] — maximum re-generation attempts (default: 3)
#   evaluator      [#call]   — overrides the state evaluator
#   threshold      [Float]   — overrides the state threshold
on :evaluate_and_retry do |state:, payload:|
  new.send(:run_evaluate_and_retry, state, payload)
end

# Updates the default evaluator and/or threshold. Keys missing from the
# payload keep their current value; the threshold is stored as a Float.
#
# Payload keys:
#   evaluator [#call] — new default evaluator
#   threshold [Float] — new default threshold
on :configure do |state:, payload:|
  next_evaluator = payload.fetch(:evaluator, state[:evaluator])
  next_threshold = payload.fetch(:threshold, state[:threshold]).to_f
  state.merge(evaluator: next_evaluator, threshold: next_threshold)
end

# Query — the most recent Evaluation, or nil when none is recorded.
on(:last_evaluation) { |state:, **| state[:evaluations].last }

# Query — every recorded Evaluation.
on(:evaluations) { |state:, **| state[:evaluations] }

# Clears the evaluation history.
on(:clear) { |state:, **| state.merge(evaluations: []) }
96
+
97
+ private
98
+
99
# Scores one output and wraps the verdict in an Evaluation.
#
# Uses #llm_score when an evaluator is available (payload first, then
# state) and #rule_score otherwise. The evaluation passes when the score,
# as a Float, is at least the threshold.
#
# @param payload [Hash] :output (required), :criteria (default
#   "quality, relevance"), :evaluator and :threshold (default to the state
#   values)
# @param state [Hash] agent state; reads :evaluator and :threshold
# @return [Evaluation]
# @raise [KeyError] when payload has no :output
def run_evaluation(payload, state)
  artifact = payload.fetch(:output)
  criteria = payload.fetch(:criteria, "quality, relevance")
  grader   = payload.fetch(:evaluator, state[:evaluator])
  cutoff   = payload.fetch(:threshold, state[:threshold]).to_f

  raw_score, raw_feedback =
    if grader
      llm_score(grader, artifact, criteria)
    else
      rule_score(artifact)
    end
  numeric = raw_score.to_f

  Evaluation.new(
    score: numeric,
    feedback: raw_feedback.to_s,
    passed: numeric >= cutoff,
    criteria: criteria
  )
end
116
+
117
# Evaluates the initial output and, while the evaluation fails, asks the
# generator for a replacement and evaluates that, up to +max_retries+ extra
# attempts. The generator is always called with the same +generator_args+.
#
# @param state [Hash] agent state; reads :evaluations and is passed on to
#   #run_evaluation
# @param payload [Hash] :generator and :output (required), :max_retries
#   (default 3), :generator_args (default {}), plus any key understood by
#   #run_evaluation
# @return [Hash] state with every evaluation from this run appended to
#   :evaluations
# @raise [KeyError] when :generator or :output is missing
def run_evaluate_and_retry(state, payload)
  attempts       = payload.fetch(:max_retries, 3) + 1
  generator      = payload.fetch(:generator)
  generator_args = payload.fetch(:generator_args, {})
  candidate      = payload.fetch(:output)
  history        = []

  attempts.times do |round|
    # Round 0 scores the output supplied by the caller as-is.
    candidate = generator.call(**generator_args) unless round.zero?
    verdict = run_evaluation(payload.merge(output: candidate), state)
    history.push(verdict)
    break if verdict.passed
  end

  state.merge(evaluations: state[:evaluations] + history)
end
134
+
135
# Calls the user-supplied evaluator with +output:+ and +criteria:+ and
# normalises its answer to a +[score, feedback]+ pair.
#
# Accepted return shapes:
#   Hash   — score / feedback are read from the :score / :feedback keys, or
#            from "score" / "feedback" when the hash is string-keyed (as
#            produced by parsing a JSON response). Missing entries default
#            to 5.0 and "".
#   other  — converted with to_s; the first number in the text is the score
#            (5.0 when the text contains no number) and the text itself is
#            the feedback.
#
# @param evaluator [#call] grader accepting +output:+ and +criteria:+
# @param output [Object] artifact being graded
# @param criteria [String] criteria forwarded to the grader
# @return [Array] +[score, feedback]+; +[0.0, "Evaluator error: …"]+ when the
#   evaluator (or the normalisation) raises a StandardError
def llm_score(evaluator, output, criteria)
  result = evaluator.call(output: output, criteria: criteria)
  case result
  when Hash
    # Symbol keys win; string keys are the fallback before the defaults.
    score    = result.fetch(:score)    { result.fetch("score", 5.0) }
    feedback = result.fetch(:feedback) { result.fetch("feedback", "") }
    [score, feedback]
  else
    text  = result.to_s
    score = text[/\b(\d+(?:\.\d+)?)\b/, 1]&.to_f || 5.0
    [score, text]
  end
rescue StandardError => e
  [0.0, "Evaluator error: #{e.message}"]
end
151
+
152
# Length-only fallback scorer used when no evaluator is configured.
#
# The output is converted with to_s and stripped, then bucketed by length:
#   0       → 0.0
#   1..49   → 3.0
#   50..199 → 5.5
#   200+    → 7.5
#
# @param output [Object] artifact to score
# @return [Array(Float, String)] score and a short explanation
def rule_score(output)
  stripped = output.to_s.strip

  case stripped.length
  when 0        then [0.0, "Output is empty"]
  when 1...50   then [3.0, "Output is very short"]
  when 50...200 then [5.5, "Output is below average length"]
  else               [7.5, "Output meets basic length criteria"]
  end
end
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Igniter
4
+ module Agents
5
+ # Tracks performance metrics for named subjects (agents, services,
6
+ # contracts), computes weighted aggregate scores, and compares subjects
7
+ # against each other or against stored baselines.
8
+ #
9
+ # Scoring:
10
+ # * Metrics are grouped by name; the last 20 readings are averaged.
11
+ # * Per-subject or global weights scale each metric's contribution.
12
+ # * When a baseline is set the score is normalised to 0–100 relative to it;
13
+ # without a baseline the raw weighted average is returned.
14
+ # * Grades: A ≥ 90, B ≥ 75, C ≥ 60, D otherwise.
15
+ #
16
+ # @example Track two services and compare
17
+ # ref = EvaluatorAgent.start
18
+ # ref.send(:record_metric, subject: :api, name: :throughput, value: 850)
19
+ # ref.send(:record_metric, subject: :api, name: :error_rate, value: 2.1)
20
+ # ref.send(:set_baseline, subject: :api, baseline: 800)
21
+ # ref.send(:evaluate, subject: :api)
22
+ # ev = ref.call(:evaluations, subject: :api).last
23
+ # puts ev.grade # => "D" (the mean of 850 and 2.1 is 426.05, about 53% of the 800 baseline)
24
+ class EvaluatorAgent < Igniter::Agent
25
+ MetricRecord = Struct.new(:name, :value, :recorded_at, keyword_init: true)
26
+ Evaluation = Struct.new(:subject, :score, :grade, :metrics,
27
+ :recorded_at, keyword_init: true)
28
+ Comparison = Struct.new(:subject_a, :subject_b, :winner, :delta,
29
+ keyword_init: true)
30
+
31
+ GRADES = [
32
+ [90.0, "A"],
33
+ [75.0, "B"],
34
+ [60.0, "C"],
35
+ [ 0.0, "D"]
36
+ ].freeze
37
+
38
+ # subjects: Hash<String, { metrics: Array<MetricRecord>, baseline: Float?, weights: Hash }>
39
+ initial_state \
40
+ subjects: {},
41
+ evaluations: [],
42
+ weights: {}
43
+
44
# Records one metric reading for a subject. The subject entry is created on
# first use and keeps at most the 200 most recent readings.
#
# Payload keys (all required):
#   subject [String, Symbol] — subject identifier (stored as a String)
#   name    [String, Symbol] — metric name (stored as a String)
#   value   [Numeric]        — metric value (stored as a Float)
on :record_metric do |state:, payload:|
  key = payload.fetch(:subject).to_s
  reading = MetricRecord.new(
    name: payload.fetch(:name).to_s,
    value: payload.fetch(:value).to_f,
    recorded_at: Time.now
  )
  current = state[:subjects].fetch(key) { { metrics: [], baseline: nil, weights: {} } }
  history = (current[:metrics] + [reading]).last(200)
  state.merge(subjects: state[:subjects].merge(key => current.merge(metrics: history)))
end

# Stores the reference baseline for a subject; evaluations of that subject
# are then expressed as a percentage of it.
#
# Payload keys (all required):
#   subject  [String, Symbol]
#   baseline [Numeric] — stored as a Float
on :set_baseline do |state:, payload:|
  key = payload.fetch(:subject).to_s
  reference = payload.fetch(:baseline).to_f
  current = state[:subjects].fetch(key) { { metrics: [], baseline: nil, weights: {} } }
  state.merge(subjects: state[:subjects].merge(key => current.merge(baseline: reference)))
end

# Replaces the per-metric weights of a subject.
#
# Payload keys (all required):
#   subject [String, Symbol]
#   weights [Hash<String, Numeric>] — metric name → weight; keys are
#                                     converted to Strings
on :set_weights do |state:, payload:|
  key = payload.fetch(:subject).to_s
  table = payload.fetch(:weights).transform_keys(&:to_s)
  current = state[:subjects].fetch(key) { { metrics: [], baseline: nil, weights: {} } }
  state.merge(subjects: state[:subjects].merge(key => current.merge(weights: table)))
end

# Computes an Evaluation for a subject (via the private #compute_evaluation
# of a fresh instance) and appends it to the history. The state is returned
# unchanged when no evaluation could be computed.
#
# Payload keys:
#   subject [String, Symbol] — required
on :evaluate do |state:, payload:|
  verdict = new.send(:compute_evaluation, payload.fetch(:subject).to_s, state)
  verdict ? state.merge(evaluations: state[:evaluations] + [verdict]) : state
end

# Query — compares the most recent evaluations of two subjects.
# The winner is the subject with the higher score, or :tie; delta is the
# absolute score difference rounded to 4 decimals.
#
# Payload keys:
#   a [String, Symbol] — first subject
#   b [String, Symbol] — second subject
#
# @return [Comparison, nil] nil when either subject has no evaluation yet
on :compare do |state:, payload:|
  latest_a = state[:evaluations].reverse_each.find { |ev| ev.subject == payload.fetch(:a).to_s }
  latest_b = state[:evaluations].reverse_each.find { |ev| ev.subject == payload.fetch(:b).to_s }
  next nil unless latest_a && latest_b

  difference = (latest_a.score - latest_b.score).round(4)
  leader = case difference <=> 0
           when 1  then latest_a.subject
           when -1 then latest_b.subject
           else :tie
           end

  Comparison.new(
    subject_a: latest_a.subject,
    subject_b: latest_b.subject,
    winner: leader,
    delta: difference.abs
  )
end

# Query — all evaluations, optionally restricted to one subject. A new
# array is returned in both cases.
#
# Payload keys:
#   subject [String, Symbol, nil]
#
# @return [Array<Evaluation>]
on :evaluations do |state:, payload:|
  wanted = payload&.fetch(:subject, nil)
  next state[:evaluations].dup unless wanted

  state[:evaluations].select { |ev| ev.subject == wanted.to_s }
end

# Query — names of all registered subjects.
#
# @return [Array<String>]
on(:subjects) { |state:, **| state[:subjects].keys }
141
+
142
# Sets the global default weights, which apply to any metric a subject has
# no weight of its own for.
#
# Keys are converted to Strings, matching :set_weights and the String metric
# names stored by :record_metric; without that, weights given with Symbol
# keys would never match a metric. A payload without :weights (or with a nil
# value) leaves the state unchanged.
#
# Payload keys:
#   weights [Hash<String, Numeric>] — metric name → weight
on :configure do |state:, payload:|
  weights = payload[:weights]
  next state if weights.nil?

  state.merge(weights: weights.transform_keys(&:to_s))
end

# Clears all subjects and evaluations; the global weights are kept.
on :reset do |state:, **|
  state.merge(subjects: {}, evaluations: [])
end
154
+
155
+ private
156
+
157
# Builds an Evaluation for the subject stored under +name+.
#
# Scoring steps:
#   1. Readings are grouped by metric name; each metric contributes the mean
#      of its last 20 readings multiplied by its weight (default 1.0).
#   2. The raw score is the weighted sum divided by the total weight (the
#      divisor is floored at 0.001).
#   3. With a positive baseline the score is raw / baseline * 100, capped at
#      100.0; otherwise it is the raw value. Both are rounded to 4 decimals.
#   4. The grade is the first GRADES entry whose floor the score reaches,
#      falling back to "D".
#
# Weight tables are keyed by String before lookup because metric names are
# Strings: global weights supplied with Symbol keys would otherwise be
# silently ignored. Per-subject weights override global ones.
#
# @param name [String] subject key in state[:subjects]
# @param state [Hash] agent state; reads :subjects and :weights
# @return [Evaluation, nil] nil when the subject is unknown or has no metrics
def compute_evaluation(name, state)
  data = state[:subjects][name]
  return nil unless data && data[:metrics].any?

  global_weights  = (state[:weights] || {}).transform_keys(&:to_s)
  subject_weights = (data[:weights] || {}).transform_keys(&:to_s)
  weights  = global_weights.merge(subject_weights)
  baseline = data[:baseline]

  grouped = data[:metrics].group_by(&:name)
  score_parts = grouped.map do |metric_name, records|
    recent = records.last(20) # group_by never yields an empty group
    avg    = recent.sum(&:value) / recent.size
    weight = (weights[metric_name] || 1.0).to_f
    [avg * weight, weight]
  end

  total_weight = [score_parts.sum { |_, w| w }, 0.001].max
  raw          = score_parts.sum { |v, _| v } / total_weight

  score = if baseline && baseline > 0
            [(raw / baseline * 100).round(4), 100.0].min
          else
            raw.round(4)
          end

  grade = GRADES.find { |floor, _| score.to_f >= floor }&.last || "D"

  Evaluation.new(
    subject: name,
    score: score,
    grade: grade,
    metrics: grouped.keys,
    recorded_at: Time.now
  )
end
191
+ end
192
+ end
193
+ end