igniter 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +217 -0
- data/docs/APPLICATION_V1.md +253 -0
- data/docs/CAPABILITIES_V1.md +207 -0
- data/docs/CONSENSUS_V1.md +477 -0
- data/docs/CONTENT_ADDRESSING_V1.md +221 -0
- data/docs/DATAFLOW_V1.md +274 -0
- data/docs/MESH_V1.md +732 -0
- data/docs/NODE_CACHE_V1.md +324 -0
- data/docs/PROACTIVE_AGENTS_V1.md +293 -0
- data/docs/SERVER_V1.md +200 -1
- data/docs/SKILLS_V1.md +213 -0
- data/docs/STORE_ADAPTERS.md +41 -13
- data/docs/TEMPORAL_V1.md +174 -0
- data/docs/TOOLS_V1.md +347 -0
- data/docs/TRANSCRIPTION_V1.md +403 -0
- data/examples/README.md +37 -0
- data/examples/consensus.rb +239 -0
- data/examples/dataflow.rb +308 -0
- data/examples/elocal_webhook.rb +1 -0
- data/examples/incremental.rb +142 -0
- data/examples/llm_tools.rb +237 -0
- data/examples/mesh.rb +239 -0
- data/examples/mesh_discovery.rb +267 -0
- data/examples/mesh_gossip.rb +162 -0
- data/examples/ringcentral_routing.rb +1 -1
- data/lib/igniter/agents/ai/alert_agent.rb +111 -0
- data/lib/igniter/agents/ai/chain_agent.rb +127 -0
- data/lib/igniter/agents/ai/critic_agent.rb +163 -0
- data/lib/igniter/agents/ai/evaluator_agent.rb +193 -0
- data/lib/igniter/agents/ai/evolution_agent.rb +286 -0
- data/lib/igniter/agents/ai/health_check_agent.rb +122 -0
- data/lib/igniter/agents/ai/observer_agent.rb +184 -0
- data/lib/igniter/agents/ai/planner_agent.rb +210 -0
- data/lib/igniter/agents/ai/router_agent.rb +131 -0
- data/lib/igniter/agents/ai/self_reflection_agent.rb +175 -0
- data/lib/igniter/agents/observability/metrics_agent.rb +130 -0
- data/lib/igniter/agents/pipeline/batch_processor_agent.rb +131 -0
- data/lib/igniter/agents/proactive_agent.rb +208 -0
- data/lib/igniter/agents/reliability/retry_agent.rb +99 -0
- data/lib/igniter/agents/scheduling/cron_agent.rb +110 -0
- data/lib/igniter/agents.rb +56 -0
- data/lib/igniter/application/app_config.rb +32 -0
- data/lib/igniter/application/autoloader.rb +18 -0
- data/lib/igniter/application/generator.rb +157 -0
- data/lib/igniter/application/scheduler.rb +109 -0
- data/lib/igniter/application/yml_loader.rb +39 -0
- data/lib/igniter/application.rb +174 -0
- data/lib/igniter/capabilities.rb +68 -0
- data/lib/igniter/compiler/validators/dependencies_validator.rb +50 -2
- data/lib/igniter/compiler/validators/remote_validator.rb +2 -0
- data/lib/igniter/consensus/cluster.rb +183 -0
- data/lib/igniter/consensus/errors.rb +14 -0
- data/lib/igniter/consensus/executors.rb +43 -0
- data/lib/igniter/consensus/node.rb +320 -0
- data/lib/igniter/consensus/read_query.rb +30 -0
- data/lib/igniter/consensus/state_machine.rb +58 -0
- data/lib/igniter/consensus.rb +58 -0
- data/lib/igniter/content_addressing.rb +133 -0
- data/lib/igniter/contract.rb +12 -0
- data/lib/igniter/dataflow/aggregate_operators.rb +147 -0
- data/lib/igniter/dataflow/aggregate_state.rb +77 -0
- data/lib/igniter/dataflow/diff.rb +37 -0
- data/lib/igniter/dataflow/diff_state.rb +81 -0
- data/lib/igniter/dataflow/incremental_collection_result.rb +39 -0
- data/lib/igniter/dataflow/window_filter.rb +48 -0
- data/lib/igniter/dataflow.rb +65 -0
- data/lib/igniter/dsl/contract_builder.rb +71 -7
- data/lib/igniter/executor.rb +60 -0
- data/lib/igniter/extensions/capabilities.rb +39 -0
- data/lib/igniter/extensions/content_addressing.rb +5 -0
- data/lib/igniter/extensions/dataflow.rb +117 -0
- data/lib/igniter/extensions/incremental.rb +50 -0
- data/lib/igniter/extensions/mesh.rb +31 -0
- data/lib/igniter/fingerprint.rb +43 -0
- data/lib/igniter/incremental/formatter.rb +81 -0
- data/lib/igniter/incremental/result.rb +69 -0
- data/lib/igniter/incremental/tracker.rb +108 -0
- data/lib/igniter/incremental.rb +50 -0
- data/lib/igniter/integrations/llm/config.rb +48 -4
- data/lib/igniter/integrations/llm/executor.rb +221 -28
- data/lib/igniter/integrations/llm/providers/anthropic.rb +37 -4
- data/lib/igniter/integrations/llm/providers/openai.rb +34 -5
- data/lib/igniter/integrations/llm/transcription/providers/assemblyai.rb +200 -0
- data/lib/igniter/integrations/llm/transcription/providers/base.rb +122 -0
- data/lib/igniter/integrations/llm/transcription/providers/deepgram.rb +162 -0
- data/lib/igniter/integrations/llm/transcription/providers/openai.rb +102 -0
- data/lib/igniter/integrations/llm/transcription/transcriber.rb +145 -0
- data/lib/igniter/integrations/llm/transcription/transcript_result.rb +29 -0
- data/lib/igniter/integrations/llm.rb +37 -1
- data/lib/igniter/memory/agent_memory.rb +104 -0
- data/lib/igniter/memory/episode.rb +29 -0
- data/lib/igniter/memory/fact.rb +27 -0
- data/lib/igniter/memory/memorable.rb +90 -0
- data/lib/igniter/memory/reflection_cycle.rb +96 -0
- data/lib/igniter/memory/reflection_record.rb +28 -0
- data/lib/igniter/memory/store.rb +115 -0
- data/lib/igniter/memory/stores/in_memory.rb +136 -0
- data/lib/igniter/memory/stores/sqlite.rb +284 -0
- data/lib/igniter/memory.rb +80 -0
- data/lib/igniter/mesh/announcer.rb +55 -0
- data/lib/igniter/mesh/config.rb +45 -0
- data/lib/igniter/mesh/discovery.rb +39 -0
- data/lib/igniter/mesh/errors.rb +31 -0
- data/lib/igniter/mesh/gossip.rb +47 -0
- data/lib/igniter/mesh/peer.rb +21 -0
- data/lib/igniter/mesh/peer_registry.rb +51 -0
- data/lib/igniter/mesh/poller.rb +77 -0
- data/lib/igniter/mesh/router.rb +109 -0
- data/lib/igniter/mesh.rb +85 -0
- data/lib/igniter/metrics/collector.rb +131 -0
- data/lib/igniter/metrics/prometheus_exporter.rb +104 -0
- data/lib/igniter/metrics/snapshot.rb +8 -0
- data/lib/igniter/metrics.rb +37 -0
- data/lib/igniter/model/aggregate_node.rb +34 -0
- data/lib/igniter/model/collection_node.rb +3 -2
- data/lib/igniter/model/compute_node.rb +13 -0
- data/lib/igniter/model/remote_node.rb +18 -2
- data/lib/igniter/node_cache.rb +231 -0
- data/lib/igniter/replication/bootstrapper.rb +61 -0
- data/lib/igniter/replication/bootstrappers/gem.rb +32 -0
- data/lib/igniter/replication/bootstrappers/git.rb +39 -0
- data/lib/igniter/replication/bootstrappers/tarball.rb +56 -0
- data/lib/igniter/replication/expansion_plan.rb +38 -0
- data/lib/igniter/replication/expansion_planner.rb +142 -0
- data/lib/igniter/replication/manifest.rb +45 -0
- data/lib/igniter/replication/network_topology.rb +123 -0
- data/lib/igniter/replication/node_role.rb +42 -0
- data/lib/igniter/replication/reflective_replication_agent.rb +238 -0
- data/lib/igniter/replication/replication_agent.rb +87 -0
- data/lib/igniter/replication/role_registry.rb +73 -0
- data/lib/igniter/replication/ssh_session.rb +77 -0
- data/lib/igniter/replication.rb +54 -0
- data/lib/igniter/runtime/cache.rb +35 -6
- data/lib/igniter/runtime/execution.rb +26 -2
- data/lib/igniter/runtime/input_validator.rb +6 -2
- data/lib/igniter/runtime/node_state.rb +7 -2
- data/lib/igniter/runtime/resolver.rb +323 -31
- data/lib/igniter/runtime/stores/redis_store.rb +41 -4
- data/lib/igniter/server/client.rb +44 -1
- data/lib/igniter/server/config.rb +13 -6
- data/lib/igniter/server/handlers/event_handler.rb +4 -0
- data/lib/igniter/server/handlers/execute_handler.rb +6 -0
- data/lib/igniter/server/handlers/liveness_handler.rb +20 -0
- data/lib/igniter/server/handlers/manifest_handler.rb +34 -0
- data/lib/igniter/server/handlers/metrics_handler.rb +51 -0
- data/lib/igniter/server/handlers/peers_handler.rb +115 -0
- data/lib/igniter/server/handlers/readiness_handler.rb +47 -0
- data/lib/igniter/server/http_server.rb +54 -17
- data/lib/igniter/server/router.rb +54 -21
- data/lib/igniter/server/server_logger.rb +52 -0
- data/lib/igniter/server.rb +6 -0
- data/lib/igniter/skill/feedback.rb +116 -0
- data/lib/igniter/skill/output_schema.rb +110 -0
- data/lib/igniter/skill.rb +218 -0
- data/lib/igniter/temporal.rb +84 -0
- data/lib/igniter/tool/discoverable.rb +151 -0
- data/lib/igniter/tool.rb +52 -0
- data/lib/igniter/tool_registry.rb +144 -0
- data/lib/igniter/version.rb +1 -1
- data/lib/igniter.rb +17 -0
- metadata +128 -1
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../proactive_agent"
|
|
4
|
+
|
|
5
|
+
module Igniter
|
|
6
|
+
module Agents
|
|
7
|
+
# Threshold-based alerting agent.
|
|
8
|
+
#
|
|
9
|
+
# AlertAgent extends ProactiveAgent with an opinionated DSL for declaring
|
|
10
|
+
# numeric thresholds. A single +monitor+ / +threshold+ pair registers both
|
|
11
|
+
# the watcher and the trigger automatically.
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# class ApiAlerts < Igniter::Agents::AlertAgent
|
|
15
|
+
# intent "Alert when API error rate or latency spikes"
|
|
16
|
+
# scan_interval 15.0
|
|
17
|
+
#
|
|
18
|
+
# monitor :error_rate, source: -> { Metrics.error_rate }
|
|
19
|
+
# monitor :p99_latency, source: -> { Metrics.p99 }
|
|
20
|
+
#
|
|
21
|
+
# threshold :error_rate, above: 0.05
|
|
22
|
+
# threshold :p99_latency, above: 500
|
|
23
|
+
# end
|
|
24
|
+
#
|
|
25
|
+
# ref = ApiAlerts.start
|
|
26
|
+
# alerts = ref.call(:alerts) # => Array<AlertRecord>
|
|
27
|
+
class AlertAgent < ProactiveAgent
|
|
28
|
+
# Immutable record created when a threshold is breached.
|
|
29
|
+
AlertRecord = Struct.new(:metric, :value, :kind, :threshold,
|
|
30
|
+
:fired_at, keyword_init: true)
|
|
31
|
+
|
|
32
|
+
proactive_initial_state alerts: [], silenced: false
|
|
33
|
+
|
|
34
|
+
class << self
|
|
35
|
+
# Register a polling source for a named metric.
|
|
36
|
+
# Usually called before +threshold+ for the same metric name.
|
|
37
|
+
#
|
|
38
|
+
# @param metric [Symbol, String]
|
|
39
|
+
# @param source [#call] — zero-argument callable returning a Numeric
|
|
40
|
+
def monitor(metric, source:)
|
|
41
|
+
watch(metric.to_sym, poll: source)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Declare numeric bounds for a metric and register the matching trigger.
#
# The trigger is named +threshold_<metric>+. Its condition reads the metric
# from the context it is given and is truthy when the reading is strictly
# above +above+ or strictly below +below+ (a nil reading never breaches).
# Its action appends an AlertRecord to +state[:alerts]+, keeping only the
# newest 200 records, and returns the state untouched while
# +state[:silenced]+ is set.
#
# @param metric [Symbol, String] metric name (converted with +to_sym+)
# @param above [Numeric, nil] upper bound; breach if value > above
# @param below [Numeric, nil] lower bound; breach if value < below
def threshold(metric, above: nil, below: nil)
  key = metric.to_sym

  breached = lambda do |ctx|
    reading = ctx[key]
    next false if reading.nil?

    (above && reading.to_f > above.to_f) ||
      (below && reading.to_f < below.to_f)
  end

  record_alert = lambda do |state:, context:|
    if state[:silenced]
      state
    else
      reading = context[key]
      # Anything that is not an upper-bound breach is recorded as :below.
      kind = above && reading.to_f > above.to_f ? :above : :below
      limit = kind == :above ? above : below
      alert = AlertRecord.new(
        metric: key,
        value: reading,
        kind: kind,
        threshold: limit,
        fired_at: Time.now
      )
      state.merge(alerts: (state[:alerts] + [alert]).last(200))
    end
  end

  trigger(:"threshold_#{key}", condition: breached, action: record_alert)
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# ── Inheritance ────────────────────────────────────────────────────────
|
|
80
|
+
# Re-inject AlertAgent-specific handlers into every subclass so that
|
|
81
|
+
# anonymous test classes (Class.new(AlertAgent)) also have them.
|
|
82
|
+
def self.inherited(subclass)
|
|
83
|
+
super # ProactiveAgent.inherited → resets @handlers, injects proactive ones
|
|
84
|
+
inject_alert_handlers!(subclass)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
private_class_method def self.inject_alert_handlers!(klass)
|
|
88
|
+
klass.on(:silence) { |state:, **| state.merge(silenced: true) }
|
|
89
|
+
klass.on(:unsilence) { |state:, **| state.merge(silenced: false) }
|
|
90
|
+
klass.on(:alerts) { |state:, **| state.fetch(:alerts, []).dup }
|
|
91
|
+
klass.on(:clear_alerts) { |state:, **| state.merge(alerts: []) }
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Suppress alert creation (scans and condition checks still run).
|
|
95
|
+
on :silence do |state:, **| state.merge(silenced: true) end
|
|
96
|
+
on :unsilence do |state:, **| state.merge(silenced: false) end
|
|
97
|
+
|
|
98
|
+
# Sync query — all recorded AlertRecord objects.
|
|
99
|
+
#
|
|
100
|
+
# @return [Array<AlertRecord>]
|
|
101
|
+
on :alerts do |state:, **|
|
|
102
|
+
state.fetch(:alerts, []).dup
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# Clear alert history.
|
|
106
|
+
on :clear_alerts do |state:, **|
|
|
107
|
+
state.merge(alerts: [])
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Agents
|
|
5
|
+
# Executes a sequence of callables where each step's output becomes the
|
|
6
|
+
# next step's input.
|
|
7
|
+
#
|
|
8
|
+
# Each step callable receives:
|
|
9
|
+
# input: [Object] — output from the previous step (or initial input)
|
|
10
|
+
# context: [Hash] — shared context unchanged throughout the run
|
|
11
|
+
# results: [Array<StepResult>] — results from all preceding steps
|
|
12
|
+
#
|
|
13
|
+
# On error the chain stops immediately by default. Pass +stop_on_error: false+
|
|
14
|
+
# to continue with the error message as the next step's input.
|
|
15
|
+
#
|
|
16
|
+
# @example Summarise → translate → format
|
|
17
|
+
# ref = ChainAgent.start
|
|
18
|
+
# ref.send(:set_chain, steps: [
|
|
19
|
+
# { name: :summarise, callable: ->(input:, **) { SummariseSkill.call(text: input) } },
|
|
20
|
+
# { name: :translate, callable: ->(input:, **) { TranslateSkill.call(text: input) } },
|
|
21
|
+
# { name: :format, callable: ->(input:, **) { FormatSkill.call(content: input) } }
|
|
22
|
+
# ])
|
|
23
|
+
# ref.send(:run, input: long_article, context: { target_lang: "es" })
|
|
24
|
+
# steps = ref.call(:results)
|
|
25
|
+
class ChainAgent < Igniter::Agent
|
|
26
|
+
# Immutable record for one completed step.
|
|
27
|
+
StepResult = Struct.new(:name, :input, :output, :status, keyword_init: true)
|
|
28
|
+
|
|
29
|
+
initial_state chain: [], results: [], context: {}
|
|
30
|
+
|
|
31
|
+
# Append a step to the end of the chain.
|
|
32
|
+
#
|
|
33
|
+
# Payload keys:
|
|
34
|
+
# name [String, Symbol] — step identifier
|
|
35
|
+
# callable [#call] — receives (input:, context:, results:)
|
|
36
|
+
on :add_step do |state:, payload:|
|
|
37
|
+
step = { name: payload.fetch(:name).to_s, callable: payload.fetch(:callable) }
|
|
38
|
+
state.merge(chain: state[:chain] + [step])
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Replace the entire chain.
|
|
42
|
+
#
|
|
43
|
+
# Payload keys:
|
|
44
|
+
# steps [Array<Hash>] — each element must have :name and :callable keys
|
|
45
|
+
on :set_chain do |state:, payload:|
|
|
46
|
+
steps = Array(payload.fetch(:steps)).map do |s|
|
|
47
|
+
{ name: s.fetch(:name).to_s, callable: s.fetch(:callable) }
|
|
48
|
+
end
|
|
49
|
+
state.merge(chain: steps)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Remove a step by name.
|
|
53
|
+
#
|
|
54
|
+
# Payload keys:
|
|
55
|
+
# name [String, Symbol]
|
|
56
|
+
on :remove_step do |state:, payload:|
|
|
57
|
+
name = payload.fetch(:name).to_s
|
|
58
|
+
state.merge(chain: state[:chain].reject { |s| s[:name] == name })
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Execute the chain with an initial input.
|
|
62
|
+
#
|
|
63
|
+
# Payload keys:
|
|
64
|
+
# input [Object] — starting value for the first step
|
|
65
|
+
# context [Hash] — shared context passed to every step (default: {})
|
|
66
|
+
# stop_on_error [Boolean] — halt on first error (default: true)
|
|
67
|
+
on :run do |state:, payload:|
|
|
68
|
+
agent = new
|
|
69
|
+
agent.send(:run_chain, state, payload)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Sync query — step results from the most recent run.
|
|
73
|
+
#
|
|
74
|
+
# @return [Array<StepResult>]
|
|
75
|
+
on :results do |state:, **|
|
|
76
|
+
state[:results]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Sync query — list registered step names.
|
|
80
|
+
#
|
|
81
|
+
# @return [Array<String>]
|
|
82
|
+
on :steps do |state:, **|
|
|
83
|
+
state[:chain].map { |s| s[:name] }
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Clear results and context from the last run (chain is preserved).
|
|
87
|
+
on :reset do |state:, **|
|
|
88
|
+
state.merge(results: [], context: {})
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Run every step in +state[:chain]+ in order, feeding each step's output
# into the next one.
#
# @param state [Hash] agent state; reads +:chain+ and, as a fallback, +:context+
# @param payload [Hash] requires +:input+; optional +:context+ and
#   +:stop_on_error+ (defaults to true)
# @return [Hash] state merged with the collected +:results+ and the +:context+ used
# @raise [KeyError] when +:input+ is missing from the payload
def run_chain(state, payload)
  carried = payload.fetch(:input)
  shared  = payload.fetch(:context, state[:context])
  halt    = payload.fetch(:stop_on_error, true)
  history = []

  state[:chain].each do |step|
    # The same history array is handed to every step, so a step sees the
    # results of all steps that ran before it.
    produced, outcome = invoke_step(step[:callable], carried, shared, history)
    history << StepResult.new(name: step[:name], input: carried,
                              output: produced, status: outcome)

    break if halt && outcome == :error

    carried = produced
  end

  state.merge(results: history, context: shared)
end
|
|
117
|
+
|
|
118
|
+
# Call one step and report how it went.
#
# @param callable [#call] invoked with +input:+, +context:+ and +results:+
# @param input [Object] value handed to the step
# @param context [Hash] shared context for the run
# @param results [Array] results of the steps that ran earlier
# @return [Array(Object, Symbol)] +[output, :ok]+ on success, or
#   +[error message, :error]+ when the step raises a StandardError
def invoke_step(callable, input, context, results)
  begin
    value = callable.call(input: input, context: context, results: results)
  rescue StandardError => error
    return [error.message, :error]
  end

  [value, :ok]
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Agents
|
|
5
|
+
# Evaluates output quality and optionally retries generation until a score
|
|
6
|
+
# threshold is met.
|
|
7
|
+
#
|
|
8
|
+
# Two evaluation modes:
|
|
9
|
+
# * **Rule-based** (default) — heuristics based on length and emptiness;
|
|
10
|
+
# no external dependencies.
|
|
11
|
+
# * **LLM-assisted** — delegates to any callable that accepts +output:+ and
|
|
12
|
+
# +criteria:+ and returns +{ score: Float, feedback: String }+ or a
|
|
13
|
+
# String that will be parsed for a numeric score.
|
|
14
|
+
#
|
|
15
|
+
# Scores are on a 0–10 scale. The default passing threshold is 7.0.
|
|
16
|
+
#
|
|
17
|
+
# @example Rule-based gate
|
|
18
|
+
# ref = CriticAgent.start
|
|
19
|
+
# ref.send(:evaluate, output: "Short answer", criteria: "completeness")
|
|
20
|
+
# ev = ref.call(:last_evaluation)
|
|
21
|
+
# puts ev.passed # => false (too short)
|
|
22
|
+
#
|
|
23
|
+
# @example LLM-assisted with retry
|
|
24
|
+
# evaluator = ->(output:, criteria:) {
|
|
25
|
+
# result = MyGraderSkill.call(output: output, criteria: criteria)
|
|
26
|
+
# { score: result.score, feedback: result.feedback }
|
|
27
|
+
# }
|
|
28
|
+
# ref = CriticAgent.start(initial_state: { evaluator: evaluator, threshold: 8.0 })
|
|
29
|
+
# ref.send(:evaluate_and_retry,
|
|
30
|
+
# output: first_draft,
|
|
31
|
+
# criteria: "accuracy, completeness",
|
|
32
|
+
# max_retries: 2,
|
|
33
|
+
# generator: ->(draft:) { improve_draft(draft) },
|
|
34
|
+
# generator_args: { draft: first_draft }
|
|
35
|
+
# )
|
|
36
|
+
class CriticAgent < Igniter::Agent
|
|
37
|
+
# Immutable evaluation result.
|
|
38
|
+
Evaluation = Struct.new(:score, :feedback, :passed, :criteria, keyword_init: true)
|
|
39
|
+
|
|
40
|
+
initial_state evaluator: nil, threshold: 7.0, evaluations: []
|
|
41
|
+
|
|
42
|
+
# Evaluate a single output.
|
|
43
|
+
#
|
|
44
|
+
# Payload keys:
|
|
45
|
+
# output [String, Object] — required; the artifact to evaluate
|
|
46
|
+
# criteria [String] — evaluation criteria (default: "quality, relevance")
|
|
47
|
+
# evaluator [#call, nil] — override state evaluator for this call
|
|
48
|
+
# threshold [Float, nil] — override state threshold for this call
|
|
49
|
+
on :evaluate do |state:, payload:|
|
|
50
|
+
agent = new
|
|
51
|
+
ev = agent.send(:run_evaluation, payload, state)
|
|
52
|
+
state.merge(evaluations: state[:evaluations] + [ev])
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Evaluate and re-generate until the score passes or retries are exhausted.
|
|
56
|
+
#
|
|
57
|
+
# Payload keys:
|
|
58
|
+
# output [String] — initial output to evaluate
|
|
59
|
+
# criteria [String] — evaluation criteria
|
|
60
|
+
# generator [#call] — required; called with **generator_args to produce a new output
|
|
61
|
+
# generator_args [Hash] — arguments forwarded to generator (default: {})
|
|
62
|
+
# max_retries [Integer] — maximum re-generation attempts (default: 3)
|
|
63
|
+
# evaluator [#call] — override state evaluator
|
|
64
|
+
# threshold [Float] — override state threshold
|
|
65
|
+
on :evaluate_and_retry do |state:, payload:|
|
|
66
|
+
agent = new
|
|
67
|
+
agent.send(:run_evaluate_and_retry, state, payload)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Set default evaluator and/or threshold.
#
# Payload keys:
#   evaluator [#call] — new default evaluator; an explicit nil clears it
#   threshold [Float] — new default threshold; when absent or nil the
#                       current threshold is kept
on :configure do |state:, payload:|
  state.merge(
    evaluator: payload.fetch(:evaluator, state[:evaluator]),
    # nil.to_f is 0.0, which would make every later evaluation pass, so a
    # nil threshold falls back to the value already in state.
    threshold: (payload[:threshold] || state[:threshold]).to_f
  )
end
|
|
81
|
+
|
|
82
|
+
# Sync query — returns the most recent Evaluation, or nil.
|
|
83
|
+
on :last_evaluation do |state:, **|
|
|
84
|
+
state[:evaluations].last
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Sync query — returns all recorded Evaluation structs.
|
|
88
|
+
on :evaluations do |state:, **|
|
|
89
|
+
state[:evaluations]
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
# Clear evaluation history.
|
|
93
|
+
on :clear do |state:, **|
|
|
94
|
+
state.merge(evaluations: [])
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Score one output and wrap the outcome in an Evaluation.
#
# Uses +llm_score+ when an evaluator is available (payload +:evaluator+,
# else +state[:evaluator]+) and +rule_score+ otherwise.
#
# @param payload [Hash] requires +:output+; optional +:criteria+
#   (default "quality, relevance"), +:evaluator+ and +:threshold+
# @param state [Hash] supplies the default +:evaluator+ and +:threshold+
# @return [Evaluation] +passed+ is true when the score is >= the threshold
# @raise [KeyError] when +:output+ is missing from the payload
def run_evaluation(payload, state)
  output    = payload.fetch(:output)
  criteria  = payload.fetch(:criteria, "quality, relevance")
  evaluator = payload.fetch(:evaluator, state[:evaluator])
  # A nil :threshold means "no override". Coercing it with to_f would give
  # 0.0 and let every output pass, so fall back to the state default.
  threshold = (payload[:threshold] || state[:threshold]).to_f

  score, feedback = evaluator ? llm_score(evaluator, output, criteria) : rule_score(output)
  numeric = score.to_f

  Evaluation.new(
    score: numeric,
    feedback: feedback.to_s,
    passed: numeric >= threshold,
    criteria: criteria
  )
end
|
|
116
|
+
|
|
117
|
+
# Evaluate +:output+, then regenerate and re-evaluate until an evaluation
# passes or the retry budget is used up.
#
# @param state [Hash] agent state; +:evaluations+ is extended, never mutated
# @param payload [Hash] requires +:output+ and +:generator+; optional
#   +:generator_args+ (default {}) and +:max_retries+ (default 3), plus
#   whatever keys +run_evaluation+ reads
# @return [Hash] updated state with all evaluations recorded
# @raise [KeyError] when +:generator+ or +:output+ is missing
# @raise [ArgumentError, TypeError] when +:max_retries+ is not integer-like
def run_evaluate_and_retry(state, payload)
  # Clamp at zero so the initial output is always evaluated once; a
  # negative budget previously skipped evaluation altogether.
  retries        = [Integer(payload.fetch(:max_retries, 3)), 0].max
  generator      = payload.fetch(:generator)
  generator_args = payload.fetch(:generator_args, {})
  output         = payload.fetch(:output)
  all_evals      = []

  (retries + 1).times do |attempt|
    # Attempt 0 judges the caller's output; later attempts regenerate first.
    output = generator.call(**generator_args) if attempt.positive?
    ev = run_evaluation(payload.merge(output: output), state)
    all_evals << ev
    break if ev.passed
  end

  state.merge(evaluations: state[:evaluations] + all_evals)
end
|
|
134
|
+
|
|
135
|
+
# Call the user-supplied evaluator and reduce its reply to a
# +[score, feedback]+ pair. Accepted return shapes:
#   Hash  — +score+ / +feedback+ entries under Symbol or String keys
#           (String keys cover replies built from parsed JSON)
#   other — converted with +to_s+; the first number in the text is the score
# A reply without a score gets 5.0. Any StandardError raised by the
# evaluator, or while reading its reply, yields 0.0 with the error message
# as feedback.
#
# @param evaluator [#call] invoked with +output:+ and +criteria:+
# @param output [Object] artifact being judged
# @param criteria [String] criteria forwarded to the evaluator
# @return [Array(Object, Object)] score and feedback, not yet coerced
def llm_score(evaluator, output, criteria)
  result = evaluator.call(output: output, criteria: criteria)
  case result
  when Hash
    # Symbol keys win; String keys are the fallback before the defaults.
    score    = result.fetch(:score) { result.fetch("score", 5.0) }
    feedback = result.fetch(:feedback) { result.fetch("feedback", "") }
    [score, feedback]
  else
    text = result.to_s
    score = text[/\b\d+(?:\.\d+)?\b/]&.to_f || 5.0
    [score, text]
  end
rescue StandardError => e
  [0.0, "Evaluator error: #{e.message}"]
end
|
|
151
|
+
|
|
152
|
+
# Length-only fallback scorer used when no evaluator is configured.
# The output is converted with +to_s+ and stripped before it is measured.
#
# @param output [Object] artifact to score
# @return [Array(Float, String)] score and a short explanation
def rule_score(output)
  length = output.to_s.strip.length

  if length.zero?
    [0.0, "Output is empty"]
  elsif length < 50
    [3.0, "Output is very short"]
  elsif length < 200
    [5.5, "Output is below average length"]
  else
    [7.5, "Output meets basic length criteria"]
  end
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Agents
|
|
5
|
+
# Tracks performance metrics for named subjects (agents, services,
|
|
6
|
+
# contracts), computes weighted aggregate scores, and compares subjects
|
|
7
|
+
# against each other or against stored baselines.
|
|
8
|
+
#
|
|
9
|
+
# Scoring:
|
|
10
|
+
# * Metrics are grouped by name; the last 20 readings are averaged.
|
|
11
|
+
# * Per-subject or global weights scale each metric's contribution.
|
|
12
|
+
# * When a baseline is set the score is normalised to 0–100 relative to it;
|
|
13
|
+
# without a baseline the raw weighted average is returned.
|
|
14
|
+
# * Grades: A ≥ 90, B ≥ 75, C ≥ 60, D otherwise.
|
|
15
|
+
#
|
|
16
|
+
# @example Track two services and compare
|
|
17
|
+
# ref = EvaluatorAgent.start
|
|
18
|
+
# ref.send(:record_metric, subject: :api, name: :throughput, value: 850)
|
|
19
|
+
# ref.send(:record_metric, subject: :api, name: :error_rate, value: 2.1)
|
|
20
|
+
# ref.send(:set_baseline, subject: :api, baseline: 800)
|
|
21
|
+
# ref.send(:evaluate, subject: :api)
|
|
22
|
+
# ev = ref.call(:evaluations, subject: :api).last
|
|
23
|
+
#   puts ev.grade      # => "D" (the mean of 850 and 2.1 is 426.05, about 53% of the 800 baseline)
|
|
24
|
+
class EvaluatorAgent < Igniter::Agent
|
|
25
|
+
MetricRecord = Struct.new(:name, :value, :recorded_at, keyword_init: true)
|
|
26
|
+
Evaluation = Struct.new(:subject, :score, :grade, :metrics,
|
|
27
|
+
:recorded_at, keyword_init: true)
|
|
28
|
+
Comparison = Struct.new(:subject_a, :subject_b, :winner, :delta,
|
|
29
|
+
keyword_init: true)
|
|
30
|
+
|
|
31
|
+
GRADES = [
|
|
32
|
+
[90.0, "A"],
|
|
33
|
+
[75.0, "B"],
|
|
34
|
+
[60.0, "C"],
|
|
35
|
+
[ 0.0, "D"]
|
|
36
|
+
].freeze
|
|
37
|
+
|
|
38
|
+
# subjects: Hash<String, { metrics: Array<MetricRecord>, baseline: Float?, weights: Hash }>
|
|
39
|
+
initial_state \
|
|
40
|
+
subjects: {},
|
|
41
|
+
evaluations: [],
|
|
42
|
+
weights: {}
|
|
43
|
+
|
|
44
|
+
# Record a metric reading for a subject.
|
|
45
|
+
#
|
|
46
|
+
# Payload keys:
|
|
47
|
+
# subject [String, Symbol] — subject identifier
|
|
48
|
+
# name [String, Symbol] — metric name
|
|
49
|
+
# value [Numeric] — metric value
|
|
50
|
+
on :record_metric do |state:, payload:|
|
|
51
|
+
subject = payload.fetch(:subject).to_s
|
|
52
|
+
metric = MetricRecord.new(
|
|
53
|
+
name: payload.fetch(:name).to_s,
|
|
54
|
+
value: payload.fetch(:value).to_f,
|
|
55
|
+
recorded_at: Time.now
|
|
56
|
+
)
|
|
57
|
+
entry = state[:subjects].fetch(subject, { metrics: [], baseline: nil, weights: {} })
|
|
58
|
+
updated = entry.merge(metrics: (entry[:metrics] + [metric]).last(200))
|
|
59
|
+
state.merge(subjects: state[:subjects].merge(subject => updated))
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Set the reference baseline value for a subject.
|
|
63
|
+
# Scores will be expressed as a percentage of this baseline.
|
|
64
|
+
#
|
|
65
|
+
# Payload keys:
|
|
66
|
+
# subject [String, Symbol]
|
|
67
|
+
# baseline [Numeric]
|
|
68
|
+
on :set_baseline do |state:, payload:|
|
|
69
|
+
subject = payload.fetch(:subject).to_s
|
|
70
|
+
baseline = payload.fetch(:baseline).to_f
|
|
71
|
+
entry = state[:subjects].fetch(subject, { metrics: [], baseline: nil, weights: {} })
|
|
72
|
+
updated = entry.merge(baseline: baseline)
|
|
73
|
+
state.merge(subjects: state[:subjects].merge(subject => updated))
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Set per-metric weights for a subject.
|
|
77
|
+
#
|
|
78
|
+
# Payload keys:
|
|
79
|
+
# subject [String, Symbol]
|
|
80
|
+
# weights [Hash<String, Numeric>] — metric name → weight
|
|
81
|
+
on :set_weights do |state:, payload:|
|
|
82
|
+
subject = payload.fetch(:subject).to_s
|
|
83
|
+
weights = payload.fetch(:weights).transform_keys(&:to_s)
|
|
84
|
+
entry = state[:subjects].fetch(subject, { metrics: [], baseline: nil, weights: {} })
|
|
85
|
+
updated = entry.merge(weights: weights)
|
|
86
|
+
state.merge(subjects: state[:subjects].merge(subject => updated))
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Compute and store an Evaluation for a subject.
|
|
90
|
+
#
|
|
91
|
+
# Payload keys:
|
|
92
|
+
# subject [String, Symbol]
|
|
93
|
+
on :evaluate do |state:, payload:|
|
|
94
|
+
agent = new
|
|
95
|
+
ev = agent.send(:compute_evaluation, payload.fetch(:subject).to_s, state)
|
|
96
|
+
next state unless ev
|
|
97
|
+
|
|
98
|
+
state.merge(evaluations: state[:evaluations] + [ev])
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Sync query — compare the most recent evaluations of two subjects.
|
|
102
|
+
#
|
|
103
|
+
# Payload keys:
|
|
104
|
+
# a [String, Symbol] — first subject
|
|
105
|
+
# b [String, Symbol] — second subject
|
|
106
|
+
#
|
|
107
|
+
# @return [Comparison, nil]
|
|
108
|
+
on :compare do |state:, payload:|
|
|
109
|
+
a = state[:evaluations].select { |e| e.subject == payload.fetch(:a).to_s }.last
|
|
110
|
+
b = state[:evaluations].select { |e| e.subject == payload.fetch(:b).to_s }.last
|
|
111
|
+
next nil unless a && b
|
|
112
|
+
|
|
113
|
+
delta = (a.score - b.score).round(4)
|
|
114
|
+
winner = if delta > 0 then a.subject
|
|
115
|
+
elsif delta < 0 then b.subject
|
|
116
|
+
else :tie
|
|
117
|
+
end
|
|
118
|
+
Comparison.new(subject_a: a.subject, subject_b: b.subject,
|
|
119
|
+
winner: winner, delta: delta.abs)
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Sync query — all evaluations, optionally filtered by subject.
|
|
123
|
+
#
|
|
124
|
+
# Payload keys:
|
|
125
|
+
# subject [String, Symbol, nil]
|
|
126
|
+
#
|
|
127
|
+
# @return [Array<Evaluation>]
|
|
128
|
+
on :evaluations do |state:, payload:|
|
|
129
|
+
filter = payload&.fetch(:subject, nil)
|
|
130
|
+
evs = state[:evaluations]
|
|
131
|
+
evs = evs.select { |e| e.subject == filter.to_s } if filter
|
|
132
|
+
evs.dup
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Sync query — list registered subject names.
|
|
136
|
+
#
|
|
137
|
+
# @return [Array<String>]
|
|
138
|
+
on :subjects do |state:, **|
|
|
139
|
+
state[:subjects].keys
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Set global default weights (applied when a subject has no per-metric weight).
#
# Payload keys:
#   weights [Hash<String|Symbol, Numeric>] — metric name → weight
#
# Keys are stored as Strings. Metric names are recorded with +to_s+ and
# weights are looked up by that String, so Symbol-keyed weights would
# otherwise never match. A missing or nil +weights+ leaves state unchanged.
on :configure do |state:, payload:|
  overrides = payload.slice(:weights).compact
  state.merge(overrides.transform_values { |weights| weights.transform_keys(&:to_s) })
end
|
|
149
|
+
|
|
150
|
+
# Clear all subjects and evaluations.
|
|
151
|
+
on :reset do |state:, **|
|
|
152
|
+
state.merge(subjects: {}, evaluations: [])
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
private
|
|
156
|
+
|
|
157
|
+
# Build an Evaluation for one subject from its recorded metrics.
#
# Readings are grouped by metric name and the newest 20 of each group are
# averaged. Each average is scaled by its weight (subject weights override
# +state[:weights]+; default 1.0) and the total is divided by the sum of
# weights. With a positive baseline the result is expressed as a percentage
# of it and kept within 0.0..100.0; otherwise the rounded weighted average
# is used as the score.
#
# @param name [String] subject key in +state[:subjects]+
# @param state [Hash] agent state; reads +:subjects+ and +:weights+
# @return [Evaluation, nil] nil when the subject is unknown or has no metrics
def compute_evaluation(name, state)
  data = state[:subjects][name]
  return nil unless data && data[:metrics].any?

  weights  = state[:weights].merge(data[:weights] || {})
  baseline = data[:baseline]
  grouped  = data[:metrics].group_by(&:name)

  score_parts = grouped.map do |metric_name, records|
    recent = records.last(20) # group_by never yields an empty group
    weight = (weights[metric_name] || 1.0).to_f
    [recent.sum(&:value) / recent.size * weight, weight]
  end

  # The 0.001 floor guards against dividing by a zero weight total.
  total_weight = [score_parts.sum { |_, w| w }, 0.001].max
  raw          = score_parts.sum { |v, _| v } / total_weight

  score = if baseline && baseline > 0
            # Floor at 0 as well as capping at 100, so a negative weighted
            # average cannot leave the 0–100 range.
            (raw / baseline * 100).round(4).clamp(0.0, 100.0)
          else
            raw.round(4)
          end

  grade = GRADES.find { |threshold, _| score.to_f >= threshold }&.last || "D"

  Evaluation.new(
    subject: name,
    score: score,
    grade: grade,
    metrics: grouped.keys,
    recorded_at: Time.now
  )
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|