igniter 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +217 -0
- data/docs/APPLICATION_V1.md +253 -0
- data/docs/CAPABILITIES_V1.md +207 -0
- data/docs/CONSENSUS_V1.md +477 -0
- data/docs/CONTENT_ADDRESSING_V1.md +221 -0
- data/docs/DATAFLOW_V1.md +274 -0
- data/docs/MESH_V1.md +732 -0
- data/docs/NODE_CACHE_V1.md +324 -0
- data/docs/PROACTIVE_AGENTS_V1.md +293 -0
- data/docs/SERVER_V1.md +200 -1
- data/docs/SKILLS_V1.md +213 -0
- data/docs/STORE_ADAPTERS.md +41 -13
- data/docs/TEMPORAL_V1.md +174 -0
- data/docs/TOOLS_V1.md +347 -0
- data/docs/TRANSCRIPTION_V1.md +403 -0
- data/examples/README.md +37 -0
- data/examples/consensus.rb +239 -0
- data/examples/dataflow.rb +308 -0
- data/examples/elocal_webhook.rb +1 -0
- data/examples/incremental.rb +142 -0
- data/examples/llm_tools.rb +237 -0
- data/examples/mesh.rb +239 -0
- data/examples/mesh_discovery.rb +267 -0
- data/examples/mesh_gossip.rb +162 -0
- data/examples/ringcentral_routing.rb +1 -1
- data/lib/igniter/agents/ai/alert_agent.rb +111 -0
- data/lib/igniter/agents/ai/chain_agent.rb +127 -0
- data/lib/igniter/agents/ai/critic_agent.rb +163 -0
- data/lib/igniter/agents/ai/evaluator_agent.rb +193 -0
- data/lib/igniter/agents/ai/evolution_agent.rb +286 -0
- data/lib/igniter/agents/ai/health_check_agent.rb +122 -0
- data/lib/igniter/agents/ai/observer_agent.rb +184 -0
- data/lib/igniter/agents/ai/planner_agent.rb +210 -0
- data/lib/igniter/agents/ai/router_agent.rb +131 -0
- data/lib/igniter/agents/ai/self_reflection_agent.rb +175 -0
- data/lib/igniter/agents/observability/metrics_agent.rb +130 -0
- data/lib/igniter/agents/pipeline/batch_processor_agent.rb +131 -0
- data/lib/igniter/agents/proactive_agent.rb +208 -0
- data/lib/igniter/agents/reliability/retry_agent.rb +99 -0
- data/lib/igniter/agents/scheduling/cron_agent.rb +110 -0
- data/lib/igniter/agents.rb +56 -0
- data/lib/igniter/application/app_config.rb +32 -0
- data/lib/igniter/application/autoloader.rb +18 -0
- data/lib/igniter/application/generator.rb +157 -0
- data/lib/igniter/application/scheduler.rb +109 -0
- data/lib/igniter/application/yml_loader.rb +39 -0
- data/lib/igniter/application.rb +174 -0
- data/lib/igniter/capabilities.rb +68 -0
- data/lib/igniter/compiler/validators/dependencies_validator.rb +50 -2
- data/lib/igniter/compiler/validators/remote_validator.rb +2 -0
- data/lib/igniter/consensus/cluster.rb +183 -0
- data/lib/igniter/consensus/errors.rb +14 -0
- data/lib/igniter/consensus/executors.rb +43 -0
- data/lib/igniter/consensus/node.rb +320 -0
- data/lib/igniter/consensus/read_query.rb +30 -0
- data/lib/igniter/consensus/state_machine.rb +58 -0
- data/lib/igniter/consensus.rb +58 -0
- data/lib/igniter/content_addressing.rb +133 -0
- data/lib/igniter/contract.rb +12 -0
- data/lib/igniter/dataflow/aggregate_operators.rb +147 -0
- data/lib/igniter/dataflow/aggregate_state.rb +77 -0
- data/lib/igniter/dataflow/diff.rb +37 -0
- data/lib/igniter/dataflow/diff_state.rb +81 -0
- data/lib/igniter/dataflow/incremental_collection_result.rb +39 -0
- data/lib/igniter/dataflow/window_filter.rb +48 -0
- data/lib/igniter/dataflow.rb +65 -0
- data/lib/igniter/dsl/contract_builder.rb +71 -7
- data/lib/igniter/executor.rb +60 -0
- data/lib/igniter/extensions/capabilities.rb +39 -0
- data/lib/igniter/extensions/content_addressing.rb +5 -0
- data/lib/igniter/extensions/dataflow.rb +117 -0
- data/lib/igniter/extensions/incremental.rb +50 -0
- data/lib/igniter/extensions/mesh.rb +31 -0
- data/lib/igniter/fingerprint.rb +43 -0
- data/lib/igniter/incremental/formatter.rb +81 -0
- data/lib/igniter/incremental/result.rb +69 -0
- data/lib/igniter/incremental/tracker.rb +108 -0
- data/lib/igniter/incremental.rb +50 -0
- data/lib/igniter/integrations/llm/config.rb +48 -4
- data/lib/igniter/integrations/llm/executor.rb +221 -28
- data/lib/igniter/integrations/llm/providers/anthropic.rb +37 -4
- data/lib/igniter/integrations/llm/providers/openai.rb +34 -5
- data/lib/igniter/integrations/llm/transcription/providers/assemblyai.rb +200 -0
- data/lib/igniter/integrations/llm/transcription/providers/base.rb +122 -0
- data/lib/igniter/integrations/llm/transcription/providers/deepgram.rb +162 -0
- data/lib/igniter/integrations/llm/transcription/providers/openai.rb +102 -0
- data/lib/igniter/integrations/llm/transcription/transcriber.rb +145 -0
- data/lib/igniter/integrations/llm/transcription/transcript_result.rb +29 -0
- data/lib/igniter/integrations/llm.rb +37 -1
- data/lib/igniter/memory/agent_memory.rb +104 -0
- data/lib/igniter/memory/episode.rb +29 -0
- data/lib/igniter/memory/fact.rb +27 -0
- data/lib/igniter/memory/memorable.rb +90 -0
- data/lib/igniter/memory/reflection_cycle.rb +96 -0
- data/lib/igniter/memory/reflection_record.rb +28 -0
- data/lib/igniter/memory/store.rb +115 -0
- data/lib/igniter/memory/stores/in_memory.rb +136 -0
- data/lib/igniter/memory/stores/sqlite.rb +284 -0
- data/lib/igniter/memory.rb +80 -0
- data/lib/igniter/mesh/announcer.rb +55 -0
- data/lib/igniter/mesh/config.rb +45 -0
- data/lib/igniter/mesh/discovery.rb +39 -0
- data/lib/igniter/mesh/errors.rb +31 -0
- data/lib/igniter/mesh/gossip.rb +47 -0
- data/lib/igniter/mesh/peer.rb +21 -0
- data/lib/igniter/mesh/peer_registry.rb +51 -0
- data/lib/igniter/mesh/poller.rb +77 -0
- data/lib/igniter/mesh/router.rb +109 -0
- data/lib/igniter/mesh.rb +85 -0
- data/lib/igniter/metrics/collector.rb +131 -0
- data/lib/igniter/metrics/prometheus_exporter.rb +104 -0
- data/lib/igniter/metrics/snapshot.rb +8 -0
- data/lib/igniter/metrics.rb +37 -0
- data/lib/igniter/model/aggregate_node.rb +34 -0
- data/lib/igniter/model/collection_node.rb +3 -2
- data/lib/igniter/model/compute_node.rb +13 -0
- data/lib/igniter/model/remote_node.rb +18 -2
- data/lib/igniter/node_cache.rb +231 -0
- data/lib/igniter/replication/bootstrapper.rb +61 -0
- data/lib/igniter/replication/bootstrappers/gem.rb +32 -0
- data/lib/igniter/replication/bootstrappers/git.rb +39 -0
- data/lib/igniter/replication/bootstrappers/tarball.rb +56 -0
- data/lib/igniter/replication/expansion_plan.rb +38 -0
- data/lib/igniter/replication/expansion_planner.rb +142 -0
- data/lib/igniter/replication/manifest.rb +45 -0
- data/lib/igniter/replication/network_topology.rb +123 -0
- data/lib/igniter/replication/node_role.rb +42 -0
- data/lib/igniter/replication/reflective_replication_agent.rb +238 -0
- data/lib/igniter/replication/replication_agent.rb +87 -0
- data/lib/igniter/replication/role_registry.rb +73 -0
- data/lib/igniter/replication/ssh_session.rb +77 -0
- data/lib/igniter/replication.rb +54 -0
- data/lib/igniter/runtime/cache.rb +35 -6
- data/lib/igniter/runtime/execution.rb +26 -2
- data/lib/igniter/runtime/input_validator.rb +6 -2
- data/lib/igniter/runtime/node_state.rb +7 -2
- data/lib/igniter/runtime/resolver.rb +323 -31
- data/lib/igniter/runtime/stores/redis_store.rb +41 -4
- data/lib/igniter/server/client.rb +44 -1
- data/lib/igniter/server/config.rb +13 -6
- data/lib/igniter/server/handlers/event_handler.rb +4 -0
- data/lib/igniter/server/handlers/execute_handler.rb +6 -0
- data/lib/igniter/server/handlers/liveness_handler.rb +20 -0
- data/lib/igniter/server/handlers/manifest_handler.rb +34 -0
- data/lib/igniter/server/handlers/metrics_handler.rb +51 -0
- data/lib/igniter/server/handlers/peers_handler.rb +115 -0
- data/lib/igniter/server/handlers/readiness_handler.rb +47 -0
- data/lib/igniter/server/http_server.rb +54 -17
- data/lib/igniter/server/router.rb +54 -21
- data/lib/igniter/server/server_logger.rb +52 -0
- data/lib/igniter/server.rb +6 -0
- data/lib/igniter/skill/feedback.rb +116 -0
- data/lib/igniter/skill/output_schema.rb +110 -0
- data/lib/igniter/skill.rb +218 -0
- data/lib/igniter/temporal.rb +84 -0
- data/lib/igniter/tool/discoverable.rb +151 -0
- data/lib/igniter/tool.rb +52 -0
- data/lib/igniter/tool_registry.rb +144 -0
- data/lib/igniter/version.rb +1 -1
- data/lib/igniter.rb +17 -0
- metadata +128 -1
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Replication
|
|
5
|
+
# Immutable value object returned by ExpansionPlanner.
#
# Carries an ordered list of actions the replication system should execute
# and a human-readable rationale explaining why the actions were chosen.
#
# Recognised action hashes:
#   { action: :replicate_role, role: :worker, host: "10.0.0.2" }
#   { action: :retire_node,    node_id: "abc", host: "10.0.0.1" }
#   { action: :no_op }
class ExpansionPlan
  attr_reader :actions, :rationale

  # @param actions   [Array<Hash>, Hash, nil] ordered action hashes; a lone
  #   Hash is treated as a one-element list, nil as an empty list
  # @param rationale [String, nil]
  def initialize(actions:, rationale: nil)
    # Kernel#Array would explode a single Hash into [key, value] pairs, so
    # wrap it explicitly.
    list = actions.is_a?(Hash) ? [actions] : Array(actions)
    # Kernel#Array returns the very same object when handed an Array, so
    # freezing its result directly would freeze the caller's array. Freeze a
    # private copy instead. The action hashes themselves are shared, not copied.
    @actions = list.dup.freeze
    @rationale = rationale
    freeze
  end

  # True when there is nothing to do: every action is :no_op. An empty action
  # list also counts as a no-op because `all?` is true for empty collections.
  #
  # @return [Boolean]
  def no_op?
    @actions.all? { |a| a[:action] == :no_op }
  end

  # @return [Hash] `{ actions: Array<Hash>, rationale: String/nil }`; the
  #   actions array is the plan's own frozen list
  def to_h
    { actions: @actions, rationale: @rationale }
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Replication
|
|
5
|
+
# Analyses network topology + episodic memory to produce an ExpansionPlan.
#
# Two modes of operation:
# * **Rule-based** (default) — applies fixed heuristics, no LLM required.
# * **LLM-assisted** — delegates to an LLM executor for intent-rich reasoning.
#
# Rule-based heuristics (applied in order):
# 1. Retire nodes marked unhealthy in the topology.
# 2. Spawn a new node for each required role absent from the topology.
# 3. Annotate the rationale when recent replication failures reach the threshold.
# 4. Honour :scale_signal episodes (content: "scale_out:<role>").
#
# Each planned :replicate_role action reserves its host, so a single plan
# never targets the same free host twice.
#
# LLM mode receives topology snapshot + recent episodes and expects the
# executor to return { actions: [...], rationale: "..." }.
#
# @example Rule-based
#   topology = NetworkTopology.new
#   topology.register(node_id: "x", host: "10.0.0.1", role: :worker)
#
#   planner = ExpansionPlanner.new(
#     topology:       topology,
#     required_roles: [:worker, :coordinator],
#     host_pool:      ["10.0.0.2"]
#   )
#   plan = planner.plan
#   # plan.actions => [{ action: :replicate_role, role: :coordinator, host: "10.0.0.2" }]
class ExpansionPlanner
  DEFAULT_FAILURE_THRESHOLD = 3

  # @param topology          [NetworkTopology] current node topology; must
  #   respond to `nodes` and `needs_role?`
  # @param memory            [AgentMemory, nil] episodic memory (optional);
  #   must respond to `recent(last:, type:)`
  # @param required_roles    [Array<Symbol>] roles that must always be present
  # @param failure_threshold [Integer] replication failures before warning
  # @param host_pool         [Array<String>, nil] candidate hosts for new nodes
  # @param llm               [#call, nil] optional LLM executor
  def initialize(topology:, memory: nil, required_roles: [],
                 failure_threshold: DEFAULT_FAILURE_THRESHOLD,
                 host_pool: [], llm: nil)
    @topology = topology
    @memory = memory
    @required_roles = Array(required_roles).map(&:to_sym)
    @failure_threshold = failure_threshold
    # Array() tolerates nil; dup keeps later caller-side mutation out.
    @host_pool = Array(host_pool).dup
    @llm = llm
  end

  # Produce an ExpansionPlan, via the LLM executor when one was supplied and
  # via the fixed heuristics otherwise.
  #
  # @return [ExpansionPlan]
  def plan
    @llm ? smart_plan : rule_based_plan
  end

  private

  # Runs the four heuristics in order, accumulating actions and rationale
  # fragments; falls back to a single :no_op action when nothing was planned.
  def rule_based_plan
    actions = []
    rationale = []

    retire_unhealthy(actions, rationale)
    ensure_required_roles(actions, rationale)
    check_failure_signal(rationale)
    apply_scale_signals(actions, rationale)

    actions << { action: :no_op } if actions.empty?
    ExpansionPlan.new(actions: actions, rationale: rationale.join("; "))
  end

  # Heuristic 1: one :retire_node action per node whose `healthy` is falsy.
  def retire_unhealthy(actions, rationale)
    @topology.nodes.reject(&:healthy).each do |node|
      actions << { action: :retire_node, node_id: node.node_id, host: node.host }
      rationale << "node #{node.node_id} (#{node.host}) is unhealthy"
    end
  end

  # Heuristic 2: one :replicate_role action per required role the topology
  # reports as missing, each on a distinct free host. When the pool is
  # exhausted only the rationale records the gap.
  def ensure_required_roles(actions, rationale)
    @required_roles.each do |role|
      next unless @topology.needs_role?(role)

      host = next_available_host(actions)
      if host
        actions << { action: :replicate_role, role: role, host: host }
        rationale << "role :#{role} absent; targeting #{host}"
      else
        rationale << "role :#{role} absent but no available host in pool"
      end
    end
  end

  # Heuristic 3: count "failure" outcomes among the last 20 :replication_event
  # episodes and annotate the rationale once the threshold is reached.
  def check_failure_signal(rationale)
    return unless @memory

    failures = @memory.recent(last: 20, type: :replication_event)
                      .count { |e| e.outcome == "failure" }
    return if failures < @failure_threshold

    rationale << "#{failures} recent replication failures — check SSH credentials"
  end

  # Heuristic 4: every recent :scale_signal episode whose content is exactly
  # "scale_out:<role>" requests one more node for that role. Signals that do
  # not match, or that find no free host, are skipped silently.
  def apply_scale_signals(actions, rationale)
    return unless @memory

    @memory.recent(last: 10, type: :scale_signal).each do |ep|
      m = ep.content.to_s.match(/\Ascale_out:(\w+)\z/)
      next unless m

      role = m[1].to_sym
      host = next_available_host(actions)
      next unless host

      actions << { action: :replicate_role, role: role, host: host }
      rationale << "scale_signal requests :#{role} at #{host}"
    end
  end

  # Delegates planning to the LLM executor, passing plain-hash snapshots of
  # the topology and the last 50 episodes. The executor's result is read with
  # `[:actions]` / `[:rationale]`, so it must be hash-like.
  def smart_plan
    episodes = @memory&.recent(last: 50) || []
    result = @llm.call(
      topology: @topology.nodes.map do |n|
        { node_id: n.node_id, host: n.host, role: n.role, healthy: n.healthy }
      end,
      episodes: episodes.map do |e|
        { type: e.type, content: e.content, outcome: e.outcome }
      end,
      required_roles: @required_roles,
      host_pool: @host_pool
    )
    ExpansionPlan.new(actions: Array(result[:actions]), rationale: result[:rationale])
  end

  # First pool host that is neither occupied by a registered node nor already
  # reserved by a :replicate_role action in the plan being built. The topology
  # is only updated when the plan is executed, so without the reservation
  # check every action planned in one pass would get the same host.
  #
  # @param pending_actions [Array<Hash>] actions planned so far
  # @return [String, nil] nil when the pool is exhausted
  def next_available_host(pending_actions = [])
    occupied = @topology.nodes.map(&:host)
    reserved = pending_actions.select { |a| a[:action] == :replicate_role }
                              .map { |a| a[:host] }
    # Array#- keeps pool order and needs no `require "set"`.
    (@host_pool - occupied - reserved).first
  end
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module Igniter
|
|
6
|
+
module Replication
|
|
7
|
+
# Frozen snapshot describing a running Igniter instance, used during
# replication.
#
# Use Manifest.current to capture the metadata of the current process.
class Manifest
  attr_reader :gem_version, :ruby_version, :source_path,
              :startup_command, :instance_id

  class << self
    # Build a manifest for the current process. The source path is the loaded
    # "igniter" gem's directory when RubyGems knows about it, otherwise the
    # directory three levels above this file. A fresh UUID is generated on
    # every call.
    #
    # @return [Manifest]
    def current
      loaded_spec = defined?(Gem) && Gem.loaded_specs["igniter"]
      root = loaded_spec&.gem_dir || File.expand_path("../../..", __dir__)

      new(
        gem_version: Igniter::VERSION,
        ruby_version: RUBY_VERSION,
        source_path: root,
        startup_command: $PROGRAM_NAME,
        instance_id: SecureRandom.uuid
      )
    end
  end

  # All five attributes are required; the instance is frozen once they are
  # assigned.
  def initialize(gem_version:, ruby_version:, source_path:, startup_command:, instance_id:)
    @gem_version = gem_version
    @ruby_version = ruby_version
    @source_path = source_path
    @startup_command = startup_command
    @instance_id = instance_id
    freeze
  end

  # @return [Hash] the five attributes keyed by symbol
  def to_h
    {
      gem_version: @gem_version,
      ruby_version: @ruby_version,
      source_path: @source_path,
      startup_command: @startup_command,
      instance_id: @instance_id
    }
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Replication
|
|
5
|
+
# In-memory registry of the nodes known to the deployment network. Every
# read and write of the internal table goes through a Mutex.
#
# Updated by ReflectiveReplicationAgent as nodes are spawned, heartbeat-ed,
# or removed. Can be shared across agent handler invocations via state.
#
# @example
#   topology = NetworkTopology.new
#   topology.register(node_id: "abc", host: "10.0.0.2", role: :worker)
#   topology.nodes(role: :worker)       # => [NodeEntry]
#   topology.needs_role?(:coordinator)  # => true
class NetworkTopology
  # Mutable record for a single node; this class only writes to it while
  # holding the Mutex.
  NodeEntry = Struct.new(:node_id, :host, :role,
                         :registered_at, :last_seen_at, :healthy,
                         keyword_init: true)

  def initialize
    @nodes = {}
    @mutex = Mutex.new
  end

  # Register a node, replacing any existing entry with the same id. The new
  # entry starts healthy, with both timestamps set to the current time.
  #
  # @param node_id [String]
  # @param host    [String]
  # @param role    [Symbol, nil] converted with to_sym when given
  # @return [NodeEntry]
  def register(node_id:, host:, role: nil)
    stamp = Time.now
    NodeEntry.new(
      node_id: node_id,
      host: host,
      role: role&.to_sym,
      registered_at: stamp,
      last_seen_at: stamp,
      healthy: true
    ).tap { |entry| @mutex.synchronize { @nodes[node_id] = entry } }
  end

  # Heartbeat: set last_seen_at of a known node to the current time.
  #
  # @param node_id [String]
  # @return [Boolean] true if the node was found
  def touch(node_id:)
    mutate_entry(node_id) { |entry| entry.last_seen_at = Time.now }
  end

  # Flag a node as unhealthy (e.g. SSH unreachable).
  #
  # @param node_id [String]
  # @return [Boolean] true if the node was found
  def mark_unhealthy(node_id:)
    mutate_entry(node_id) { |entry| entry.healthy = false }
  end

  # Drop a node from the topology.
  #
  # @param node_id [String]
  # @return [NodeEntry, nil] the removed entry, or nil if not found
  def remove(node_id:)
    @mutex.synchronize { @nodes.delete(node_id) }
  end

  # List registered nodes, optionally restricted to one role. The array is
  # new on every call; the entries in it are the live records.
  #
  # @param role [Symbol, nil]
  # @return [Array<NodeEntry>]
  def nodes(role: nil)
    @mutex.synchronize do
      snapshot = @nodes.values
      next snapshot unless role

      snapshot.select { |entry| entry.role == role.to_sym }
    end
  end

  # True when no healthy node with the given role exists.
  #
  # @param role [Symbol, String]
  # @return [Boolean]
  def needs_role?(role)
    !nodes(role: role).any?(&:healthy)
  end

  # Number of healthy nodes across all roles.
  #
  # @return [Integer]
  def healthy_count
    @mutex.synchronize { @nodes.each_value.count(&:healthy) }
  end

  # Number of registered nodes, healthy or not.
  #
  # @return [Integer]
  def size
    @mutex.synchronize { @nodes.length }
  end

  # Ids of every registered node.
  #
  # @return [Array<String>]
  def node_ids
    @mutex.synchronize { @nodes.keys }
  end

  private

  # Look up an entry under the lock and yield it for mutation.
  #
  # @return [Boolean] false when the id is unknown (the block is not run)
  def mutate_entry(node_id)
    @mutex.synchronize do
      entry = @nodes[node_id]
      next false unless entry

      yield entry
      true
    end
  end
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Igniter
|
|
4
|
+
module Replication
|
|
5
|
+
# Frozen description of a specialised role that a differentiated node can
# take on.
#
# A node that replicates with differentiation carries a NodeRole shaping its
# remote configuration: which contracts to activate, which env vars to
# inject, and which capability tags to advertise in the mesh.
#
# @example
#   role = NodeRole.new(
#     name:          :worker,
#     contracts:     ["ComputeContract"],
#     capabilities:  [:compute],
#     env_overrides: { "WORKER_POOL" => "8" },
#     tags:          [:cpu_heavy]
#   )
class NodeRole
  attr_reader :name, :contracts, :capabilities, :env_overrides, :tags

  # Normalises every attribute and freezes the containers and the instance:
  # name/capabilities/tags become symbols, contracts become strings, and
  # env_overrides keys become strings (values are left untouched).
  #
  # @param name          [Symbol, String]
  # @param contracts     [Array<#to_s>, nil]
  # @param capabilities  [Array<#to_sym>, nil]
  # @param env_overrides [Hash, nil]
  # @param tags          [Array<#to_sym>, nil]
  def initialize(name:, contracts: [], capabilities: [], env_overrides: {}, tags: [])
    @name = name.to_sym
    @contracts = Array(contracts).map { |contract| contract.to_s }.freeze
    @capabilities = symbol_list(capabilities)
    @env_overrides = Hash(env_overrides)
                     .each_with_object({}) { |(key, value), env| env[key.to_s] = value }
                     .freeze
    @tags = symbol_list(tags)
    freeze
  end

  # @return [Hash] the five attributes keyed by symbol
  def to_h
    {
      name: name,
      contracts: contracts,
      capabilities: capabilities,
      env_overrides: env_overrides,
      tags: tags
    }
  end

  private

  # Coerce a scalar/array/nil into a frozen array of symbols.
  def symbol_list(items)
    Array(items).map { |item| item.to_sym }.freeze
  end
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
require_relative "replication_agent"
|
|
5
|
+
require_relative "network_topology"
|
|
6
|
+
require_relative "expansion_plan"
|
|
7
|
+
require_relative "expansion_planner"
|
|
8
|
+
require_relative "role_registry"
|
|
9
|
+
|
|
10
|
+
module Igniter
|
|
11
|
+
module Replication
|
|
12
|
+
# ReplicationAgent extended with episodic memory, self-reflection, and
# topology-aware network expansion.
#
# == Additional message types
#
#   :assess_network — run ExpansionPlanner; execute replicate_role/retire_node actions
#   :reflect        — run a ReflectionCycle over recent episodes; store summary in state
#   :register_node  — register a remote node in the local NetworkTopology
#   :node_heartbeat — update last_seen_at for a known node
#   :signal_scale   — emit a :scale_signal episode (e.g. from load monitors)
#
# == State keys (in addition to inherited :events)
#
#   :topology        — NetworkTopology instance (created lazily on first access)
#   :host_pool       — Array<String> of candidate hosts
#   :required_roles  — Array<Symbol> of roles that must always be present
#   :last_plan       — Hash from the most recent ExpansionPlan
#   :last_reflection — String summary from the most recent reflection cycle
#
# == Memory
#
# Call +enable_class_memory+ in the class body to activate episodic memory.
# Memory is class-level (shared across handler invocations on this class).
#
# == Auto-assessment
#
# Call +auto_assess(every: N)+ to schedule periodic topology assessment.
#
# @example
#   RoleRegistry.define(:worker, env_overrides: { "POOL" => "4" })
#
#   class MyAgent < ReflectiveReplicationAgent
#     enable_class_memory
#     auto_assess every: 60
#   end
#
#   ref = MyAgent.start(initial_state: {
#     topology:       NetworkTopology.new,
#     required_roles: [:worker],
#     host_pool:      ["10.0.0.2", "10.0.0.3"]
#   })
#   ref.call(:assess_network)
class ReflectiveReplicationAgent < ReplicationAgent
  initial_state topology: nil, host_pool: [], required_roles: [],
                last_plan: nil, last_reflection: nil

  # ── Class-level memory ─────────────────────────────────────────────────────

  class << self
    # Activate episodic memory for this class.
    #
    # Calling this again with a different store discards the memoised
    # AgentMemory facade, so that +class_memory+ is rebuilt against the new
    # store instead of staying bound to the previous one.
    #
    # @param store [Memory::Store, nil] backing store; defaults to global default
    # @return [void]
    def enable_class_memory(store: nil)
      require "igniter/memory" # lazy load: only needed once memory is enabled
      chosen = store || Igniter::Memory.default_store
      @class_memory = nil unless chosen.equal?(@class_memory_store)
      @class_memory_store = chosen
      @class_memory_enabled = true
    end

    # Returns true when class-level memory has been activated.
    #
    # @return [Boolean]
    def class_memory_enabled?
      @class_memory_enabled || false
    end

    # Returns the AgentMemory facade bound to this class (memoised, keyed by
    # the class name), or nil when memory is disabled.
    #
    # @return [Memory::AgentMemory, nil]
    def class_memory
      return nil unless class_memory_enabled?

      @class_memory ||= Igniter::Memory::AgentMemory.new(
        store: @class_memory_store,
        agent_id: name.to_s
      )
    end

    # Reset class-level memory state. Intended for use in tests.
    #
    # @return [void]
    def reset_class_memory!
      @class_memory = nil
      @class_memory_store = nil
      @class_memory_enabled = false
    end

    # Register a recurring topology assessment under the :auto_assessment
    # schedule name.
    #
    # @param every [Numeric] interval in seconds
    # @return [void]
    def auto_assess(every:)
      schedule(:auto_assessment, every: every) do |state:|
        agent = new
        agent.send(:run_assess_network, state, {})
      end
    end
  end

  # ── deliver: intercept lifecycle events into memory ────────────────────────

  # Override the no-op deliver from ReplicationAgent to record events.
  # Subclasses can call +super+ and then add their own routing.
  #
  # Every event is stored as a :replication_event episode; only
  # :replication_failed is recorded with outcome "failure". Does nothing when
  # class memory is disabled.
  #
  # @param type    [Symbol]
  # @param payload [Hash]
  def deliver(type, payload = {})
    self.class.class_memory&.record(
      type: :replication_event,
      content: "#{type}: #{payload.inspect}",
      outcome: type == :replication_failed ? "failure" : "success",
      importance: 0.6
    )
  end

  # ── Handlers ───────────────────────────────────────────────────────────────

  # Re-define :replicate (parent's handler is cleared by Agent.inherited).
  on :replicate do |state:, payload:, **|
    agent = new
    agent.send(:run_replicate, payload)
    state
  end

  # Plan and execute; the handler's result is the updated state hash.
  on :assess_network do |state:, payload:, **|
    agent = new
    agent.send(:run_assess_network, state, payload)
  end

  # Run a reflection over class memory, store its summary as a :reflection
  # episode and in state. State is returned untouched when memory is disabled.
  on :reflect do |state:, payload:, **|
    next state unless class_memory_enabled?

    rec = class_memory.reflect
    class_memory.record(
      type: :reflection,
      content: rec.summary,
      outcome: "success",
      importance: 0.8
    )
    state.merge(last_reflection: rec.summary)
  end

  # Requires :node_id and :host in the payload; :role is optional. Creates the
  # topology on first use.
  on :register_node do |state:, payload:, **|
    topology = state[:topology] || NetworkTopology.new
    topology.register(
      node_id: payload.fetch(:node_id),
      host: payload.fetch(:host),
      role: payload[:role]
    )
    state.merge(topology: topology)
  end

  # Heartbeat for a known node; ignored when there is no topology yet.
  on :node_heartbeat do |state:, payload:, **|
    state[:topology]&.touch(node_id: payload.fetch(:node_id))
    state
  end

  # Record a "scale_out:<role>" episode for the planner to pick up on the next
  # assessment. Nothing is recorded when class memory is disabled.
  on :signal_scale do |state:, payload:, **|
    role = payload.fetch(:role)
    class_memory&.record(
      type: :scale_signal,
      content: "scale_out:#{role}",
      outcome: nil
    )
    state
  end

  private

  # Assess the network topology and execute the resulting plan.
  # Payload values for :required_roles / :host_pool take precedence over the
  # ones kept in state. Returns the updated state hash.
  #
  # @param state   [Hash]
  # @param payload [Hash]
  # @return [Hash]
  def run_assess_network(state, payload)
    topology = state[:topology] || NetworkTopology.new
    planner = ExpansionPlanner.new(
      topology: topology,
      memory: self.class.class_memory,
      required_roles: Array(payload[:required_roles] || state[:required_roles]),
      host_pool: Array(payload[:host_pool] || state[:host_pool])
    )

    plan = planner.plan

    # Any other action kind (e.g. :no_op) is intentionally ignored.
    plan.actions.each do |action|
      case action[:action]
      when :replicate_role
        run_replicate_role(action, topology)
      when :retire_node
        topology.remove(node_id: action[:node_id])
        deliver(:node_retired, node_id: action[:node_id], host: action[:host])
      end
    end

    self.class.class_memory&.record(
      type: :assessment,
      content: plan.rationale.to_s,
      outcome: "success"
    )

    state.merge(topology: topology, last_plan: plan.to_h)
  end

  # Execute a :replicate_role action: call run_replicate + register in topology.
  # Env overrides come from the RoleRegistry entry for the role, when one is
  # registered.
  #
  # NOTE(review): the node is registered (healthy, with a fresh UUID) and
  # :role_replicated is delivered regardless of what run_replicate returned;
  # how run_replicate signals failure is not visible here — confirm.
  #
  # @param action   [Hash]
  # @param topology [NetworkTopology]
  def run_replicate_role(action, topology)
    role_name = action[:role]
    role_obj = RoleRegistry.registered?(role_name) ? RoleRegistry.fetch(role_name) : nil
    env = role_obj&.env_overrides || {}

    run_replicate(
      host: action.fetch(:host),
      user: action.fetch(:user, "deploy"),
      strategy: action.fetch(:strategy, :git),
      env: env,
      bootstrapper_options: action.fetch(:bootstrapper_options, {})
    )

    topology.register(node_id: SecureRandom.uuid, host: action[:host], role: role_name)
    deliver(:role_replicated, host: action[:host], role: role_name)
  end
end
|
|
237
|
+
end
|
|
238
|
+
end
|