textus 0.51.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +19 -19
- data/SPEC.md +41 -39
- data/docs/architecture/README.md +9 -9
- data/docs/reference/conventions.md +8 -8
- data/lib/textus/boot.rb +7 -5
- data/lib/textus/cli/runner.rb +2 -2
- data/lib/textus/cli/verb/put.rb +1 -1
- data/lib/textus/cli/verb/serve.rb +19 -0
- data/lib/textus/dispatcher.rb +3 -1
- data/lib/textus/doctor/check/generator_drift.rb +1 -1
- data/lib/textus/doctor/check/sentinels.rb +2 -2
- data/lib/textus/domain/freshness/evaluator.rb +2 -2
- data/lib/textus/domain/jobs/job.rb +58 -0
- data/lib/textus/domain/jobs/registry.rb +37 -0
- data/lib/textus/domain/policy/base_guards.rb +1 -1
- data/lib/textus/domain/policy/retention.rb +1 -1
- data/lib/textus/domain/policy/source.rb +4 -10
- data/lib/textus/errors.rb +2 -2
- data/lib/textus/hooks/catalog.rb +0 -1
- data/lib/textus/init/templates/machine_intake.rb +1 -1
- data/lib/textus/init.rb +4 -4
- data/lib/textus/jobs/handlers.rb +62 -0
- data/lib/textus/jobs/scheduler.rb +36 -0
- data/lib/textus/jobs/seeder.rb +57 -0
- data/lib/textus/layout.rb +8 -0
- data/lib/textus/maintenance/drain.rb +42 -0
- data/lib/textus/maintenance/retention/apply.rb +52 -0
- data/lib/textus/maintenance/serve.rb +30 -0
- data/lib/textus/maintenance/worker.rb +74 -0
- data/lib/textus/manifest/capabilities.rb +1 -1
- data/lib/textus/manifest/data.rb +16 -1
- data/lib/textus/manifest/schema/keys.rb +1 -1
- data/lib/textus/manifest/schema/validator.rb +3 -3
- data/lib/textus/manifest/schema/vocabulary.rb +2 -2
- data/lib/textus/mcp/server.rb +1 -1
- data/lib/textus/ports/build_lock.rb +1 -1
- data/lib/textus/ports/produce_on_write_subscriber.rb +28 -24
- data/lib/textus/ports/queue.rb +130 -0
- data/lib/textus/produce/acquire/handler.rb +1 -1
- data/lib/textus/produce/acquire/intake.rb +3 -3
- data/lib/textus/produce/engine.rb +10 -58
- data/lib/textus/produce/events.rb +1 -1
- data/lib/textus/read/freshness.rb +2 -2
- data/lib/textus/read/get.rb +3 -3
- data/lib/textus/read/jobs.rb +31 -0
- data/lib/textus/role.rb +1 -1
- data/lib/textus/version.rb +1 -1
- data/lib/textus/write/enqueue.rb +50 -0
- metadata +14 -2
- data/lib/textus/maintenance/reconcile.rb +0 -160
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module Textus
  module Maintenance
    # One-shot convergence: enqueue the full convergence set for the requested
    # scope, run a worker until the queue reports nothing left to lease, then
    # return a summary hash that also carries the doctor report.
    #
    # The result is not-ok as soon as one job was dead-lettered during this run.
    # Jobs are drained by a single worker, one after another, on purpose: each
    # produce job takes the build lock itself (Produce::Engine.converge), so a
    # serial pass keeps that lock uncontended, whereas a concurrent pool would
    # make all but one produce job hit BuildInProgress and skip.
    class Drain
      extend Textus::Contract::DSL

      verb :drain
      summary "Converge everything now: seed produce + retention jobs and drain the queue to empty."
      surfaces :cli, :mcp
      cli "drain"
      arg :prefix, String, description: "restrict convergence to keys under this dotted prefix"
      arg :zone, String, description: "restrict convergence to entries in this zone"

      # container: the store container (supplies `root`).
      # call: the invoking call context, handed on to the seeder and the doctor.
      def initialize(container:, call:)
        @container = container
        @call = call
      end

      # Seeds, drains, then inspects health.
      #
      # prefix: optional dotted key prefix limiting what is seeded.
      # zone: optional zone name limiting what is seeded.
      #
      # Returns a Hash with the string keys "protocol", "ok", "completed",
      # "failed" and "health".
      def call(prefix: nil, zone: nil)
        job_queue = Textus::Ports::Queue.new(root: @container.root)
        seeder = Textus::Jobs::Seeder.new(container: @container, queue: job_queue, call: @call)
        seeder.seed(prefix: prefix, zone: zone)

        outcome = Worker.for(container: @container, queue: job_queue).drain
        dead_lettered = outcome.failed
        # The doctor runs after the queue is empty so it reports the converged state.
        doctor_report = Read::Doctor.new(container: @container, call: @call).call

        {
          "protocol" => Textus::PROTOCOL,
          "ok" => dead_lettered.zero?,
          "completed" => outcome.completed,
          "failed" => dead_lettered,
          "health" => doctor_report,
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require "fileutils"
|
|
2
|
+
|
|
3
|
+
module Textus
  module Maintenance
    module Retention
      # The destructive half of convergence: apply retention rows (drop/archive).
      # Lifted from the legacy reconcile apply/archive_leaf so drain/serve and
      # the `sweep` job handler share one path. Runs as the caller's role — never
      # self-elevates (ADR 0079/0093: destructiveness decides authority).
      class Apply
        # container: the store container (supplies `root`).
        # call: the invoking call context, passed through to Write::KeyDelete.
        def initialize(container:, call:)
          @container = container
          @call = call
        end

        # Applies every retention row independently.
        #
        # rows: an Enumerable of Hashes with string keys "key", "action"
        #       ("drop" or "archive") and, for archive rows, "path".
        #
        # Returns { dropped: [keys], archived: [keys], failed: [{ "key", "error" }] }.
        # A row with any other action is ignored (it appears in none of the lists).
        #
        # Failures are isolated per row: both domain errors (Textus::Error) and
        # filesystem errors raised while copying the leaf into the archive
        # (SystemCallError, e.g. Errno::ENOENT when the leaf is already gone)
        # are recorded under :failed so one bad row cannot abort the batch.
        def call(rows)
          out = { dropped: [], archived: [], failed: [] }
          delete = Write::KeyDelete.new(container: @container, call: @call)
          rows.each do |row|
            key = row["key"]
            begin
              case row["action"]
              when "drop"
                delete.call(key)
                out[:dropped] << key
              when "archive"
                # Copy first: if the copy raises, the delete below never runs,
                # so a leaf is never removed without an archived copy.
                archive_leaf(row)
                delete.call(key)
                out[:archived] << key
              end
            rescue Textus::Error, SystemCallError => e
              out[:failed] << { "key" => key, "error" => e.message }
            end
          end
          out
        end

        private

        # Copy the leaf into <store>/archive/<relative-path> before deletion.
        # The relative path is the row's "path" with the store root stripped.
        def archive_leaf(row)
          src = row["path"]
          root = @container.root.to_s
          rel = src.delete_prefix("#{root}/")
          dest = File.join(root, "archive", rel)
          FileUtils.mkdir_p(File.dirname(dest))
          FileUtils.cp(src, dest)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module Textus
  module Maintenance
    # Long-running convergence loop. Each iteration (`tick`) asks the scheduler
    # to enqueue whatever is due, hands expired leases back to the ready state,
    # and drains the queue; `run` repeats that forever with a pause in between.
    # Draining is serial for the same reason as Drain: every produce job takes
    # the build lock itself, so running them in turn keeps it uncontended.
    class Serve
      # container: the store container (supplies `root` and the manifest).
      # call: the invoking call context (stored; not read by tick/run here).
      def initialize(container:, call:)
        @container = container
        @call = call
        @queue = Textus::Ports::Queue.new(root: container.root)
      end

      # One iteration of the loop, in this order: schedule, reclaim, drain.
      # Returns whatever the worker's `drain` returns.
      def tick
        scheduler = Textus::Jobs::Scheduler.new(container: @container, queue: @queue)
        scheduler.run_once

        @queue.reclaim(now: Textus::Ports::Clock.new.now)

        worker = Worker.for(container: @container, queue: @queue)
        worker.drain
      end

      # Loops forever: tick, then sleep.
      #
      # poll: seconds to sleep between ticks; when nil (or false) the manifest's
      #       worker_config[:poll] is used instead.
      def run(poll: nil)
        pause = poll || @container.manifest.data.worker_config[:poll]
        loop do
          tick
          sleep(pause)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module Textus
  module Maintenance
    # Queue consumer. Repeatedly leases a job, resolves its handler through the
    # registry, runs it (the original note says running as the job's stamped
    # authority is wired in a later phase), and then either acks it or reports
    # the failure to the queue, which decides between requeue and dead-letter.
    # `drain` keeps going until nothing can be leased and returns a Summary.
    # Delivery is at-least-once.
    class Worker
      # Totals for one drain: jobs acked and jobs dead-lettered.
      Summary = Struct.new(:completed, :failed, keyword_init: true)

      # Builds the standard convergence worker: the handler registry from
      # Textus::Jobs::Handlers plus the lease TTL from the manifest's
      # worker_config.
      def self.for(container:, queue:)
        ttl = container.manifest.data.worker_config[:lease_ttl]
        handlers = Textus::Jobs::Handlers.registry
        new(queue: queue, registry: handlers, container: container, lease_ttl: ttl)
      end

      # queue: responds to lease/ack/fail.
      # registry: responds to lookup(type), returning an entry with a `handler`.
      # container: passed to every handler invocation.
      # lease_ttl: lease duration handed to the queue on every lease.
      def initialize(queue:, registry:, container:, lease_ttl: 60)
        @queue = queue
        @registry = registry
        @container = container
        @lease_ttl = lease_ttl
      end

      # Processes jobs until the queue has nothing left to lease.
      # Returns a Summary; only dead-lettered jobs count as failed. A requeued
      # job is simply leased again on a later iteration.
      def drain(worker_id: "drain-#{Process.pid}")
        tally = Hash.new(0)
        while (leased = @queue.lease(worker_id: worker_id, lease_ttl: @lease_ttl))
          tally[run_one(leased)] += 1
        end
        Summary.new(completed: tally[:completed], failed: tally[:dead_lettered])
      end

      # Runs `pool` drains on separate threads, each with its own worker id,
      # and returns the summed Summary once every thread has finished.
      def drain_pool(pool: 4)
        guard = Mutex.new
        collected = []
        workers = Array.new(pool) do |slot|
          Thread.new do
            result = drain(worker_id: "pool-#{Process.pid}-#{slot}")
            guard.synchronize { collected << result }
          end
        end
        workers.each(&:join)
        Summary.new(
          completed: collected.sum(&:completed),
          failed: collected.sum(&:failed),
        )
      end

      private

      # Executes a single leased job. Yields :completed after a successful ack;
      # when anything in the lookup/handler/ack sequence raises a StandardError,
      # yields the queue's verdict for the failure (:requeued or :dead_lettered).
      def run_one(leased)
        job = leased.job
        @registry.lookup(job.type).handler.call(job: job, container: @container)
        @queue.ack(leased)
        :completed
      rescue StandardError => e
        @queue.fail(leased, error: e.message)
      end
    end
  end
end
|
|
@@ -13,7 +13,7 @@ module Textus
|
|
|
13
13
|
DEFAULT_MAPPING = {
|
|
14
14
|
Textus::Role::HUMAN => %w[author propose].freeze,
|
|
15
15
|
Textus::Role::AGENT => %w[propose].freeze,
|
|
16
|
-
Textus::Role::AUTOMATION => %w[
|
|
16
|
+
Textus::Role::AUTOMATION => %w[converge].freeze,
|
|
17
17
|
}.freeze
|
|
18
18
|
|
|
19
19
|
# Returns { role_name => [verbs] }. When `roles:` is declared we use
|
data/lib/textus/manifest/data.rb
CHANGED
|
@@ -10,10 +10,11 @@ module Textus
|
|
|
10
10
|
# resolution, rules) lives on Manifest::Policy / Resolver / Rules.
|
|
11
11
|
class Data
|
|
12
12
|
AUDIT_DEFAULTS = { max_size: 10_485_760, keep: 5 }.freeze
|
|
13
|
+
WORKER_DEFAULTS = { pool: 4, poll: 5, lease_ttl: 60, max_attempts: 3 }.freeze
|
|
13
14
|
|
|
14
15
|
attr_reader :raw, :root, :entries, :declared_zone_kinds,
|
|
15
16
|
:zone_descs, :zone_owners,
|
|
16
|
-
:audit_config, :role_caps, :policy
|
|
17
|
+
:audit_config, :worker_config, :role_caps, :policy
|
|
17
18
|
|
|
18
19
|
def self.validate_key!(key)
|
|
19
20
|
raise UsageError.new("empty key") if key.nil? || key.empty?
|
|
@@ -47,6 +48,7 @@ module Textus
|
|
|
47
48
|
# future `zone_owners.key?(name)` means "owner declared", not "zone exists".
|
|
48
49
|
@zone_owners = Array(raw["zones"]).to_h { |z| [z["name"], z["owner"]] }.compact
|
|
49
50
|
@audit_config = build_audit_config(raw)
|
|
51
|
+
@worker_config = build_worker_config(raw)
|
|
50
52
|
@role_caps = Capabilities.resolve(raw["roles"])
|
|
51
53
|
# Policy is constructed before entries because Entry validators
|
|
52
54
|
# use the entry's own `derived?` and similar helpers that call into
|
|
@@ -67,6 +69,19 @@ module Textus
|
|
|
67
69
|
}.freeze
|
|
68
70
|
end
|
|
69
71
|
|
|
72
|
+
# Worker/queue tunables (ADR: job-queue execution model). All optional;
|
|
73
|
+
# the daemon (serve) and batch drain read these, falling back to defaults
|
|
74
|
+
# so a manifest with no `worker:` block runs the queue out of the box.
|
|
75
|
+
def build_worker_config(raw)
  # An absent `worker:` block behaves like an empty one.
  overrides = raw["worker"] || {}
  # Walk the defaults (pool, poll, lease_ttl, max_attempts) and let a truthy
  # manifest value under the same string key win; the result is a frozen Hash
  # keyed by symbol.
  WORKER_DEFAULTS.to_h { |name, fallback| [name, overrides[name.to_s] || fallback] }.freeze
end
|
|
84
|
+
|
|
70
85
|
def build_entries(raw)
|
|
71
86
|
Array(raw["entries"]).map do |e|
|
|
72
87
|
entry = Manifest::Entry::Parser.call(e)
|
|
@@ -23,7 +23,7 @@ module Textus
|
|
|
23
23
|
# `inject_boot`/`provenance` fields are kept here so the schema walk can
|
|
24
24
|
# still emit the migration hint rather than a bare "unknown key".
|
|
25
25
|
SOURCE_KEYS = %w[
|
|
26
|
-
from handler config template project command sources ttl
|
|
26
|
+
from handler config template project command sources ttl inject_boot provenance
|
|
27
27
|
select pluck sort_by transform
|
|
28
28
|
].freeze
|
|
29
29
|
# ADR 0093: rule-level GC slot. drop/archive only (refresh gone).
|
|
@@ -168,7 +168,7 @@ module Textus
|
|
|
168
168
|
hints = {
|
|
169
169
|
"lifecycle" => "age GC moved to the `retention:` rule ({ ttl, action: drop|archive }); " \
|
|
170
170
|
"intake cadence to the entry's `source: { ttl }`",
|
|
171
|
-
"materialize" => "
|
|
171
|
+
"materialize" => "removed — materialization is automatic (a write enqueues a job; run `drain`)",
|
|
172
172
|
}
|
|
173
173
|
hints.each do |old, hint|
|
|
174
174
|
next unless rule.key?(old)
|
|
@@ -194,10 +194,10 @@ module Textus
|
|
|
194
194
|
Array(r["can"]).each do |verb|
|
|
195
195
|
next if CAPABILITIES.include?(verb)
|
|
196
196
|
|
|
197
|
-
# The quarantine capability folded into
|
|
197
|
+
# The quarantine capability folded into the converge capability (ADR 0090); a
|
|
198
198
|
# manifest still naming the old quarantine capability (`ingest`, or
|
|
199
199
|
# legacy `fetch`) gets a pointed hint rather than a bare error.
|
|
200
|
-
hint = %w[ingest fetch].include?(verb) ? " — the quarantine capability folded into '
|
|
200
|
+
hint = %w[ingest fetch].include?(verb) ? " — the quarantine capability folded into 'converge' (ADR 0090)" : ""
|
|
201
201
|
raise BadManifest.new(
|
|
202
202
|
"unknown capability '#{verb}' for role '#{name}' at '#{path}' " \
|
|
203
203
|
"(known: #{CAPABILITIES.join(", ")})#{hint}",
|
|
@@ -5,13 +5,13 @@ module Textus
|
|
|
5
5
|
# 0034; the quarantine + derived ZONE-KINDS folded into one `machine` kind
|
|
6
6
|
# in ADR 0091). Each kind pairs with the capability that authorizes
|
|
7
7
|
# originating bytes in it. ONE source of truth; the derived constants below
|
|
8
|
-
# cannot drift. A BIJECTION again (0090 had two kinds →
|
|
8
|
+
# cannot drift. A BIJECTION again (0090 had two kinds → the converge capability; 0091
|
|
9
9
|
# collapses them, so kind ↔ capability is 1:1).
|
|
10
10
|
module Vocabulary
|
|
11
11
|
LANES = {
|
|
12
12
|
"canon" => "author",
|
|
13
13
|
"workspace" => "keep",
|
|
14
|
-
"machine" => "
|
|
14
|
+
"machine" => "converge",
|
|
15
15
|
"queue" => "propose",
|
|
16
16
|
}.freeze
|
|
17
17
|
|
data/lib/textus/mcp/server.rb
CHANGED
|
@@ -94,7 +94,7 @@ module Textus
|
|
|
94
94
|
|
|
95
95
|
# ADR 0083: the contract-drift guard gates mutating verbs — every MCP
|
|
96
96
|
# verb that is NOT a pure read (Write:: + the destructive Maintenance::
|
|
97
|
-
# verbs
|
|
97
|
+
# verbs drain/zone_mv/key_*_prefix). Reads and boot bypass it (a stale
|
|
98
98
|
# read returns on-disk truth; boot re-orients). Keying on read_verbs
|
|
99
99
|
# (not write_verbs) keeps the destructive Maintenance:: verbs gated.
|
|
100
100
|
@session.check_etag!(contract_etag) unless Catalog.read_verbs.include?(name)
|
|
@@ -5,7 +5,7 @@ require "time"
|
|
|
5
5
|
module Textus
|
|
6
6
|
module Ports
|
|
7
7
|
# Cross-process build lock: a pid/host-stamped lockfile under the store root
|
|
8
|
-
# that serializes
|
|
8
|
+
# that serializes converge's produce/sweep. An instantiable class — it holds
|
|
9
9
|
# the root and lock state; `self.with(root:)` is a convenience that constructs
|
|
10
10
|
# one and runs the block under the held lock. It already satisfied ADR 0109's
|
|
11
11
|
# single-shape rule (every port is an instantiable class) before that ADR's
|
|
@@ -2,37 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
module Textus
|
|
4
4
|
module Ports
|
|
5
|
-
# ADR 0093: on a canon write,
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
# fan out (recursion guard).
|
|
11
|
-
#
|
|
5
|
+
# ADR 0093 / job-queue model: on a canon write, enqueue a `materialize` job
|
|
6
|
+
# for each derived entry that depends on the written key (rdeps ∩ producible).
|
|
7
|
+
# Async-only — the write returns immediately; a worker (drain/serve) converges
|
|
8
|
+
# the jobs. There is no inline `sync` path and no in-process thread: freshness
|
|
9
|
+
# is re-homed to drain (at the commit/CI gate) and the daemon. A write INTO a
|
|
10
|
+
# derived entry does not fan out (recursion guard). Produce self-elevates, so
|
|
11
|
+
# the job is stamped automation. Attached at Store boot, alongside
|
|
12
|
+
# AuditSubscriber.
|
|
12
13
|
class ProduceOnWriteSubscriber
|
|
13
14
|
def initialize(container)
|
|
14
15
|
@container = container
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def attach(bus)
|
|
18
|
-
bus.on(:entry_written, :produce_on_write) do |
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
bus.on(:entry_written, :produce_on_write) do |key:, **|
|
|
20
|
+
on_write(key: key)
|
|
21
|
+
end
|
|
22
|
+
# Closes the ADR 0087 gap: a delete/rename of a source must re-materialize
|
|
23
|
+
# its orphaned dependents too, not just a write. These fire distinct
|
|
24
|
+
# events (:entry_deleted / :entry_renamed), so subscribe to each.
|
|
25
|
+
bus.on(:entry_deleted, :produce_on_delete) do |key:, **|
|
|
26
|
+
on_write(key: key)
|
|
27
|
+
end
|
|
28
|
+
bus.on(:entry_renamed, :produce_on_rename) do |from_key:, to_key:, **|
|
|
29
|
+
on_write(key: from_key)
|
|
30
|
+
on_write(key: to_key)
|
|
21
31
|
end
|
|
22
32
|
self
|
|
23
33
|
end
|
|
24
34
|
|
|
25
|
-
def on_write(key
|
|
35
|
+
def on_write(key:)
|
|
26
36
|
return if derived_write?(key) # recursion guard: produce output is not a source change
|
|
27
37
|
|
|
28
38
|
affected = Textus::Read::Rdeps.new(container: @container).call(key)["rdeps"]
|
|
29
39
|
producible = affected.select { |k| producible?(k) }
|
|
30
40
|
return if producible.empty?
|
|
31
41
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
42
|
+
queue = Textus::Ports::Queue.new(root: @container.root)
|
|
43
|
+
producible.each do |k|
|
|
44
|
+
queue.enqueue(
|
|
45
|
+
Textus::Domain::Jobs::Job.new(
|
|
46
|
+
type: "materialize", args: { "key" => k }, enqueued_by: Textus::Role::AUTOMATION,
|
|
47
|
+
),
|
|
48
|
+
)
|
|
36
49
|
end
|
|
37
50
|
end
|
|
38
51
|
|
|
@@ -55,15 +68,6 @@ module Textus
|
|
|
55
68
|
rescue Textus::Error
|
|
56
69
|
false
|
|
57
70
|
end
|
|
58
|
-
|
|
59
|
-
# Only derived entries carry a source with on_write semantics; a nested
|
|
60
|
-
# publish_tree entry has no source and defaults to async.
|
|
61
|
-
def any_sync?(keys)
|
|
62
|
-
keys.any? do |k|
|
|
63
|
-
entry = @container.manifest.resolver.resolve(k).entry
|
|
64
|
-
entry.derived? && entry.source.sync?
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
71
|
end
|
|
68
72
|
end
|
|
69
73
|
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
require "fileutils"
|
|
2
|
+
require "json"
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module Textus
  module Ports
    # File-backed durable job queue under `<root>/.run/queue/`. Each job state
    # is a directory; a job is one `<id>.json` file. Claiming is an atomic
    # `rename(2)` from ready/ to leased/ — the rename winner owns the job, so a
    # worker pool needs no central lock. Dedup falls out of the id-as-filename:
    # enqueueing an id that is already in ready/ is a no-op. ADR 0038 (runtime
    # subtree), ADR 0108 (instantiable port).
    #
    # Only entries ending in `.json` are jobs. Atomic writes stage their bytes
    # in a sibling `*.tmp` file inside the same state directory, so every
    # directory scan must skip those in-flight temp files.
    class Queue
      STATES = %i[ready leased done failed].freeze
      JOB_EXT = ".json".freeze

      # A claimed job plus the path it lives at, so ack/fail act on this copy.
      Leased = Struct.new(:job, :leased_path, keyword_init: true)

      # root: the store root; one directory per state is created beneath it.
      def initialize(root:)
        @root = root
        STATES.each { |s| FileUtils.mkdir_p(Textus::Layout.queue_state(root, s)) }
      end

      # Adds a job to ready/. Returns nil without writing when a job with the
      # same id is already waiting there.
      def enqueue(job)
        dest = path(:ready, job.id)
        return if File.exist?(dest) # dedup: identical work already queued

        write_atomic(dest, job.to_h)
      end

      # Ids of the jobs currently waiting in ready/.
      def ready_ids
        list(:ready)
      end

      # Claims one ready job for `worker_id` and stamps it with a lease that
      # expires `lease_ttl` seconds from now. Returns a Leased, or nil when
      # nothing could be claimed.
      def lease(worker_id:, lease_ttl:)
        ready_dir = state_dir(:ready)
        leased_dir = state_dir(:leased)
        job_files(:ready).each do |name|
          src = File.join(ready_dir, name)
          dst = File.join(leased_dir, name)
          begin
            File.rename(src, dst) # atomic claim; loser's rename raises ENOENT
          rescue Errno::ENOENT
            next # another worker won this one
          end
          job = Textus::Domain::Jobs::Job.from_h(JSON.parse(File.read(dst)))
          stamp_lease(dst, worker_id: worker_id, expires_at: Time.now.utc + lease_ttl)
          return Leased.new(job: job, leased_path: dst)
        end
        nil
      end

      # Marks a leased job finished by moving it to done/.
      def ack(leased)
        dest = File.join(state_dir(:done), File.basename(leased.leased_path))
        File.rename(leased.leased_path, dest)
      end

      # Increment attempts and either requeue (transient) or dead-letter (attempts
      # exhausted). Returns :requeued or :dead_lettered so the worker can count
      # terminal failures distinctly from retries.
      def fail(leased, error:)
        job = leased.job
        job.attempts += 1
        job.last_error = error
        dead = job.attempts >= job.max_attempts
        write_atomic(path(dead ? :failed : :ready, job.id), job.to_h)
        begin
          File.delete(leased.leased_path)
        rescue Errno::ENOENT
          # The lease copy is already gone (e.g. it expired and was reclaimed).
          # The verdict has been recorded above; raising here would escape the
          # worker's rescue path and abort the whole drain.
        end
        dead ? :dead_lettered : :requeued
      end

      # Return expired leases to ready/ (the holding worker crashed). Returns the
      # count reclaimed. At-least-once delivery: a job whose handler actually
      # finished but whose ack was lost will re-run — handlers must be idempotent.
      # A leased file that carries no lease stamp is treated as expired.
      def reclaim(now:)
        leased_dir = state_dir(:leased)
        ready_dir = state_dir(:ready)
        count = 0
        job_files(:leased).each do |name|
          src = File.join(leased_dir, name)
          data = JSON.parse(File.read(src))
          expires = data.dig("lease", "expires_at")
          next if expires && Time.parse(expires) > now

          data.delete("lease")
          # Swap the stripped copy in atomically so no reader sees a partial file.
          write_atomic(src, data)
          File.rename(src, File.join(ready_dir, name))
          count += 1
        rescue Errno::ENOENT
          next # raced with another reclaimer / the worker's ack
        end
        count
      end

      # Ids of the jobs in the given state (Symbol or String).
      def list(state)
        job_files(state.to_sym).map { |f| File.basename(f, JOB_EXT) }
      end

      # Moves a dead-lettered job back to ready/ with its attempt counter and
      # last error cleared.
      def retry_failed(job_id)
        src = path(:failed, job_id)
        data = JSON.parse(File.read(src))
        data["attempts"] = 0
        data["last_error"] = nil
        write_atomic(path(:ready, job_id), data)
        File.delete(src)
      end

      # Deletes every file in the given state's directory (temp files included).
      def purge(state)
        dir = state_dir(state.to_sym)
        Dir.children(dir).each { |f| File.delete(File.join(dir, f)) }
      end

      private

      # Directory holding the files for one state.
      def state_dir(state)
        Textus::Layout.queue_state(@root, state)
      end

      # File names in a state directory that are real jobs, i.e. end in `.json`.
      # This excludes the `*.tmp` staging files created by write_atomic.
      def job_files(state)
        Dir.children(state_dir(state)).select { |f| f.end_with?(JOB_EXT) }
      end

      # Records who holds the lease and when it expires inside the leased file.
      # Written atomically so a concurrent reclaim never parses a partial file.
      def stamp_lease(leased_path, worker_id:, expires_at:)
        data = JSON.parse(File.read(leased_path))
        data["lease"] = { "worker_id" => worker_id, "expires_at" => expires_at.iso8601 }
        write_atomic(leased_path, data)
      end

      def path(state, job_id)
        File.join(state_dir(state), "#{job_id}#{JOB_EXT}")
      end

      # Writes to a sibling temp file and renames it into place. The temp name
      # carries the pid and the thread id so pool threads cannot share one, and
      # ends in `.tmp` so directory scans ignore it.
      def write_atomic(dest, hash)
        tmp = "#{dest}.#{Process.pid}.#{Thread.current.object_id}.tmp"
        File.write(tmp, JSON.pretty_generate(hash))
        File.rename(tmp, dest) # atomic on same filesystem
      end
    end
  end
end
|
|
@@ -6,7 +6,7 @@ module Textus
|
|
|
6
6
|
# Invokes a :resolve_handler hook handler by name under a timeout — the single
|
|
7
7
|
# home for "call the intake handler under a deadline" (ADR 0048 D1). Shared by
|
|
8
8
|
# Produce::Acquire::Intake (the internal ingest mechanism — no public verb since ADR 0079)
|
|
9
|
-
# as driven by the
|
|
9
|
+
# as driven by the converge sweep (drain/serve) and `textus hook run` (ADR 0089 made
|
|
10
10
|
# ingest system-pushed; there is no read or put trigger).
|
|
11
11
|
# Always passes a Container as `caps:` so the hook contract (ADR 0027) is
|
|
12
12
|
# uniform across every entry point. Maps Timeout::Error to a UsageError;
|
|
@@ -5,7 +5,7 @@ module Textus
|
|
|
5
5
|
module Acquire
|
|
6
6
|
# Internal ingest executor for one machine-zone intake entry. No longer a
|
|
7
7
|
# public verb (ADR 0079 collapsed the `fetch` surface): used by the
|
|
8
|
-
#
|
|
8
|
+
# converge sweep (drain/serve) and `textus hook run` only — ingest is system-pushed
|
|
9
9
|
# (ADR 0089 removed the read-through that once also drove it).
|
|
10
10
|
class Intake
|
|
11
11
|
FETCH_TIMEOUT_SECONDS = Textus::Produce::Acquire::Handler::FETCH_TIMEOUT_SECONDS
|
|
@@ -96,9 +96,9 @@ module Textus
|
|
|
96
96
|
normalized = self.class.normalize_action_result(result, format: mentry.format)
|
|
97
97
|
Textus::Domain::Policy::GuardFactory.new(
|
|
98
98
|
manifest: @manifest, schemas: @schemas,
|
|
99
|
-
).for(:
|
|
99
|
+
).for(:converge, key).check!(
|
|
100
100
|
Textus::Domain::Policy::Evaluation.new(
|
|
101
|
-
actor: @call.role, transition: :
|
|
101
|
+
actor: @call.role, transition: :converge, origin: nil,
|
|
102
102
|
target: key, envelope: nil, manifest: @manifest
|
|
103
103
|
),
|
|
104
104
|
)
|
|
@@ -8,17 +8,16 @@ module Textus
|
|
|
8
8
|
# derived (from: command) -> skip the build; publish_via publishes
|
|
9
9
|
# existing store bytes via mode resolution
|
|
10
10
|
# (None when no targets -> skipped)
|
|
11
|
-
# Runs as the
|
|
11
|
+
# Runs as the converge build actor (self-elevating); the passed `call`
|
|
12
12
|
# supplies only correlation_id/dry_run. Callers choose the key set: the
|
|
13
|
-
# write subscriber passes rdeps ∩ derived;
|
|
13
|
+
# write subscriber passes rdeps ∩ derived; the converge pass passes
|
|
14
14
|
# all-derived + stale-intake.
|
|
15
15
|
class Engine
|
|
16
|
-
# Locked + failure-isolated convergence — the
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
# writer (ADR 0087 §5 failure isolation, preserved).
|
|
16
|
+
# Locked + failure-isolated convergence — the entry point worker handlers
|
|
17
|
+
# call to materialize a key set (ADR 0093 / job-queue model). A held lock
|
|
18
|
+
# is a soft miss (an in-flight build/converge already produces fresh
|
|
19
|
+
# output); any other error is republished as :produce_failed and never
|
|
20
|
+
# raised at the caller (ADR 0087 §5 failure isolation, preserved).
|
|
22
21
|
def self.converge(container:, call:, keys:)
|
|
23
22
|
Textus::Ports::BuildLock.with(root: container.root) do
|
|
24
23
|
new(container: container, call: call).call(keys: keys)
|
|
@@ -73,10 +72,10 @@ module Textus
|
|
|
73
72
|
end
|
|
74
73
|
|
|
75
74
|
def build_actor_call
|
|
76
|
-
build_role = @manifest.policy.actor_for("
|
|
75
|
+
build_role = @manifest.policy.actor_for("converge") or
|
|
77
76
|
raise Textus::UsageError.new(
|
|
78
|
-
"no role holds the '
|
|
79
|
-
hint: "declare a role with `can: [
|
|
77
|
+
"no role holds the 'converge' capability",
|
|
78
|
+
hint: "declare a role with `can: [converge]` in .textus/manifest.yaml",
|
|
80
79
|
)
|
|
81
80
|
Textus::Call.build(
|
|
82
81
|
role: build_role,
|
|
@@ -91,53 +90,6 @@ module Textus
|
|
|
91
90
|
reader: Textus::Read::Get.new(container: @container, call: call)
|
|
92
91
|
)
|
|
93
92
|
end
|
|
94
|
-
|
|
95
|
-
# In-process deferral for the async write trigger (ADR 0087/0093).
|
|
96
|
-
# Spawns a tracked thread that runs Produce.converge after the write
|
|
97
|
-
# returns; a one-time at_exit joins
|
|
98
|
-
# all pending threads so a short-lived CLI process cannot exit before an
|
|
99
|
-
# async rebuild completes. The write itself never blocks.
|
|
100
|
-
module AsyncRunner
|
|
101
|
-
@mutex = Mutex.new
|
|
102
|
-
@threads = []
|
|
103
|
-
@hooked = false
|
|
104
|
-
|
|
105
|
-
class << self
|
|
106
|
-
def enqueue(container:, call:, keys:)
|
|
107
|
-
thread = Thread.new { Textus::Produce::Engine.converge(container: container, call: call, keys: keys) }
|
|
108
|
-
track(thread)
|
|
109
|
-
thread
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
# Block until every spawned async rebuild has finished. Idempotent;
|
|
113
|
-
# safe to call from at_exit and directly from tests.
|
|
114
|
-
def drain
|
|
115
|
-
pending = @mutex.synchronize { @threads.dup }
|
|
116
|
-
pending.each(&:join)
|
|
117
|
-
@mutex.synchronize { @threads.delete_if { |t| !t.alive? } }
|
|
118
|
-
nil
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
private
|
|
122
|
-
|
|
123
|
-
def track(thread)
|
|
124
|
-
@mutex.synchronize do
|
|
125
|
-
@threads.delete_if { |t| !t.alive? }
|
|
126
|
-
@threads << thread
|
|
127
|
-
install_drain_hook
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
# Register the join-before-exit hook exactly once. Guarded by the
|
|
132
|
-
# caller holding @mutex.
|
|
133
|
-
def install_drain_hook
|
|
134
|
-
return if @hooked
|
|
135
|
-
|
|
136
|
-
@hooked = true
|
|
137
|
-
at_exit { drain }
|
|
138
|
-
end
|
|
139
|
-
end
|
|
140
|
-
end
|
|
141
93
|
end
|
|
142
94
|
end
|
|
143
95
|
end
|