textus 0.51.0 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +19 -19
  4. data/SPEC.md +41 -39
  5. data/docs/architecture/README.md +9 -9
  6. data/docs/reference/conventions.md +8 -8
  7. data/lib/textus/boot.rb +7 -5
  8. data/lib/textus/cli/runner.rb +2 -2
  9. data/lib/textus/cli/verb/put.rb +1 -1
  10. data/lib/textus/cli/verb/serve.rb +19 -0
  11. data/lib/textus/dispatcher.rb +3 -1
  12. data/lib/textus/doctor/check/generator_drift.rb +1 -1
  13. data/lib/textus/doctor/check/sentinels.rb +2 -2
  14. data/lib/textus/domain/freshness/evaluator.rb +2 -2
  15. data/lib/textus/domain/jobs/job.rb +58 -0
  16. data/lib/textus/domain/jobs/registry.rb +37 -0
  17. data/lib/textus/domain/policy/base_guards.rb +1 -1
  18. data/lib/textus/domain/policy/retention.rb +1 -1
  19. data/lib/textus/domain/policy/source.rb +4 -10
  20. data/lib/textus/errors.rb +2 -2
  21. data/lib/textus/hooks/catalog.rb +0 -1
  22. data/lib/textus/init/templates/machine_intake.rb +1 -1
  23. data/lib/textus/init.rb +4 -4
  24. data/lib/textus/jobs/handlers.rb +62 -0
  25. data/lib/textus/jobs/scheduler.rb +36 -0
  26. data/lib/textus/jobs/seeder.rb +57 -0
  27. data/lib/textus/layout.rb +8 -0
  28. data/lib/textus/maintenance/drain.rb +42 -0
  29. data/lib/textus/maintenance/retention/apply.rb +52 -0
  30. data/lib/textus/maintenance/serve.rb +30 -0
  31. data/lib/textus/maintenance/worker.rb +74 -0
  32. data/lib/textus/manifest/capabilities.rb +1 -1
  33. data/lib/textus/manifest/data.rb +16 -1
  34. data/lib/textus/manifest/schema/keys.rb +1 -1
  35. data/lib/textus/manifest/schema/validator.rb +3 -3
  36. data/lib/textus/manifest/schema/vocabulary.rb +2 -2
  37. data/lib/textus/mcp/server.rb +1 -1
  38. data/lib/textus/ports/build_lock.rb +1 -1
  39. data/lib/textus/ports/produce_on_write_subscriber.rb +28 -24
  40. data/lib/textus/ports/queue.rb +130 -0
  41. data/lib/textus/produce/acquire/handler.rb +1 -1
  42. data/lib/textus/produce/acquire/intake.rb +3 -3
  43. data/lib/textus/produce/engine.rb +10 -58
  44. data/lib/textus/produce/events.rb +1 -1
  45. data/lib/textus/read/freshness.rb +2 -2
  46. data/lib/textus/read/get.rb +3 -3
  47. data/lib/textus/read/jobs.rb +31 -0
  48. data/lib/textus/role.rb +1 -1
  49. data/lib/textus/version.rb +1 -1
  50. data/lib/textus/write/enqueue.rb +50 -0
  51. metadata +14 -2
  52. data/lib/textus/maintenance/reconcile.rb +0 -160
@@ -0,0 +1,42 @@
1
module Textus
  module Maintenance
    # One-shot convergence verb: seed the convergence set for the requested
    # scope, drain the queue until nothing is left to lease, then report.
    # The result is not-ok as soon as a single job dead-lettered. Draining is
    # deliberately serial: every produce job takes the build lock itself via
    # Produce::Engine.converge, so running jobs one after another keeps that
    # lock uncontended, whereas a concurrent pool would make all-but-one
    # produce job hit BuildInProgress and skip.
    class Drain
      extend Textus::Contract::DSL

      verb :drain
      summary "Converge everything now: seed produce + retention jobs and drain the queue to empty."
      surfaces :cli, :mcp
      cli "drain"
      arg :prefix, String, description: "restrict convergence to keys under this dotted prefix"
      arg :zone, String, description: "restrict convergence to entries in this zone"

      # container: the store container (supplies `root`).
      # call:      the invoking call context, handed on to the seeder and doctor.
      def initialize(container:, call:)
        @container = container
        @call = call
      end

      # Seeds, drains, and health-checks in that order.
      #
      # prefix: optional dotted key prefix limiting what gets seeded.
      # zone:   optional zone name limiting what gets seeded.
      #
      # Returns a Hash with "protocol", "ok" (true when no job dead-lettered),
      # "completed", "failed" and "health" (the Read::Doctor result).
      def call(prefix: nil, zone: nil)
        job_queue = Textus::Ports::Queue.new(root: @container.root)
        seeder = Textus::Jobs::Seeder.new(container: @container, queue: job_queue, call: @call)
        seeder.seed(prefix: prefix, zone: zone)

        tally = Worker.for(container: @container, queue: job_queue).drain
        doctor = Read::Doctor.new(container: @container, call: @call)
        health = doctor.call

        report = {}
        report["protocol"] = Textus::PROTOCOL
        report["ok"] = tally.failed.zero?
        report["completed"] = tally.completed
        report["failed"] = tally.failed
        report["health"] = health
        report
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require "fileutils"
2
+
3
module Textus
  module Maintenance
    module Retention
      # Destructive side of convergence: carries out retention rows, each of
      # which either drops a key or archives its leaf and then drops it. One
      # shared path for drain/serve and the `sweep` job handler. Deletion goes
      # through Write::KeyDelete built with the caller's own `call`, so this
      # class never elevates on its own (ADR 0079/0093).
      class Apply
        # container: the store container (supplies `root` for archiving).
        # call:      the caller's call context, passed to Write::KeyDelete.
        def initialize(container:, call:)
          @container = container
          @call = call
        end

        # Applies every row in order.
        #
        # rows: enumerable of Hashes with "key", "action" ("drop" | "archive")
        #       and, for archive rows, "path". Rows with any other action are
        #       ignored.
        #
        # Returns { dropped: [keys], archived: [keys], failed: [{ "key", "error" }] }.
        # Only Textus::Error is captured per row; any other exception propagates.
        def call(rows)
          report = { dropped: [], archived: [], failed: [] }
          remover = Write::KeyDelete.new(container: @container, call: @call)
          rows.each do |row|
            key = row["key"]
            begin
              bucket = retire(row, key, remover)
              report[bucket] << key if bucket
            rescue Textus::Error => e
              report[:failed] << { "key" => key, "error" => e.message }
            end
          end
          report
        end

        private

        # Performs one row's action and returns the report bucket it belongs
        # in (:dropped / :archived), or nil when the action is not recognised.
        def retire(row, key, remover)
          case row["action"]
          when "drop"
            bucket = :dropped
          when "archive"
            archive_leaf(row) # copy first, so the bytes survive the delete below
            bucket = :archived
          else
            return nil
          end
          remover.call(key)
          bucket
        end

        # Copy the leaf into <store>/archive/<relative-path> before deletion.
        def archive_leaf(row)
          leaf = row["path"]
          store = @container.root.to_s
          target = File.join(store, "archive", leaf.delete_prefix("#{store}/"))
          FileUtils.mkdir_p(File.dirname(target))
          FileUtils.cp(leaf, target)
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Textus
  module Maintenance
    # Long-running convergence loop. Each pass schedules due work, returns
    # expired leases to the ready set, and drains the queue; `run` repeats that
    # pass forever with a sleep in between, while `tick` exposes a single pass
    # so it can be exercised on its own. Draining is serial for the same reason
    # as Drain: produce jobs lock themselves, so taking them one at a time
    # keeps the build lock uncontended.
    class Serve
      # container: the store container (supplies `root` and the manifest).
      # call:      the invoking call context (stored; not read by this class).
      def initialize(container:, call:)
        @container = container
        @call = call
        @queue = Textus::Ports::Queue.new(root: container.root)
      end

      # One pass: schedule, reclaim, drain. Returns the worker's drain summary.
      def tick
        scheduler = Textus::Jobs::Scheduler.new(container: @container, queue: @queue)
        scheduler.run_once

        current_time = Textus::Ports::Clock.new.now
        @queue.reclaim(now: current_time)

        worker = Worker.for(container: @container, queue: @queue)
        worker.drain
      end

      # Loops forever, sleeping between passes.
      #
      # poll: seconds to sleep between passes; when nil the manifest's
      #       worker_config[:poll] is used.
      def run(poll: nil)
        pause = poll || @container.manifest.data.worker_config[:poll]
        loop do
          tick
          sleep(pause)
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module Textus
  module Maintenance
    # Queue consumer. Repeatedly leases a job, resolves its handler through the
    # registry, and runs it; a clean run is acked, a raise is handed to the
    # queue's `fail`, which decides between requeue and dead-letter. `drain`
    # stops when nothing is left to lease and reports what happened. Delivery
    # is at-least-once.
    class Worker
      # Outcome of a drain: jobs acked and jobs dead-lettered.
      Summary = Struct.new(:completed, :failed, keyword_init: true)

      # Builds the standard convergence worker used by both `drain` and
      # `serve`: the closed handler allow-list plus the lease TTL taken from
      # the manifest's worker_config.
      def self.for(container:, queue:)
        handlers = Textus::Jobs::Handlers.registry
        ttl = container.manifest.data.worker_config[:lease_ttl]
        new(queue: queue, registry: handlers, container: container, lease_ttl: ttl)
      end

      # queue:     responds to lease / ack / fail.
      # registry:  responds to lookup(type), returning an entry with a `handler`.
      # container: passed through to every handler call.
      # lease_ttl: lease duration handed to the queue on every lease.
      def initialize(queue:, registry:, container:, lease_ttl: 60)
        @queue = queue
        @registry = registry
        @container = container
        @lease_ttl = lease_ttl
      end

      # Leases and runs jobs until the queue hands back nothing.
      #
      # Returns a Summary. Only acked jobs count as completed and only
      # dead-lettered jobs count as failed; a requeued job is simply leased
      # again on a later pass of the loop.
      def drain(worker_id: "drain-#{Process.pid}")
        outcomes = Hash.new(0)
        while (claim = @queue.lease(worker_id: worker_id, lease_ttl: @lease_ttl))
          outcomes[run_one(claim)] += 1
        end
        Summary.new(completed: outcomes[:completed], failed: outcomes[:dead_lettered])
      end

      # Runs `pool` concurrent drains, each under its own worker id, and adds
      # their summaries together. An exception raised in a thread surfaces
      # here when that thread is joined.
      def drain_pool(pool: 4)
        workers = Array.new(pool) do |slot|
          Thread.new { drain(worker_id: "pool-#{Process.pid}-#{slot}") }
        end
        tallies = workers.map(&:value) # value joins the thread and yields its Summary
        Summary.new(
          completed: tallies.sum(&:completed),
          failed: tallies.sum(&:failed),
        )
      end

      private

      # Runs a single leased job. Returns :completed after a successful ack;
      # on any StandardError returns whatever the queue's `fail` reports
      # (:requeued | :dead_lettered).
      def run_one(leased)
        handler = @registry.lookup(leased.job.type).handler
        handler.call(job: leased.job, container: @container)
        @queue.ack(leased)
        :completed
      rescue StandardError => e
        @queue.fail(leased, error: e.message)
      end
    end
  end
end
@@ -13,7 +13,7 @@ module Textus
13
13
  DEFAULT_MAPPING = {
14
14
  Textus::Role::HUMAN => %w[author propose].freeze,
15
15
  Textus::Role::AGENT => %w[propose].freeze,
16
- Textus::Role::AUTOMATION => %w[reconcile].freeze,
16
+ Textus::Role::AUTOMATION => %w[converge].freeze,
17
17
  }.freeze
18
18
 
19
19
  # Returns { role_name => [verbs] }. When `roles:` is declared we use
@@ -10,10 +10,11 @@ module Textus
10
10
  # resolution, rules) lives on Manifest::Policy / Resolver / Rules.
11
11
  class Data
12
12
  AUDIT_DEFAULTS = { max_size: 10_485_760, keep: 5 }.freeze
13
+ WORKER_DEFAULTS = { pool: 4, poll: 5, lease_ttl: 60, max_attempts: 3 }.freeze
13
14
 
14
15
  attr_reader :raw, :root, :entries, :declared_zone_kinds,
15
16
  :zone_descs, :zone_owners,
16
- :audit_config, :role_caps, :policy
17
+ :audit_config, :worker_config, :role_caps, :policy
17
18
 
18
19
  def self.validate_key!(key)
19
20
  raise UsageError.new("empty key") if key.nil? || key.empty?
@@ -47,6 +48,7 @@ module Textus
47
48
  # future `zone_owners.key?(name)` means "owner declared", not "zone exists".
48
49
  @zone_owners = Array(raw["zones"]).to_h { |z| [z["name"], z["owner"]] }.compact
49
50
  @audit_config = build_audit_config(raw)
51
+ @worker_config = build_worker_config(raw)
50
52
  @role_caps = Capabilities.resolve(raw["roles"])
51
53
  # Policy is constructed before entries because Entry validators
52
54
  # use the entry's own `derived?` and similar helpers that call into
@@ -67,6 +69,19 @@ module Textus
67
69
  }.freeze
68
70
  end
69
71
 
72
# Worker/queue tunables (ADR: job-queue execution model). Every setting is
# optional: each key of WORKER_DEFAULTS is looked up by its string name in the
# manifest's `worker:` block, and the default is used when the block or the
# key is absent, so a manifest with no `worker:` block runs the queue out of
# the box. Returns a frozen Hash keyed by symbol.
def build_worker_config(raw)
  overrides = raw["worker"] || {}
  WORKER_DEFAULTS.to_h { |name, fallback| [name, overrides[name.to_s] || fallback] }.freeze
end
84
+
70
85
  def build_entries(raw)
71
86
  Array(raw["entries"]).map do |e|
72
87
  entry = Manifest::Entry::Parser.call(e)
@@ -23,7 +23,7 @@ module Textus
23
23
  # `inject_boot`/`provenance` fields are kept here so the schema walk can
24
24
  # still emit the migration hint rather than a bare "unknown key".
25
25
  SOURCE_KEYS = %w[
26
- from handler config template project command sources ttl on_write inject_boot provenance
26
+ from handler config template project command sources ttl inject_boot provenance
27
27
  select pluck sort_by transform
28
28
  ].freeze
29
29
  # ADR 0093: rule-level GC slot. drop/archive only (refresh gone).
@@ -168,7 +168,7 @@ module Textus
168
168
  hints = {
169
169
  "lifecycle" => "age GC moved to the `retention:` rule ({ ttl, action: drop|archive }); " \
170
170
  "intake cadence to the entry's `source: { ttl }`",
171
- "materialize" => "moved to the entry's `source: { on_write: sync|async }`",
171
+ "materialize" => "removed materialization is automatic (a write enqueues a job; run `drain`)",
172
172
  }
173
173
  hints.each do |old, hint|
174
174
  next unless rule.key?(old)
@@ -194,10 +194,10 @@ module Textus
194
194
  Array(r["can"]).each do |verb|
195
195
  next if CAPABILITIES.include?(verb)
196
196
 
197
- # The quarantine capability folded into reconcile (ADR 0090); a
197
+ # The quarantine capability folded into the converge capability (ADR 0090); a
198
198
  # manifest still naming the old quarantine capability (`ingest`, or
199
199
  # legacy `fetch`) gets a pointed hint rather than a bare error.
200
- hint = %w[ingest fetch].include?(verb) ? " — the quarantine capability folded into 'reconcile' (ADR 0090)" : ""
200
+ hint = %w[ingest fetch].include?(verb) ? " — the quarantine capability folded into 'converge' (ADR 0090)" : ""
201
201
  raise BadManifest.new(
202
202
  "unknown capability '#{verb}' for role '#{name}' at '#{path}' " \
203
203
  "(known: #{CAPABILITIES.join(", ")})#{hint}",
@@ -5,13 +5,13 @@ module Textus
5
5
  # 0034; the quarantine + derived ZONE-KINDS folded into one `machine` kind
6
6
  # in ADR 0091). Each kind pairs with the capability that authorizes
7
7
  # originating bytes in it. ONE source of truth; the derived constants below
8
- # cannot drift. A BIJECTION again (0090 had two kinds → reconcile; 0091
8
+ # cannot drift. A BIJECTION again (0090 had two kinds → the converge capability; 0091
9
9
  # collapses them, so kind ↔ capability is 1:1).
10
10
  module Vocabulary
11
11
  LANES = {
12
12
  "canon" => "author",
13
13
  "workspace" => "keep",
14
- "machine" => "reconcile",
14
+ "machine" => "converge",
15
15
  "queue" => "propose",
16
16
  }.freeze
17
17
 
@@ -94,7 +94,7 @@ module Textus
94
94
 
95
95
  # ADR 0083: the contract-drift guard gates mutating verbs — every MCP
96
96
  # verb that is NOT a pure read (Write:: + the destructive Maintenance::
97
- # verbs reconcile/zone_mv/key_*_prefix). Reads and boot bypass it (a stale
97
+ # verbs drain/zone_mv/key_*_prefix). Reads and boot bypass it (a stale
98
98
  # read returns on-disk truth; boot re-orients). Keying on read_verbs
99
99
  # (not write_verbs) keeps the destructive Maintenance:: verbs gated.
100
100
  @session.check_etag!(contract_etag) unless Catalog.read_verbs.include?(name)
@@ -5,7 +5,7 @@ require "time"
5
5
  module Textus
6
6
  module Ports
7
7
  # Cross-process build lock: a pid/host-stamped lockfile under the store root
8
- # that serializes reconcile's produce/sweep. An instantiable class — it holds
8
+ # that serializes converge's produce/sweep. An instantiable class — it holds
9
9
  # the root and lock state; `self.with(root:)` is a convenience that constructs
10
10
  # one and runs the block under the held lock. It already satisfied ADR 0109's
11
11
  # single-shape rule (every port is an instantiable class) before that ADR's
@@ -2,37 +2,50 @@
2
2
 
3
3
  module Textus
4
4
  module Ports
5
- # ADR 0093: on a canon write, converge the derived entries that depend on the
6
- # written key (rdeps derived) by running Produce scoped + non-destructive.
7
- # This IS reconcile narrowed to a write's blast radius; there is no separate
8
- # "reactive materialize" subsystem. Per-entry source.on_write (sync|async)
9
- # picks inline-under-lock vs deferred. A write INTO a derived entry does not
10
- # fan out (recursion guard). Failures never reach the writer (Produce.converge
11
- # isolates them). Attached at Store boot, alongside AuditSubscriber.
5
+ # ADR 0093 / job-queue model: on a canon write, enqueue a `materialize` job
6
+ # for each derived entry that depends on the written key (rdeps ∩ producible).
7
+ # Async-only: the write returns immediately; a worker (drain/serve) converges
8
+ # the jobs. There is no inline `sync` path and no in-process thread: freshness
9
+ # is re-homed to drain (at the commit/CI gate) and the daemon. A write INTO a
10
+ # derived entry does not fan out (recursion guard). Produce self-elevates, so
11
+ # the job is stamped automation. Attached at Store boot, alongside
12
+ # AuditSubscriber.
12
13
  class ProduceOnWriteSubscriber
13
14
  def initialize(container)
14
15
  @container = container
15
16
  end
16
17
 
17
18
  def attach(bus)
18
- bus.on(:entry_written, :produce_on_write) do |ctx:, key:, **|
19
- call = Textus::Call.build(role: ctx.role, correlation_id: ctx.correlation_id)
20
- on_write(key: key, call: call)
19
+ bus.on(:entry_written, :produce_on_write) do |key:, **|
20
+ on_write(key: key)
21
+ end
22
+ # Closes the ADR 0087 gap: a delete/rename of a source must re-materialize
23
+ # its orphaned dependents too, not just a write. These fire distinct
24
+ # events (:entry_deleted / :entry_renamed), so subscribe to each.
25
+ bus.on(:entry_deleted, :produce_on_delete) do |key:, **|
26
+ on_write(key: key)
27
+ end
28
+ bus.on(:entry_renamed, :produce_on_rename) do |from_key:, to_key:, **|
29
+ on_write(key: from_key)
30
+ on_write(key: to_key)
21
31
  end
22
32
  self
23
33
  end
24
34
 
25
- def on_write(key:, call:)
35
+ def on_write(key:)
26
36
  return if derived_write?(key) # recursion guard: produce output is not a source change
27
37
 
28
38
  affected = Textus::Read::Rdeps.new(container: @container).call(key)["rdeps"]
29
39
  producible = affected.select { |k| producible?(k) }
30
40
  return if producible.empty?
31
41
 
32
- if any_sync?(producible)
33
- Textus::Produce::Engine.converge(container: @container, call: call, keys: producible)
34
- else
35
- Textus::Produce::Engine::AsyncRunner.enqueue(container: @container, call: call, keys: producible)
42
+ queue = Textus::Ports::Queue.new(root: @container.root)
43
+ producible.each do |k|
44
+ queue.enqueue(
45
+ Textus::Domain::Jobs::Job.new(
46
+ type: "materialize", args: { "key" => k }, enqueued_by: Textus::Role::AUTOMATION,
47
+ ),
48
+ )
36
49
  end
37
50
  end
38
51
 
@@ -55,15 +68,6 @@ module Textus
55
68
  rescue Textus::Error
56
69
  false
57
70
  end
58
-
59
- # Only derived entries carry a source with on_write semantics; a nested
60
- # publish_tree entry has no source and defaults to async.
61
- def any_sync?(keys)
62
- keys.any? do |k|
63
- entry = @container.manifest.resolver.resolve(k).entry
64
- entry.derived? && entry.source.sync?
65
- end
66
- end
67
71
  end
68
72
  end
69
73
  end
@@ -0,0 +1,130 @@
1
+ require "fileutils"
2
+ require "json"
3
+ require "time"
4
+
5
module Textus
  module Ports
    # File-backed durable job queue under `<root>/.run/queue/`. Each job state
    # is a directory; a job is one `<id>.json` file. Claiming is an atomic
    # `rename(2)` from ready/ to leased/ — the rename winner owns the job, so a
    # worker pool needs no central lock. Dedup falls out of the id-as-filename:
    # enqueueing an id that already exists is a no-op. ADR 0038 (runtime subtree),
    # ADR 0108 (instantiable port).
    #
    # Writes are staged as `<id>.json.<pid>.<thread>.tmp` next to their
    # destination and renamed into place. Those staging files live inside the
    # state directories, so every scan below considers only names ending in
    # `.json`; otherwise a half-written staging file could be claimed, listed
    # or reclaimed as if it were a job.
    class Queue
      STATES = %i[ready leased done failed].freeze

      # Suffix that marks a directory entry as a job file.
      JOB_EXT = ".json"
      private_constant :JOB_EXT

      # A claimed job plus the path it lives at, so ack/fail act on this copy.
      Leased = Struct.new(:job, :leased_path, keyword_init: true)

      # root: store root; one directory per state is created beneath it.
      def initialize(root:)
        @root = root
        STATES.each { |s| FileUtils.mkdir_p(Textus::Layout.queue_state(root, s)) }
      end

      # Adds `job` to ready/ unless a ready job with the same id already exists.
      def enqueue(job)
        dest = path(:ready, job.id)
        return if File.exist?(dest) # dedup: identical work already queued

        write_atomic(dest, job.to_h)
      end

      # Ids of the jobs currently in ready/.
      def ready_ids
        list(:ready)
      end

      # Claims one ready job for `worker_id` and stamps it with a lease that
      # expires `lease_ttl` seconds from now. Returns a Leased, or nil when
      # there is nothing to claim.
      def lease(worker_id:, lease_ttl:)
        ready_dir = Textus::Layout.queue_state(@root, :ready)
        leased_dir = Textus::Layout.queue_state(@root, :leased)
        job_files(ready_dir).each do |name|
          dst = File.join(leased_dir, name)
          begin
            File.rename(File.join(ready_dir, name), dst) # atomic claim; loser's rename raises ENOENT
          rescue Errno::ENOENT
            next # another worker won this one
          end
          job = Textus::Domain::Jobs::Job.from_h(JSON.parse(File.read(dst)))
          stamp_lease(dst, worker_id: worker_id, expires_at: Time.now.utc + lease_ttl)
          return Leased.new(job: job, leased_path: dst)
        end
        nil
      end

      # Marks a leased job finished by moving it to done/.
      def ack(leased)
        dest = File.join(Textus::Layout.queue_state(@root, :done), File.basename(leased.leased_path))
        File.rename(leased.leased_path, dest)
      end

      # Increment attempts and either requeue (transient) or dead-letter (attempts
      # exhausted). Returns :requeued or :dead_lettered so the worker can count
      # terminal failures distinctly from retries.
      def fail(leased, error:)
        job = leased.job
        job.attempts += 1
        job.last_error = error
        dead = job.attempts >= job.max_attempts
        write_atomic(path(dead ? :failed : :ready, job.id), job.to_h)
        File.delete(leased.leased_path)
        dead ? :dead_lettered : :requeued
      end

      # Return expired leases to ready/ (the holding worker crashed). Returns the
      # count reclaimed. A leased file that carries no lease stamp is treated as
      # expired. At-least-once delivery: a job whose handler actually finished
      # but whose ack was lost will re-run — handlers must be idempotent.
      def reclaim(now:)
        leased_dir = Textus::Layout.queue_state(@root, :leased)
        ready_dir = Textus::Layout.queue_state(@root, :ready)
        count = 0
        job_files(leased_dir).each do |name|
          src = File.join(leased_dir, name)
          data = JSON.parse(File.read(src))
          expires = data.dig("lease", "expires_at")
          next if expires && Time.parse(expires) > now

          data.delete("lease")
          write_atomic(src, data) # strip the stamp without exposing a torn file
          File.rename(src, File.join(ready_dir, name))
          count += 1
        rescue Errno::ENOENT
          next # raced with another reclaimer / the worker's ack
        end
        count
      end

      # Ids of the jobs currently in `state` (symbol or string).
      def list(state)
        dir = Textus::Layout.queue_state(@root, state.to_sym)
        job_files(dir).map { |f| File.basename(f, JOB_EXT) }
      end

      # Moves a dead-lettered job back to ready/ with its attempt count and
      # last error cleared. Raises Errno::ENOENT when no such failed job exists.
      def retry_failed(job_id)
        src = path(:failed, job_id)
        data = JSON.parse(File.read(src))
        data["attempts"] = 0
        data["last_error"] = nil
        write_atomic(path(:ready, job_id), data)
        File.delete(src)
      end

      # Deletes every file in `state`, including any leftover staging files.
      def purge(state)
        dir = Textus::Layout.queue_state(@root, state.to_sym)
        Dir.children(dir).each { |f| File.delete(File.join(dir, f)) }
      end

      private

      # Names in `dir` that are job files; staging files are excluded.
      def job_files(dir)
        Dir.children(dir).select { |name| name.end_with?(JOB_EXT) }
      end

      # Records who holds the lease and when it expires, inside the job file.
      def stamp_lease(leased_path, worker_id:, expires_at:)
        data = JSON.parse(File.read(leased_path))
        data["lease"] = { "worker_id" => worker_id, "expires_at" => expires_at.iso8601 }
        write_atomic(leased_path, data)
      end

      def path(state, job_id)
        File.join(Textus::Layout.queue_state(@root, state), "#{job_id}#{JOB_EXT}")
      end

      # Writes to a per-process, per-thread staging file and renames it over
      # `dest`, so readers see either the old content or the new, never a mix.
      def write_atomic(dest, hash)
        tmp = "#{dest}.#{Process.pid}.#{Thread.current.object_id}.tmp"
        File.write(tmp, JSON.pretty_generate(hash))
        File.rename(tmp, dest) # atomic on same filesystem
      end
    end
  end
end
@@ -6,7 +6,7 @@ module Textus
6
6
  # Invokes a :resolve_handler hook handler by name under a timeout — the single
7
7
  # home for "call the intake handler under a deadline" (ADR 0048 D1). Shared by
8
8
  # Produce::Acquire::Intake (the internal ingest mechanism — no public verb since ADR 0079)
9
- # as driven by the `reconcile` sweep and `textus hook run` (ADR 0089 made
9
+ # as driven by the converge sweep (drain/serve) and `textus hook run` (ADR 0089 made
10
10
  # ingest system-pushed; there is no read or put trigger).
11
11
  # Always passes a Container as `caps:` so the hook contract (ADR 0027) is
12
12
  # uniform across every entry point. Maps Timeout::Error to a UsageError;
@@ -5,7 +5,7 @@ module Textus
5
5
  module Acquire
6
6
  # Internal ingest executor for one machine-zone intake entry. No longer a
7
7
  # public verb (ADR 0079 collapsed the `fetch` surface): used by the
8
- # `reconcile` sweep and `textus hook run` only — ingest is system-pushed
8
+ # converge sweep (drain/serve) and `textus hook run` only — ingest is system-pushed
9
9
  # (ADR 0089 removed the read-through that once also drove it).
10
10
  class Intake
11
11
  FETCH_TIMEOUT_SECONDS = Textus::Produce::Acquire::Handler::FETCH_TIMEOUT_SECONDS
@@ -96,9 +96,9 @@ module Textus
96
96
  normalized = self.class.normalize_action_result(result, format: mentry.format)
97
97
  Textus::Domain::Policy::GuardFactory.new(
98
98
  manifest: @manifest, schemas: @schemas,
99
- ).for(:reconcile, key).check!(
99
+ ).for(:converge, key).check!(
100
100
  Textus::Domain::Policy::Evaluation.new(
101
- actor: @call.role, transition: :reconcile, origin: nil,
101
+ actor: @call.role, transition: :converge, origin: nil,
102
102
  target: key, envelope: nil, manifest: @manifest
103
103
  ),
104
104
  )
@@ -8,17 +8,16 @@ module Textus
8
8
  # derived (from: command) -> skip the build; publish_via publishes
9
9
  # existing store bytes via mode resolution
10
10
  # (None when no targets -> skipped)
11
- # Runs as the reconcile build actor (self-elevating); the passed `call`
11
+ # Runs as the converge build actor (self-elevating); the passed `call`
12
12
  # supplies only correlation_id/dry_run. Callers choose the key set: the
13
- # write subscriber passes rdeps ∩ derived; reconcile passes
13
+ # write subscriber passes rdeps ∩ derived; the converge pass passes
14
14
  # all-derived + stale-intake.
15
15
  class Engine
16
- # Locked + failure-isolated convergence — the shared entry point for the
17
- # write trigger (ADR 0093). Both the sync path (inline, in the subscriber)
18
- # and the async path (AsyncRunner) call this. A held lock is a soft miss
19
- # (an in-flight build/reconcile already produces fresh output); any other
20
- # error is republished as :produce_failed and never raised at the
21
- # writer (ADR 0087 §5 failure isolation, preserved).
16
+ # Locked + failure-isolated convergence — the entry point worker handlers
17
+ # call to materialize a key set (ADR 0093 / job-queue model). A held lock
18
+ # is a soft miss (an in-flight build/converge already produces fresh
19
+ # output); any other error is republished as :produce_failed and never
20
+ # raised at the caller (ADR 0087 §5 failure isolation, preserved).
22
21
  def self.converge(container:, call:, keys:)
23
22
  Textus::Ports::BuildLock.with(root: container.root) do
24
23
  new(container: container, call: call).call(keys: keys)
@@ -73,10 +72,10 @@ module Textus
73
72
  end
74
73
 
75
74
  def build_actor_call
76
- build_role = @manifest.policy.actor_for("reconcile") or
75
+ build_role = @manifest.policy.actor_for("converge") or
77
76
  raise Textus::UsageError.new(
78
- "no role holds the 'reconcile' capability",
79
- hint: "declare a role with `can: [reconcile]` in .textus/manifest.yaml",
77
+ "no role holds the 'converge' capability",
78
+ hint: "declare a role with `can: [converge]` in .textus/manifest.yaml",
80
79
  )
81
80
  Textus::Call.build(
82
81
  role: build_role,
@@ -91,53 +90,6 @@ module Textus
91
90
  reader: Textus::Read::Get.new(container: @container, call: call)
92
91
  )
93
92
  end
94
-
95
- # In-process deferral for the async write trigger (ADR 0087/0093).
96
- # Spawns a tracked thread that runs Produce.converge after the write
97
- # returns; a one-time at_exit joins
98
- # all pending threads so a short-lived CLI process cannot exit before an
99
- # async rebuild completes. The write itself never blocks.
100
- module AsyncRunner
101
- @mutex = Mutex.new
102
- @threads = []
103
- @hooked = false
104
-
105
- class << self
106
- def enqueue(container:, call:, keys:)
107
- thread = Thread.new { Textus::Produce::Engine.converge(container: container, call: call, keys: keys) }
108
- track(thread)
109
- thread
110
- end
111
-
112
- # Block until every spawned async rebuild has finished. Idempotent;
113
- # safe to call from at_exit and directly from tests.
114
- def drain
115
- pending = @mutex.synchronize { @threads.dup }
116
- pending.each(&:join)
117
- @mutex.synchronize { @threads.delete_if { |t| !t.alive? } }
118
- nil
119
- end
120
-
121
- private
122
-
123
- def track(thread)
124
- @mutex.synchronize do
125
- @threads.delete_if { |t| !t.alive? }
126
- @threads << thread
127
- install_drain_hook
128
- end
129
- end
130
-
131
- # Register the join-before-exit hook exactly once. Guarded by the
132
- # caller holding @mutex.
133
- def install_drain_hook
134
- return if @hooked
135
-
136
- @hooked = true
137
- at_exit { drain }
138
- end
139
- end
140
- end
141
93
  end
142
94
  end
143
95
  end