tempest-rb 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ require_relative "../tempest"
2
+
3
+ module Tempest
4
# Maps AT Protocol DIDs to Bluesky handles through app.bsky.actor.getProfile.
# Hits and misses are both memoized in-process so a busy Jetstream feed
# doesn't hammer the PDS on every event.
class HandleResolver
  # Sentinel stored in the cache for a DID whose lookup produced no handle.
  # It lets a remembered miss be told apart from "never looked up" (nil).
  NOT_FOUND = Object.new.freeze

  # client: object responding to `get(nsid, query:)`; used for getProfile.
  def initialize(client:)
    @client = client
    @cache = {}
    @mutex = Mutex.new
  end

  # Returns the handle for `did`, or nil when none could be resolved.
  # Only the cache reads/writes are guarded by the mutex; the remote lookup
  # itself runs outside of it.
  def resolve(did)
    remembered = @mutex.synchronize { @cache[did] }
    return unwrap(remembered) unless remembered.nil?

    fetch_handle(did).tap do |fresh|
      @mutex.synchronize { @cache[did] = fresh.nil? ? NOT_FOUND : fresh }
    end
  end

  # Pre-populates the cache with an already-known did => handle pair.
  def seed(did, handle)
    @mutex.synchronize { @cache[did] = handle }
  end

  private

  # Translates a cache entry back into the public value (sentinel => nil).
  def unwrap(entry)
    return nil if entry.equal?(NOT_FOUND)

    entry
  end

  # Asks the client for the profile and extracts its "handle" field.
  # Non-Hash responses and Tempest::APIError both count as "no handle".
  def fetch_handle(did)
    profile = @client.get("app.bsky.actor.getProfile", query: { "actor" => did })
    return nil unless profile.is_a?(Hash)

    profile["handle"]
  rescue Tempest::APIError
    nil
  end
end
42
+ end
@@ -0,0 +1,88 @@
1
+ require "json"
2
+ require "uri"
3
+ require "async"
4
+ require "async/http/internet"
5
+
6
+ require_relative "../tempest"
7
+
8
+ module Tempest
9
# JSON-over-HTTP transport for XRPC endpoints.
# Requests go through a single lazily-built Async::HTTP::Internet instance.
# Each call is wrapped in Sync so callers get a plain Response back and do
# not need to know about Async.
module HTTP
  # Minimal response value: numeric status plus the parsed (or raw) body.
  Response = Struct.new(:status, :body) do
    # True for any 2xx status.
    def ok?
      status >= 200 && status < 300
    end

    # True only for HTTP 401.
    def unauthorized?
      status == 401
    end
  end

  # Module-level state: the shared client and the lock guarding its
  # creation and teardown.
  @internet_mutex = Mutex.new
  @internet = nil

  module_function

  # POSTs `body` (JSON-encoded when present) to `url`.
  def post_json(url, body: nil, headers: {})
    request("POST", url, body: body, headers: headers)
  end

  # GETs `url`, appending `query` pairs after any query string already
  # present on the URL.
  def get_json(url, headers: {}, query: nil)
    target = URI(url)
    if query && !query.empty?
      carried = target.query ? URI.decode_www_form(target.query) : []
      target.query = URI.encode_www_form(carried + query.to_a)
    end
    request("GET", target.to_s, headers: headers)
  end

  # Performs one request and returns a Response.
  # Header names are lower-cased; "accept" (and "content-type" when a body
  # is sent) default to application/json unless the caller supplied them.
  def request(method, url, body: nil, headers: {})
    wire_headers = headers.to_h { |name, value| [name.to_s.downcase, value] }
    encoded = nil
    if body
      wire_headers["content-type"] ||= "application/json"
      encoded = [JSON.generate(body)]
    end
    wire_headers["accept"] ||= "application/json"

    pairs = wire_headers.to_a

    Sync do
      reply = internet.call(method, url, pairs, encoded)
      begin
        text = reply.read.to_s
        Response.new(reply.status, parse_body(reply, text))
      ensure
        # Always release the response, even if reading/parsing raised.
        reply.close
      end
    end
  end

  # Returns nil for an empty body, parsed JSON when the response declares
  # an application/json content type, and the raw string otherwise.
  def parse_body(response, body_str)
    return nil if body_str.empty?

    declared = response.headers["content-type"].to_s
    declared.include?("application/json") ? JSON.parse(body_str) : body_str
  end

  # Returns the shared Async::HTTP::Internet, creating it on first use.
  def internet
    @internet_mutex.synchronize do
      @internet ||= Async::HTTP::Internet.new
    end
  end

  # Drops the shared client (closing it if one exists) so the next request
  # builds a fresh one.
  def reset!
    @internet_mutex.synchronize do
      stale = @internet
      @internet = nil
      Sync { stale.close } if stale
    end
  end
end
88
+ end
@@ -0,0 +1,44 @@
1
+ require_relative "../tempest"
2
+
3
+ module Tempest
4
# Earthquake-style identifier ring. Each generator owns a fixed list of
# slots (e.g. "AA".."ZZ" = 676 slots). New ids consume the next slot;
# when the ring wraps the previous tenant of the recycled slot is
# evicted from both the forward (id => var) and reverse (var => id)
# tables so callers never see a stale mapping.
#
# Not thread-safe. The REPL renders posts on a single thread (either
# the main REPL thread or behind Screen's mutex) so external
# serialization is sufficient.
class IdVar
  # range:  any object responding to `to_a`; each element becomes one slot.
  # prefix: string prepended to the slot to form the variable name.
  # Raises ArgumentError when `range` yields no slots.
  def initialize(range:, prefix: "$")
    @slots = range.to_a
    raise ArgumentError, "range produced no slots" if @slots.empty?

    @prefix = prefix
    @cursor = -1 # advanced before use, so the first id lands on slot 0
    @forward = {} # id => var
    @reverse = {} # var => id
  end

  # Returns the variable for `id`, allocating the next slot (and evicting
  # that slot's previous tenant) when `id` has no live mapping.
  def generate(id)
    return @forward[id] if @forward.key?(id)

    @cursor = (@cursor + 1) % @slots.length
    var = "#{@prefix}#{@slots[@cursor]}"
    evict(var)
    @forward[id] = var
    @reverse[var] = id
    var
  end

  # Returns the id currently bound to `var`, or nil when the slot is unused.
  def lookup(var)
    @reverse[var]
  end

  private

  # Removes the current tenant of `var` from both tables.
  # Checks key presence rather than truthiness so a tenant whose id is
  # nil or false is evicted too; otherwise its forward entry would outlive
  # the slot and `generate` would hand back a variable owned by another id.
  def evict(var)
    return unless @reverse.key?(var)

    @forward.delete(@reverse.delete(var))
  end
end
44
+ end
@@ -0,0 +1,69 @@
1
+ require "uri"
2
+
3
+ require_relative "../../tempest"
4
+ require_relative "decoder"
5
+
6
+ module Tempest
7
+ module Jetstream
8
+ DEFAULT_URL = "wss://jetstream2.us-east.bsky.network/subscribe".freeze
9
+
10
# Thin Jetstream subscriber: builds the subscribe URL from the configured
# filters and turns raw transport messages into decoded events.
class Client
  # url:                base subscribe endpoint.
  # wanted_collections: collection NSIDs sent as wantedCollections params.
  # wanted_dids:        DIDs sent as wantedDids params.
  # decoder:            object responding to `decode(raw)`; nil results are skipped.
  # transport:          object responding to `each_message(url) { |raw| }`;
  #                     an AsyncWebSocketTransport is built lazily when nil.
  def initialize(url: DEFAULT_URL, wanted_collections: [], wanted_dids: [], decoder: Decoder, transport: nil)
    @url = url
    @wanted_collections = Array(wanted_collections)
    @wanted_dids = Array(wanted_dids)
    @decoder = decoder
    @transport = transport
  end

  # Returns the URL to connect to. With no filters and no cursor the base
  # URL is returned untouched; otherwise the parameters are appended after
  # any query string the base URL already carries.
  def subscribe_url(cursor: nil)
    pairs = @wanted_collections.map { |nsid| ["wantedCollections", nsid] }
    pairs.concat(@wanted_dids.map { |did| ["wantedDids", did] })
    pairs << ["cursor", cursor.to_s] if cursor
    return @url if pairs.empty?

    target = URI(@url)
    carried = target.query ? URI.decode_www_form(target.query) : []
    target.query = URI.encode_www_form(carried + pairs)
    target.to_s
  end

  # Yields each successfully decoded event from the transport. Without a
  # block, returns an Enumerator over the same stream.
  def each_event(cursor: nil, &block)
    return enum_for(:each_event, cursor: cursor) unless block

    transport.each_message(subscribe_url(cursor: cursor)) do |raw|
      decoded = @decoder.decode(raw)
      block.call(decoded) if decoded
    end
  end

  private

  # Injected transport, or the default WebSocket one created on first use.
  def transport
    @transport ||= AsyncWebSocketTransport.new
  end
end
47
+
48
# Default WebSocket transport using async-websocket. The Async libraries
# are required in the constructor rather than at file load, so unit tests
# that inject a stub transport don't pull in the Async runtime.
class AsyncWebSocketTransport
  def initialize
    require "async"
    require "async/http/endpoint"
    require "async/websocket/client"
  end

  # Connects to `url` and yields the buffer of every message read from the
  # socket until `read` returns a falsy value. Blocks the caller until the
  # Async task finishes.
  def each_message(url)
    reader = Async do
      target = Async::HTTP::Endpoint.parse(url)
      Async::WebSocket::Client.connect(target) do |socket|
        while (frame = socket.read)
          yield frame.buffer
        end
      end
    end
    reader.wait
  end
end
68
+ end
69
+ end
@@ -0,0 +1,81 @@
1
+ require "json"
2
+
3
+ require_relative "../../tempest"
4
+ require_relative "../facet"
5
+
6
+ module Tempest
7
+ module Jetstream
8
# Immutable value object for one decoded Jetstream commit.
# `subject_uri`, `facets` and `reply_parent_uri` are optional and default
# to nil, [] and nil respectively; every other field must be supplied.
Event = Data.define(
  :kind, :did, :time_us, :collection, :operation, :rkey, :cid,
  :text, :created_at, :subject_uri, :facets, :reply_parent_uri
) do
  # Overridden only to give the optional members their defaults.
  def initialize(kind:, did:, time_us:, collection:, operation:, rkey:, cid:,
                 text:, created_at:, subject_uri: nil, facets: [],
                 reply_parent_uri: nil)
    super
  end

  # Collection predicates.
  def post? = collection == "app.bsky.feed.post"

  def like? = collection == "app.bsky.feed.like"

  def repost? = collection == "app.bsky.feed.repost"

  # True when the commit operation is :create.
  def create? = operation == :create

  # at:// URI assembled from the event's did, collection and rkey.
  def at_uri = "at://#{did}/#{collection}/#{rkey}"
end
48
+
49
# Turns a raw Jetstream JSON message into an Event.
module Decoder
  module_function

  # Returns an Event for a "commit" message, or nil when the payload is not
  # valid JSON, is not a JSON object, or is some other kind of message.
  #
  # Fields are read defensively: the payload comes off the network, and an
  # exception raised here would propagate into the caller's stream loop.
  # Anything with an unexpected shape therefore decodes to nil instead.
  def decode(payload)
    message = JSON.parse(payload)
    # JSON.parse also accepts arrays and scalars; only objects are events.
    return nil unless message.is_a?(Hash) && message["kind"] == "commit"

    commit = message["commit"]
    commit = {} unless commit.is_a?(Hash)
    record = commit["record"]
    record = {} unless record.is_a?(Hash)

    operation = commit["operation"]
    subject = record["subject"]
    reply = record["reply"]
    reply_parent = reply.is_a?(Hash) ? reply["parent"] : nil

    Event.new(
      kind: :commit,
      did: message["did"],
      time_us: message["time_us"],
      collection: commit["collection"],
      operation: operation.is_a?(String) ? operation.to_sym : nil,
      rkey: commit["rkey"],
      cid: commit["cid"],
      text: record["text"],
      created_at: record["createdAt"],
      subject_uri: subject.is_a?(Hash) ? subject["uri"] : nil,
      facets: Tempest::Facet.parse(record["facets"]),
      reply_parent_uri: reply_parent.is_a?(Hash) ? reply_parent["uri"] : nil,
    )
  rescue JSON::ParserError
    nil
  end
end
80
+ end
81
+ end
@@ -0,0 +1,262 @@
1
+ require_relative "../../tempest"
2
+ require_relative "../debug_log"
3
+ require_relative "client"
4
+
5
+ module Tempest
6
+ module Jetstream
7
# Runs a Jetstream::Client in a background thread so the REPL stays
# responsive. The transport itself is fiber-based, but we keep that fiber
# off the main thread to avoid interleaving with Reline's blocking read.
# Owns reconnect-with-cursor so a flaky socket or sleep/wake cycle doesn't
# silently strand the live feed.
class StreamManager
  # Seconds to wait before reconnect attempt N; the last entry is reused
  # once attempts run past the end of the list.
  DEFAULT_BACKOFF = [1, 2, 5, 10, 30].freeze
  # Conservative replay window: Jetstream's default event-ttl is 24h, but
  # Bluesky doesn't publicly commit to that for their hosted instances and
  # boundary cases (clock skew, tail trim races) bite around the limit. If
  # we've been offline longer than this, drop the cursor and let the Runner
  # backfill via getTimeline.
  CURSOR_WINDOW_SECONDS = 12 * 60 * 60
  # How often we persist the cursor during a stable live-tail. 5s caps the
  # worst-case event loss on crash to a few seconds of activity while
  # keeping disk writes negligible on a busy stream.
  DEFAULT_CURSOR_SAVE_INTERVAL = 5.0

  # client:               responds to `each_event(cursor:) { |event| }`.
  # backoff:              list of reconnect delays in seconds.
  # sleeper:              callable taking a delay; injected for tests.
  # clock:                callable returning the current Time.
  # cursor_store:         optional; responds to `load` and `save(time_us:, at:)`.
  # cursor_save_interval: minimum seconds between throttled cursor saves.
  # filter:               optional predicate; events it rejects are not
  #                       forwarded (they still advance the cursor).
  # logger:               defaults to Tempest::DebugLog's null logger.
  def initialize(client:, backoff: DEFAULT_BACKOFF, sleeper: ->(s) { sleep(s) },
                 clock: -> { Time.now }, cursor_store: nil,
                 cursor_save_interval: DEFAULT_CURSOR_SAVE_INTERVAL,
                 filter: nil, logger: nil)
    @client = client
    @backoff = backoff
    @sleeper = sleeper
    @clock = clock
    @cursor_store = cursor_store
    @cursor_save_interval = cursor_save_interval
    @filter = filter
    @logger = logger || Tempest::DebugLog.build_null_logger
    @thread = nil
    @mutex = Mutex.new
    @stopping = false
    @cursor_state = { live: nil, saved: nil }
    @last_event_at = nil
  end

  # Starts the worker thread; a no-op while a worker is already alive.
  # The block receives Events, StreamErrors and StreamStatus values.
  def start(&on_event)
    @mutex.synchronize do
      return if @thread&.alive?

      @stopping = false
      @thread = Thread.new { run(on_event) }
    end
  end

  # Kills the worker, waits for it to exit, then flushes the newest
  # in-memory cursor to the store.
  def stop
    @logger.info("stream") do
      live = @mutex.synchronize { @cursor_state[:live] }
      "stopping final_cursor=#{live.inspect}"
    end
    @mutex.synchronize { @stopping = true }
    thread = @mutex.synchronize do
      t = @thread
      @thread = nil
      t
    end
    thread&.kill
    thread&.join
    flush_cursor!
  end

  # True while a worker thread exists and is alive.
  def running?
    @mutex.synchronize { !!@thread&.alive? }
  end

  # Time of the last event yielded by the underlying client, regardless of
  # whether the filter accepted it. Watchdog reads this to detect a stalled
  # socket (kernel still thinks the TCP connection is alive but no bytes
  # are arriving).
  def last_event_at
    @mutex.synchronize { @last_event_at }
  end

  # Break a stalled each_event so the reconnect loop can run. Used by the
  # Watchdog when the kernel hasn't surfaced the disconnect (e.g., after
  # macOS sleep/wake). Safe to call from another thread or when no worker
  # is running.
  def force_reconnect
    thread = @mutex.synchronize { @thread }
    return unless thread&.alive?

    @logger.warn("stream") { "force_reconnect requested" }
    begin
      thread.raise(Stalled.new("forced reconnect"))
    rescue ThreadError
      # Thread already exited between alive? and raise — nothing to do.
    end
  end

  private

  # Worker-thread body: subscribe, forward events, and on disconnect back
  # off and resubscribe from the last seen cursor until stop is requested.
  def run(on_event)
    Thread.current.report_on_exception = false
    cursor, startup_gap_since = load_initial_cursor
    if startup_gap_since
      @logger.warn("stream") { "startup_stale stale_since=#{startup_gap_since.iso8601}" }
      on_event.call(StreamStatus.new(state: :gapped, since: startup_gap_since))
    end
    last_saved_cursor = cursor
    last_save_at = nil
    attempt = 0

    @logger.info("stream") do
      age = cursor ? cursor_age_seconds(cursor) : nil
      "worker start cursor=#{cursor.inspect} cursor_age_seconds=#{age.inspect}"
    end

    until stopping?
      # Detect a long offline gap from the cursor's age rather than from
      # wall-clock disconnect timestamps. When the host machine sleeps,
      # the background thread is suspended and we only learn about the
      # outage at wake time — `disconnected_at` would therefore reflect
      # the wake time, not the actual go-offline time, and the window
      # check would never fire. The cursor (a unix-microseconds event
      # timestamp from Jetstream) is unaffected by our suspension, so its
      # age is a reliable proxy for "how long since we last saw events".
      if attempt > 0 && cursor
        cursor_age = @clock.call.to_f - (cursor / 1_000_000.0)
        if cursor_age > CURSOR_WINDOW_SECONDS
          since = Time.at(cursor / 1_000_000.0)
          @logger.warn("stream") { "gapped cursor_age_seconds=#{cursor_age.round(1)} since=#{since.iso8601}" }
          on_event.call(StreamStatus.new(state: :gapped, since: since))
          cursor = nil
        end
      end

      if attempt > 0
        delay = backoff_delay(attempt - 1)
        @logger.info("stream") { "reconnecting attempt=#{attempt} cursor=#{cursor.inspect} backoff_just_slept=#{delay}" }
        on_event.call(StreamStatus.new(state: :reconnecting))
      end

      error = nil
      saw_event = false
      @logger.info("stream") { "subscribe cursor=#{cursor.inspect}" }
      begin
        @client.each_event(cursor: cursor) do |event|
          now = @clock.call
          @mutex.synchronize { @last_event_at = now }
          if event.respond_to?(:time_us) && event.time_us
            cursor = event.time_us
            @mutex.synchronize { @cursor_state[:live] = cursor }
            if @cursor_store && cursor != last_saved_cursor
              # Throttled save: the first one always goes through, later
              # ones only once the save interval has elapsed.
              if last_save_at.nil? || (now - last_save_at) >= @cursor_save_interval
                @cursor_store.save(time_us: cursor, at: now)
                @logger.debug("stream") { "cursor save time_us=#{cursor}" }
                last_saved_cursor = cursor
                last_save_at = now
                @mutex.synchronize { @cursor_state[:saved] = cursor }
              end
            end
          end
          next if @filter && !@filter.call(event)

          # After a reconnect, announce :live once, on the first event
          # that passes the filter.
          if attempt > 0 && !saw_event
            on_event.call(StreamStatus.new(state: :live))
          end
          saw_event = true
          on_event.call(event)
        end
      rescue Stalled => e
        error = e
        @logger.warn("stream") { "stalled — forced reconnect cursor=#{cursor.inspect}" }
        on_event.call(StreamError.new(e))
      rescue => e
        error = e
        @logger.warn("stream") { "disconnect error=#{e.class}: #{e.message}" }
        on_event.call(StreamError.new(e))
      end

      break if stopping?

      # Force a final save on disconnect so we don't lose the tail between
      # the throttle interval and the connection drop.
      if @cursor_store && cursor && cursor != last_saved_cursor
        now = @clock.call
        @cursor_store.save(time_us: cursor, at: now)
        @logger.debug("stream") { "cursor save (disconnect) time_us=#{cursor}" }
        last_saved_cursor = cursor
        last_save_at = now
        @mutex.synchronize { @cursor_state[:saved] = cursor }
      end

      on_event.call(
        StreamStatus.new(
          state: :disconnected,
          reason: error ? :error : :closed,
          error: error,
        ),
      )

      begin
        @sleeper.call(backoff_delay(attempt))
      rescue Stalled
        # force_reconnect fired while we were already disconnected and
        # backing off. Stalled is delivered with Thread#raise, so without
        # this rescue it would escape the loop and end the worker silently
        # (report_on_exception is off). Treat it as "wake up early" and
        # reconnect right away.
        @logger.info("stream") { "backoff interrupted by force_reconnect" }
      end
      attempt += 1
    end

    @logger.info("stream") { "worker exit final_cursor=#{cursor.inspect}" }
  end

  # Delay for the given zero-based attempt index, clamped to the last
  # entry of the backoff list.
  def backoff_delay(index)
    @backoff[[index, @backoff.length - 1].min]
  end

  # Age of a unix-microseconds cursor in seconds (one decimal), or nil when
  # there is no cursor or the computation raises. Used for logging only.
  def cursor_age_seconds(cursor)
    return nil unless cursor

    (@clock.call - Time.at(cursor / 1_000_000.0)).round(1)
  rescue StandardError
    nil
  end

  def stopping?
    @mutex.synchronize { @stopping }
  end

  # Returns [cursor, gap_since]. `gap_since` is non-nil when a persisted
  # cursor existed but is too old to replay safely; the caller emits
  # :gapped (so the Runner backfills via getTimeline) and subscribes
  # without a cursor.
  def load_initial_cursor
    return [nil, nil] unless @cursor_store

    stored = @cursor_store.load
    return [nil, nil] unless stored && stored[:time_us] && stored[:saved_at]

    age = @clock.call - stored[:saved_at]
    return [nil, stored[:saved_at]] if age > CURSOR_WINDOW_SECONDS

    [stored[:time_us], nil]
  end

  # Called from `stop` after the worker thread has been killed. Ensures the
  # most recent in-memory cursor (which the throttle may have skipped over)
  # makes it to disk; otherwise the next launch would roll back by up to
  # `cursor_save_interval` worth of events.
  def flush_cursor!
    return unless @cursor_store

    live, saved = @mutex.synchronize { [@cursor_state[:live], @cursor_state[:saved]] }
    return unless live && live != saved

    @cursor_store.save(time_us: live, at: @clock.call)
    @mutex.synchronize { @cursor_state[:saved] = live }
  end
end
241
+
242
# Raised inside the worker thread by StreamManager#force_reconnect to
# break a stalled each_event. The run loop rescues it around each_event and
# treats it as a disconnect, preserving the reconnect-with-cursor flow.
class Stalled < StandardError
end
246
+
247
# Wrapper the run loop passes to the on_event callback when each_event
# raises; `cause` holds the rescued exception.
StreamError = Struct.new(:cause)
248
+
249
# Lifecycle status emitted alongside Event/StreamError on the same
# on_event callback so the REPL can render "-- disconnected" /
# "-- reconnecting" / "-- live" lines without coupling to the manager's
# internals.
#
# state:  one of :disconnected | :reconnecting | :live | :gapped.
# reason: :closed | :error, set for :disconnected.
# error:  the underlying exception when reason == :error.
# since:  set for :gapped — the time the stream was last known to be
#         current (the stale cursor's event time, or the stored cursor's
#         save time at startup).
StreamStatus = Data.define(:state, :reason, :error, :since) do
  # Only `state` is required; the remaining members default to nil.
  def initialize(state:, reason: nil, error: nil, since: nil)
    super
  end
end
261
+ end
262
+ end
@@ -0,0 +1,34 @@
1
+ require "set"
2
+
3
+ require_relative "../../tempest"
4
+
5
+ module Tempest
6
+ module Jetstream
7
# Outcome of deciding how to subscribe to Jetstream: either server-side
# wantedDids filtering, or a firehose subscription filtered on the client.
# Jetstream caps wantedDids at 10000 DIDs per subscription, so anyone
# following more than that has to receive the full stream and drop
# uninteresting events locally.
#
# wanted_dids: DIDs to pass as wantedDids (empty in the firehose case).
# filter:      nil when the server filters; otherwise a callable taking an
#              event and returning whether it should be kept.
Plan = Data.define(:wanted_dids, :filter)
13
+
14
# Builds the subscription Plan for an account and the accounts it follows.
module Subscription
  module_function

  # self_did: the user's own DID; always listed first.
  # follows:  rows indexable by :did or "did"; rows without a DID are skipped.
  # cap:      maximum number of DIDs the server accepts in wantedDids.
  #
  # Returns a Plan with `wanted_dids` set (and no filter) when the distinct
  # DIDs fit under `cap`; otherwise a Plan with empty `wanted_dids` and a
  # client-side filter that accepts only events from those DIDs.
  #
  # DIDs are de-duplicated before counting, so repeated follow rows neither
  # push the list over the cap nor send the same wantedDids value twice.
  def build(self_did:, follows:, cap: 10_000)
    followed = follows.filter_map { |row| row[:did] || row["did"] }
    ordered = [self_did, *followed].uniq

    if ordered.length <= cap
      Plan.new(wanted_dids: ordered, filter: nil)
    else
      allowed = ordered.to_set
      Plan.new(wanted_dids: [], filter: ->(event) { allowed.include?(event.did) })
    end
  end
end
33
+ end
34
+ end