firehose-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ef4dac25083ab9471245c47df41233e04e19e12282153e43e630603195d7e1cc
4
+ data.tar.gz: 36db7c5b6534a76cbdd88158792fc614524ce7fa3ccce0ed396eb608fa478484
5
+ SHA512:
6
+ metadata.gz: 9335a0468cb38b470b67edde432d05e6b96ac3e6209c2b66a51667754f9888eeccb9f8bf8944dcb91c175f15dbe8528343f01bc3c2e93429db55ccbb5003a2ac
7
+ data.tar.gz: cec5635b29ca8b3a592fc79b4333baf6e1ae472f47bf2858d0cd11e17849ebe5b55233cae584f87fae1039b8a6c8d282dd7561b8a66cfb5d962a8fcb8a974dcc
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # firehose-rb
2
+
3
+ Ruby client for the Firehose real-time web monitoring API. SSE streaming with auto-reconnect, rules CRUD, and offset tracking.
4
+
5
+ ## Installation
6
+
7
+ ```ruby
8
+ gem "firehose-rb"
9
+ ```
10
+
11
+ ## Configuration
12
+
13
+ ```ruby
14
+ Firehose.configure do |c|
15
+ c.management_key = "fhm_..."
16
+ c.tap_token = "fh_..."
17
+ c.base_url = "https://api.firehose.dev" # default
18
+ c.timeout = 300 # SSE timeout in seconds
19
+ end
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ### Rules CRUD
25
+
26
+ ```ruby
27
+ client = Firehose.client
28
+
29
+ # Create a rule
30
+ rule = client.create_rule(
31
+ value: '"AI agent" AND language:"en" AND recent:7d',
32
+ tag: "ai-agent",
33
+ quality: true
34
+ )
35
+
36
+ # List rules
37
+ rules = client.list_rules
38
+
39
+ # Delete a rule
40
+ client.delete_rule(rule.id)
41
+ ```
42
+
43
+ ### Streaming
44
+
45
+ ```ruby
46
+ client = Firehose.client
47
+
48
+ # Track offsets for resume
49
+ client.on_offset { |offset| save_offset(offset) }
50
+
51
+ # Stream events (auto-reconnects with exponential backoff)
52
+ client.stream(since: "1h") do |event|
53
+ event.id # String
54
+ event.document.url # String
55
+ event.document.title # String
56
+ event.document.markdown # String (full page content)
57
+ event.matched_rule # String (tag)
58
+ event.matched_at # Time
59
+ end
60
+
61
+ # Stop streaming
62
+ client.stop_stream
63
+ ```
64
+
65
+ ## Data Structures
66
+
67
+ - `Firehose::Rule` — id, value, tag, quality, nsfw
68
+ - `Firehose::Event` — id, document, matched_rule, matched_at
69
+ - `Firehose::Document` — url, title, markdown, categories, types, language, publish_time
70
+
71
+ ## Error Handling
72
+
73
+ - `Firehose::AuthenticationError` — invalid API keys
74
+ - `Firehose::RateLimitError` — rate limited
75
+ - `Firehose::ConnectionError` — connection failures
76
+ - `Firehose::TimeoutError` — request timeout
77
+
78
+ SSE streaming auto-reconnects with exponential backoff (1s → 2s → 4s → max 30s).
79
+ Authentication errors are not retried.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "json"
5
+
6
module Firehose
  # Entry point for the Firehose API: rules CRUD over Faraday plus a thin
  # facade over {Firehose::Stream} for consuming the SSE stream.
  class Client
    # @param config [#base_url, #management_key] settings object; also handed to the Stream
    def initialize(config: Firehose.configuration)
      @config = config
      @stream = Stream.new(config: config)
    end

    # Rules CRUD

    # Creates a rule. Keys whose value is nil (e.g. an omitted tag) are not sent.
    #
    # @param value [String] rule expression
    # @param tag [String, nil]
    # @param quality [Boolean]
    # @param nsfw [Boolean]
    # @return [Firehose::Rule] built from the JSON response body
    # @raise [Firehose::Error] see {#perform}
    def create_rule(value:, tag: nil, quality: false, nsfw: false)
      body = { value: value, tag: tag, quality: quality, nsfw: nsfw }.compact
      response = perform { |conn| conn.post("/rules", body.to_json) }
      Rule.from_hash(JSON.parse(response.body))
    end

    # @return [Array<Firehose::Rule>] one Rule per element of the JSON response
    # @raise [Firehose::Error] see {#perform}
    def list_rules
      response = perform { |conn| conn.get("/rules") }
      JSON.parse(response.body).map { |attrs| Rule.from_hash(attrs) }
    end

    # @param rule_id [#to_s] identifier of the rule to delete
    # @return [true]
    # @raise [ArgumentError] when rule_id is nil or blank
    # @raise [Firehose::Error] see {#perform}
    def delete_rule(rule_id)
      # A blank id would turn the request into `DELETE /rules/`, i.e. the
      # collection path, so refuse it before anything goes on the wire.
      raise ArgumentError, "rule_id required" if rule_id.nil? || rule_id.to_s.strip.empty?

      perform { |conn| conn.delete("/rules/#{rule_id}") }
      true
    end

    # Streaming

    # Streams events to the block; delegates to Stream#connect.
    #
    # @param since [String, nil] passed through as the stream's `since` parameter
    # @raise [ArgumentError] when no block is given
    def stream(since: nil, &block)
      raise ArgumentError, "block required" unless block_given?

      @stream.connect(since: since, &block)
    end

    # Registers a callback on the underlying stream for offset tracking.
    def on_offset(&block)
      @stream.on_offset(&block)
    end

    # Asks the underlying stream to stop.
    def stop_stream
      @stream.stop
    end

    private

    # Memoized Faraday connection carrying the management key as a bearer token.
    def management_connection
      @management_connection ||= Faraday.new(url: @config.base_url) do |f|
        f.headers["Authorization"] = "Bearer #{@config.management_key}"
        f.headers["Content-Type"] = "application/json"
        f.adapter Faraday.default_adapter
      end
    end

    # Runs one management request and validates its status.
    #
    # Transport-level Faraday failures are translated into the gem's own
    # error classes so callers only need to rescue Firehose::Error.
    #
    # @yieldparam conn [Faraday::Connection]
    # @return [Faraday::Response] a response with a 2xx status
    # @raise [Firehose::TimeoutError] when the request times out
    # @raise [Firehose::ConnectionError] when the connection or TLS handshake fails
    # @raise [Firehose::AuthenticationError, Firehose::RateLimitError, Firehose::Error]
    #   for non-2xx statuses, see {#handle_response}
    def perform
      response =
        begin
          yield(management_connection)
        rescue Faraday::TimeoutError => e
          raise Firehose::TimeoutError, "Request timed out: #{e.message}"
        rescue Faraday::ConnectionFailed, Faraday::SSLError => e
          raise Firehose::ConnectionError, "Connection failed: #{e.message}"
        end

      handle_response(response)
      response
    end

    # Maps HTTP status codes onto the gem's error classes; 2xx passes through.
    def handle_response(response)
      case response.status
      when 200..299 then nil
      when 401, 403 then raise AuthenticationError, "Invalid management key"
      when 429 then raise RateLimitError, "Rate limited"
      else raise Error, "HTTP #{response.status}: #{response.body}"
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
# Time.parse lives in the "time" stdlib extension, not in core Time. Without
# this require, parse_time raises NoMethodError (which its rescue does not
# cover) unless some other library happens to have loaded it first.
require "time"

module Firehose
  # Immutable value object describing a matched web document.
  # Built on Data.define, which is available from Ruby 3.2.
  Document = Data.define(:url, :title, :markdown, :categories, :types, :language, :publish_time) do
    # Only `url` is mandatory; list attributes default to empty arrays and
    # everything else defaults to nil.
    def initialize(url:, title: nil, markdown: nil, categories: [], types: [], language: nil, publish_time: nil)
      super
    end

    # Builds a Document from a hash keyed by strings or symbols. String keys
    # take precedence; a nil/false string-keyed value falls back to the symbol key.
    #
    # @param hash [Hash]
    # @return [Firehose::Document]
    def self.from_hash(hash)
      read = ->(key) { hash[key.to_s] || hash[key] }

      new(
        url: read.call(:url),
        title: read.call(:title),
        markdown: read.call(:markdown),
        categories: Array(read.call(:categories)),
        types: Array(read.call(:types)),
        language: read.call(:language),
        publish_time: parse_time(read.call(:publish_time))
      )
    end

    # Lenient timestamp coercion.
    #
    # @param value [Time, #to_s, nil]
    # @return [Time, nil] the value itself when already a Time; nil when the
    #   value is nil or cannot be parsed as a time
    def self.parse_time(value)
      return nil if value.nil?
      return value if value.is_a?(Time)

      Time.parse(value.to_s)
    rescue ArgumentError
      nil
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Firehose
  # Root of the gem's exception hierarchy; rescue this to catch any
  # Firehose-specific failure.
  class Error < StandardError
  end

  # Raised when the API answers 401 or 403.
  class AuthenticationError < Error
  end

  # Raised when the API answers 429.
  class RateLimitError < Error
  end

  # Signals a connection-level failure.
  class ConnectionError < Error
  end

  # Signals a request timeout (as described in the README).
  class TimeoutError < Error
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
# Time.parse is provided by the "time" stdlib extension, not by core Time.
require "time"

module Firehose
  # Immutable value object for a single stream event: the matched document
  # plus which rule matched and when. Built on Data.define (Ruby 3.2+).
  Event = Data.define(:id, :document, :matched_rule, :matched_at) do
    def initialize(id:, document:, matched_rule: nil, matched_at: nil)
      super
    end

    # Builds an Event from the `data` payload of an SSE message.
    #
    # The document is read from the "document" key when present, otherwise
    # the whole payload is treated as the document. The rule is read from
    # "matched_rule", falling back to "tag".
    #
    # @param data [String] JSON text taken from the SSE `data:` lines
    # @param id [String, nil] SSE `id:` value; wins over an "id" key in the payload
    # @return [Firehose::Event]
    # @raise [Firehose::Error] when the payload is not valid JSON or is not a JSON object
    def self.from_sse(data, id: nil)
      parsed = JSON.parse(data)

      # Arrays and scalars are valid JSON but cannot be indexed by string
      # key; report them as a parse failure instead of leaking a TypeError.
      unless parsed.is_a?(Hash)
        raise Firehose::Error, "Failed to parse SSE event: expected a JSON object, got #{parsed.class}"
      end

      new(
        id: id || parsed["id"],
        document: Document.from_hash(parsed["document"] || parsed),
        matched_rule: parsed["matched_rule"] || parsed["tag"],
        matched_at: parse_timestamp(parsed["matched_at"])
      )
    rescue JSON::ParserError => e
      raise Firehose::Error, "Failed to parse SSE event: #{e.message}"
    end

    # Lenient timestamp parsing: an absent or unparseable value yields nil
    # rather than aborting the whole event.
    def self.parse_timestamp(value)
      return nil unless value

      Time.parse(value.to_s)
    rescue ArgumentError
      nil
    end
    private_class_method :parse_timestamp
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Firehose
  # Immutable value object for a stream rule: its id, the rule expression
  # (`value`), an optional tag and the two boolean flags.
  Rule = Data.define(:id, :value, :tag, :quality, :nsfw) do
    # `id` and `value` are required; `tag` defaults to nil and both flags to false.
    def initialize(id:, value:, tag: nil, quality: false, nsfw: false)
      super
    end

    # Builds a Rule from a hash keyed by strings or symbols. The string key
    # is consulted first; a nil/false result falls back to the symbol key,
    # and flags that are still missing become false.
    #
    # @param hash [Hash]
    # @return [Firehose::Rule]
    def self.from_hash(hash)
      lookup = ->(name) { hash[name.to_s] || hash[name] }

      new(
        id: lookup.call(:id),
        value: lookup.call(:value),
        tag: lookup.call(:tag),
        quality: lookup.call(:quality) || false,
        nsfw: lookup.call(:nsfw) || false
      )
    end
  end
end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+
6
module Firehose
  # Long-lived Server-Sent Events consumer for the `/stream` endpoint.
  #
  # Reconnects with exponential backoff (INITIAL_BACKOFF doubling up to
  # MAX_BACKOFF seconds) and resumes from the last seen SSE id by sending it
  # in the `Last-Event-ID` request header.
  class Stream
    MAX_BACKOFF = 30
    INITIAL_BACKOFF = 1

    # A blank line terminates an SSE event. The event-stream format allows
    # LF, CRLF or CR line endings, so all three must be recognised.
    EVENT_BOUNDARY = /\r\n\r\n|\n\n|\r\r/
    LINE_BREAK = /\r\n|\n|\r/
    private_constant :EVENT_BOUNDARY, :LINE_BREAK

    # @return [String, nil] id of the most recent event that carried one
    attr_reader :last_event_id

    # @param config [#base_url, #tap_token, #timeout]
    def initialize(config:)
      @config = config
      @last_event_id = nil
      @on_offset = nil
      @running = false
      @connected = false
    end

    # Registers a callback invoked with the event id each time an event
    # carrying an id is received (before the event itself is yielded).
    def on_offset(&block)
      @on_offset = block
    end

    # Requests termination of the #connect loop.
    def stop
      @running = false
    end

    # Consumes the stream until #stop is called, yielding each event.
    #
    # Every failure except AuthenticationError is retried after a backoff
    # sleep. The backoff restarts from INITIAL_BACKOFF whenever the previous
    # attempt got as far as an accepted (HTTP 200) response, so a long
    # healthy session that eventually drops is retried quickly.
    #
    # @param since [String, nil] start position for the first connection. It
    #   is dropped only once an event id is known, so an early failure does
    #   not lose the requested window.
    # @yieldparam event [Firehose::Event]
    # @raise [ArgumentError] when no block is given
    # @raise [Firehose::AuthenticationError] never retried
    def connect(since: nil, &block)
      raise ArgumentError, "block required" unless block

      @running = true
      backoff = INITIAL_BACKOFF

      while @running
        @connected = false

        begin
          stream_events(since: since, &block)
        rescue Firehose::AuthenticationError
          raise
        rescue StandardError
          # Deliberate best-effort: any other failure falls through to the
          # backoff-and-retry path below.
        end

        break unless @running

        backoff = INITIAL_BACKOFF if @connected
        since = nil if @last_event_id # Last-Event-ID takes over from here
        # Also sleeps after a clean server close, so a server that hangs up
        # immediately cannot drive a reconnect hot loop.
        sleep(backoff)
        backoff = [backoff * 2, MAX_BACKOFF].min
      end
    end

    private

    # Opens one HTTP connection and feeds body chunks to the SSE parser
    # until the server closes the stream, an error is raised, or #stop is
    # observed.
    def stream_events(since: nil, &block)
      uri = build_uri(since: since)
      request = Net::HTTP::Get.new(uri, build_headers)
      buffer = +""

      # max_retries: 0 — Net::HTTP retries idempotent requests once by
      # default, which would silently replay this GET (built with the old
      # resume position) inside a single attempt and bypass the backoff logic.
      Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
                                          open_timeout: 10, read_timeout: @config.timeout,
                                          max_retries: 0) do |http|
        http.request(request) do |response|
          handle_response_status(response)
          @connected = true

          response.read_body do |chunk|
            # `return` unwinds out of Net::HTTP.start, whose ensure closes
            # the socket. A plain `break` would only leave read_body, after
            # which Net::HTTP tries to drain the rest of the endless body.
            return unless @running

            buffer << chunk
            process_buffer(buffer, &block)
            return unless @running # the handler itself may have called #stop
          end
        end
      end
    end

    # Stream URL under the configured base URL, with `since` as a query
    # parameter when given.
    def build_uri(since: nil)
      uri = URI.join(@config.base_url, "/stream")
      params = {}
      params["since"] = since if since
      uri.query = URI.encode_www_form(params) if params.any?
      uri
    end

    # Request headers; includes Last-Event-ID once an event id has been seen.
    def build_headers
      headers = {
        "Accept" => "text/event-stream",
        "Authorization" => "Bearer #{@config.tap_token}",
        "Cache-Control" => "no-cache"
      }
      headers["Last-Event-ID"] = @last_event_id if @last_event_id
      headers
    end

    # Only a 200 is accepted as a stream; everything else raises.
    def handle_response_status(response)
      case response.code.to_i
      when 200 then nil
      when 401, 403 then raise Firehose::AuthenticationError, "Invalid tap token"
      when 429 then raise Firehose::RateLimitError, "Rate limited"
      else raise Firehose::ConnectionError, "HTTP #{response.code}: #{response.message}"
      end
    end

    # Removes every complete event from the front of `buffer` (mutating it)
    # and dispatches it; a trailing partial event stays buffered.
    def process_buffer(buffer, &block)
      while (boundary = EVENT_BOUNDARY.match(buffer))
        raw_event = buffer.slice!(0, boundary.end(0))
        parse_sse_event(raw_event, &block)
      end
    end

    # Parses one raw SSE event. Only `id:` and `data:` lines are used;
    # events without data are ignored.
    def parse_sse_event(raw, &block)
      id = nil
      data_lines = []

      raw.split(LINE_BREAK).each do |line|
        if line.start_with?("id:")
          id = line.delete_prefix("id:").strip
        elsif line.start_with?("data:")
          data_lines << line.delete_prefix("data:").strip
        end
      end

      return if data_lines.empty?

      event = Event.from_sse(data_lines.join("\n"), id: id)

      # Report an offset only when this event actually carried one, so the
      # callback never receives nil or a repeated stale id.
      if id
        @last_event_id = id
        @on_offset&.call(id)
      end

      block.call(event)
    end
  end
end
data/lib/firehose.rb ADDED
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "firehose/error"
4
+ require_relative "firehose/rule"
5
+ require_relative "firehose/document"
6
+ require_relative "firehose/event"
7
+ require_relative "firehose/stream"
8
+ require_relative "firehose/client"
9
+
10
module Firehose
  # Plain settings holder. `base_url` and `timeout` start with defaults;
  # `management_key` and `tap_token` stay nil until assigned.
  class Configuration
    attr_accessor :management_key, :tap_token, :base_url, :timeout

    def initialize
      self.base_url = "https://api.firehose.dev"
      self.timeout = 300
    end
  end

  # Module-wide configuration, created on first access.
  #
  # @return [Firehose::Configuration]
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yields the module-wide configuration for mutation.
  #
  # @yieldparam config [Firehose::Configuration]
  def self.configure
    yield configuration
  end

  # Replaces the module-wide configuration with a fresh one.
  #
  # @return [Firehose::Configuration] the new configuration
  def self.reset_configuration!
    @configuration = Configuration.new
  end

  # Builds a new client on every call.
  #
  # @return [Firehose::Client]
  def self.client
    Client.new
  end
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: firehose-rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nauman Tariq
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: faraday
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rspec
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.12'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.12'
40
+ - !ruby/object:Gem::Dependency
41
+ name: webmock
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.18'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.18'
54
+ description: SSE streaming client with rules CRUD, auto-reconnect, and offset tracking
55
+ for the Firehose API.
56
+ email:
57
+ - nauman@intellecta.co
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - README.md
63
+ - lib/firehose.rb
64
+ - lib/firehose/client.rb
65
+ - lib/firehose/document.rb
66
+ - lib/firehose/error.rb
67
+ - lib/firehose/event.rb
68
+ - lib/firehose/rule.rb
69
+ - lib/firehose/stream.rb
70
+ homepage: https://github.com/nauman/firehose-rb
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '3.1'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubygems_version: 3.6.9
89
+ specification_version: 4
90
+ summary: Ruby client for the Firehose real-time web monitoring API
91
+ test_files: []