firehose-rb 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +59 -26
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b3400d6b686eb7ea407bd385951304b1898629e2ed396baec585528728da7b2d
|
|
4
|
+
data.tar.gz: 7ecaa46e86e08f063219eeed945c7bf2b2c9819e8e27374cb3dfbc85906eb073
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 86bd6635074e868f78d67caff1c3f6ef5c175545dfcbfdf6bdc2ef642147631d2bd48bbad401bc4b8a4b3eee96dadcc87737aec3c95b5daaac8711ccbf8b8816
|
|
7
|
+
data.tar.gz: 8ff3e1afa478900d65569c4260177016df775bb3829a3c3361897c8d4ff97e3b6c0221c2c6dc979cace6964ef0e95976ca823cfe96001302cbe87bfb78fe5770
|
data/README.md
CHANGED
|
@@ -1,27 +1,33 @@
|
|
|
1
1
|
# firehose-rb
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[firehose-rb on RubyGems](https://rubygems.org/gems/firehose-rb)
|
|
4
|
+
|
|
5
|
+
Ruby client for the [Firehose](https://firehose.dev) real-time web monitoring API. Define rules, stream matching pages as they're discovered, and build content pipelines on top of the live web.
|
|
4
6
|
|
|
5
7
|
## Installation
|
|
6
8
|
|
|
7
9
|
```ruby
|
|
8
|
-
gem "firehose-rb",
|
|
10
|
+
gem "firehose-rb", "~> 0.1"
|
|
9
11
|
```
|
|
10
12
|
|
|
13
|
+
Then run `bundle install`.
|
|
14
|
+
|
|
11
15
|
## Configuration
|
|
12
16
|
|
|
13
17
|
```ruby
|
|
14
18
|
Firehose.configure do |c|
|
|
15
|
-
c.management_key = "fhm_...
|
|
16
|
-
c.tap_token
|
|
17
|
-
c.base_url
|
|
18
|
-
c.timeout
|
|
19
|
+
c.management_key = ENV["FIREHOSE_MANAGEMENT_KEY"] # fhm_...
|
|
20
|
+
c.tap_token = ENV["FIREHOSE_TAP_TOKEN"] # fh_...
|
|
21
|
+
c.base_url = "https://api.firehose.dev" # default
|
|
22
|
+
c.timeout = 300 # SSE timeout in seconds
|
|
19
23
|
end
|
|
20
24
|
```
|
|
21
25
|
|
|
22
26
|
## Usage
|
|
23
27
|
|
|
24
|
-
### Rules
|
|
28
|
+
### Rules
|
|
29
|
+
|
|
30
|
+
Rules tell Firehose what to watch for. They use Lucene query syntax.
|
|
25
31
|
|
|
26
32
|
```ruby
|
|
27
33
|
client = Firehose.client
|
|
@@ -33,7 +39,7 @@ rule = client.create_rule(
|
|
|
33
39
|
quality: true
|
|
34
40
|
)
|
|
35
41
|
|
|
36
|
-
# List rules
|
|
42
|
+
# List all rules
|
|
37
43
|
rules = client.list_rules
|
|
38
44
|
|
|
39
45
|
# Delete a rule
|
|
@@ -42,38 +48,65 @@ client.delete_rule(rule.id)
|
|
|
42
48
|
|
|
43
49
|
### Streaming
|
|
44
50
|
|
|
51
|
+
Connect to the SSE stream and process matching pages in real time.
|
|
52
|
+
|
|
45
53
|
```ruby
|
|
46
54
|
client = Firehose.client
|
|
47
55
|
|
|
48
|
-
#
|
|
56
|
+
# Persist offsets so you can resume after restart
|
|
49
57
|
client.on_offset { |offset| save_offset(offset) }
|
|
50
58
|
|
|
51
59
|
# Stream events (auto-reconnects with exponential backoff)
|
|
52
60
|
client.stream(since: "1h") do |event|
|
|
53
|
-
event.id
|
|
54
|
-
event.document.url
|
|
55
|
-
event.document.title
|
|
56
|
-
event.document.markdown
|
|
57
|
-
event.
|
|
58
|
-
event.
|
|
61
|
+
event.id # String — unique event ID
|
|
62
|
+
event.document.url # String — page URL
|
|
63
|
+
event.document.title # String — page title
|
|
64
|
+
event.document.markdown # String — full page content as markdown
|
|
65
|
+
event.document.categories # Array — page categories
|
|
66
|
+
event.document.types # Array — page types (article, blog, etc.)
|
|
67
|
+
event.document.language # String — detected language
|
|
68
|
+
event.document.publish_time # Time — when the page was published
|
|
69
|
+
event.matched_rule # String — which rule tag matched
|
|
70
|
+
event.matched_at # Time — when the match occurred
|
|
59
71
|
end
|
|
60
72
|
|
|
61
|
-
# Stop streaming
|
|
73
|
+
# Stop streaming gracefully
|
|
62
74
|
client.stop_stream
|
|
63
75
|
```
|
|
64
76
|
|
|
77
|
+
### Resilience
|
|
78
|
+
|
|
79
|
+
- Auto-reconnect with exponential backoff (1s, 2s, 4s, ... max 30s)
|
|
80
|
+
- `Last-Event-ID` header sent on reconnect for automatic resume
|
|
81
|
+
- `on_offset` callback for persisting stream position
|
|
82
|
+
- Authentication errors (`401/403`) are not retried
|
|
83
|
+
|
|
65
84
|
## Data Structures
|
|
66
85
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
86
|
+
| Struct | Fields |
|
|
87
|
+
|--------|--------|
|
|
88
|
+
| `Firehose::Rule` | id, value, tag, quality, nsfw |
|
|
89
|
+
| `Firehose::Event` | id, document, matched_rule, matched_at |
|
|
90
|
+
| `Firehose::Document` | url, title, markdown, categories, types, language, publish_time |
|
|
91
|
+
|
|
92
|
+
## Errors
|
|
93
|
+
|
|
94
|
+
| Error | Cause |
|
|
95
|
+
|-------|-------|
|
|
96
|
+
| `Firehose::AuthenticationError` | Invalid management_key or tap_token |
|
|
97
|
+
| `Firehose::RateLimitError` | Too many requests (429) |
|
|
98
|
+
| `Firehose::ConnectionError` | Network or HTTP errors |
|
|
99
|
+
| `Firehose::TimeoutError` | Stream or request timeout |
|
|
100
|
+
|
|
101
|
+
## Requirements
|
|
102
|
+
|
|
103
|
+
- Ruby >= 3.1
|
|
104
|
+
- [Faraday](https://github.com/lostisland/faraday) ~> 2.0
|
|
105
|
+
|
|
106
|
+
## Used by
|
|
70
107
|
|
|
71
|
-
|
|
108
|
+
Built for [InventList](https://inventlist.com) — a home for indie builders that turns the live web into weekly signals for makers and their agents.
|
|
72
109
|
|
|
73
|
-
|
|
74
|
-
- `Firehose::RateLimitError` — rate limited
|
|
75
|
-
- `Firehose::ConnectionError` — connection failures
|
|
76
|
-
- `Firehose::TimeoutError` — request timeout
|
|
110
|
+
## License
|
|
77
111
|
|
|
78
|
-
|
|
79
|
-
Authentication errors are not retried.
|
|
112
|
+
MIT
|