feedbag 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.markdown +39 -15
- data/lib/feedbag.rb +69 -16
- metadata +26 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aed91daea08560f45514eb565a8f5524a02bb39cff16acec30d4fcbf72113944
|
|
4
|
+
data.tar.gz: a873374c6b527f7d55c582fe7126e6c6d26f873828a4f5bf804316d7c113218c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a512e9f8d6e3994d14812681631faa38eb38d164d09b8fe5c8adb2f754a4728460e93079d01048c89e65c4f929276268fde16bc88b18cfa4d9736b411d94b219
|
|
7
|
+
data.tar.gz: c8be3cae759ded40c08737dea5b3547c0f7a9e9fd5a2403bd5cda280bae8af69227edbb34ecfc1a72b17931acb35935605f9cd682808bdd575383ff05bf0144f
|
data/README.markdown
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Feedbag
|
|
2
2
|
=======
|
|
3
3
|
|
|
4
|
-
Feedbag is Ruby's favorite auto-discovery tool/library!
|
|
4
|
+
Feedbag is Ruby's favorite feed auto-discovery tool/library!
|
|
5
5
|
|
|
6
6
|
### Quick synopsis
|
|
7
7
|
|
|
@@ -10,9 +10,9 @@ Feedbag is Ruby's favorite auto-discovery tool/library!
|
|
|
10
10
|
=> true
|
|
11
11
|
>> Feedbag.find "damog.net/blog"
|
|
12
12
|
=> ["http://damog.net/blog/atom.xml"]
|
|
13
|
-
>> Feedbag.feed? "
|
|
13
|
+
>> Feedbag.feed? "google.com"
|
|
14
14
|
=> false
|
|
15
|
-
>> Feedbag.feed?("https://
|
|
15
|
+
>> Feedbag.feed?("https://daringfireball.net/feeds/main")
|
|
16
16
|
=> true
|
|
17
17
|
```
|
|
18
18
|
|
|
@@ -28,38 +28,62 @@ You can also use the command line tool for quick queries, if you install the gem
|
|
|
28
28
|
|
|
29
29
|
» feedbag https://www.ruby-lang.org/en/
|
|
30
30
|
== https://www.ruby-lang.org/en/:
|
|
31
|
-
|
|
31
|
+
- https://www.ruby-lang.org/en/feeds/news.rss
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
### Usage
|
|
35
|
-
|
|
35
|
+
|
|
36
|
+
Feedbag will find RSS, Atom, and JSON feed types:
|
|
36
37
|
|
|
37
38
|
```ruby
|
|
38
|
-
|
|
39
|
-
=> ["https://daringfireball.net/feeds/main", "https://daringfireball.net/feeds/json"
|
|
39
|
+
>> Feedbag.find('https://daringfireball.net')
|
|
40
|
+
=> ["https://daringfireball.net/feeds/main", "https://daringfireball.net/feeds/json"]
|
|
40
41
|
```
|
|
41
42
|
|
|
42
|
-
|
|
43
|
+
#### Custom User-Agent
|
|
44
|
+
|
|
45
|
+
Feedbag defaults to a User-Agent string of `Feedbag/VERSION`, but you can override it:
|
|
43
46
|
|
|
44
47
|
```ruby
|
|
45
|
-
|
|
46
|
-
=> ["http://feeds.kottke.org/main"
|
|
47
|
-
|
|
48
|
+
>> Feedbag.find('https://kottke.org', 'User-Agent' => "My Personal Agent/1.0.1")
|
|
49
|
+
=> ["http://feeds.kottke.org/main"]
|
|
50
|
+
```
|
|
48
51
|
|
|
49
|
-
|
|
52
|
+
Other options passed to `find` will be forwarded to OpenURI:
|
|
50
53
|
|
|
51
54
|
```ruby
|
|
52
|
-
Feedbag.find("https://
|
|
55
|
+
Feedbag.find("https://example.com", 'User-Agent' => "My Agent/1.0", open_timeout: 10)
|
|
53
56
|
```
|
|
54
57
|
|
|
55
|
-
|
|
58
|
+
See [OpenURI options](https://rubyapi.org/o/openuri/openread#method-i-open) for more details.
|
|
59
|
+
|
|
60
|
+
#### Custom Logger
|
|
61
|
+
|
|
62
|
+
By default, errors are written to `$stderr`. You can redirect them to a custom logger:
|
|
63
|
+
|
|
64
|
+
```ruby
|
|
65
|
+
# Use Rails logger
|
|
66
|
+
Feedbag.logger = Rails.logger
|
|
56
67
|
|
|
68
|
+
# Or silence all output
|
|
69
|
+
Feedbag.logger = Logger.new('/dev/null')
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
#### Non-ASCII URL Support
|
|
73
|
+
|
|
74
|
+
Feedbag handles internationalized URLs (IRIs) with non-ASCII characters:
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
>> Feedbag.find("https://example.com/中文/feed/")
|
|
78
|
+
# Works! URLs are automatically normalized
|
|
79
|
+
```
|
|
57
80
|
|
|
58
81
|
### Why should you use it?
|
|
59
82
|
|
|
60
|
-
- Because it only uses [Nokogiri](http://nokogiri.org/) as
|
|
83
|
+
- Because it only uses [Nokogiri](http://nokogiri.org/) and [Addressable](https://github.com/sporkmonger/addressable) as dependencies.
|
|
61
84
|
- Because it follows modern feed filename conventions (like those ones used by WordPress blogs, or Blogger, etc).
|
|
62
85
|
- Because it's a single file you can embed easily in your application.
|
|
86
|
+
- Because it handles international URLs with non-ASCII characters.
|
|
63
87
|
- Because it's faster than anything else.
|
|
64
88
|
|
|
65
89
|
### Author
|
data/lib/feedbag.rb
CHANGED
|
@@ -6,9 +6,41 @@ require "rubygems"
|
|
|
6
6
|
require "nokogiri"
|
|
7
7
|
require "open-uri"
|
|
8
8
|
require "net/http"
|
|
9
|
+
require "logger"
|
|
10
|
+
|
|
11
|
+
begin
|
|
12
|
+
require "addressable/uri"
|
|
13
|
+
rescue LoadError
|
|
14
|
+
# addressable will be loaded after bundle install
|
|
15
|
+
end
|
|
9
16
|
|
|
10
17
|
class Feedbag
|
|
11
|
-
VERSION = '1.0.
|
|
18
|
+
VERSION = '1.0.1'
|
|
19
|
+
|
|
20
|
+
# Configurable logger for error output
|
|
21
|
+
# Default writes to $stderr. Can be set to Rails.logger or any Logger-compatible object.
|
|
22
|
+
#
|
|
23
|
+
# @example Silence all output
|
|
24
|
+
# Feedbag.logger = Logger.new('/dev/null')
|
|
25
|
+
#
|
|
26
|
+
# @example Use Rails logger
|
|
27
|
+
# Feedbag.logger = Rails.logger
|
|
28
|
+
#
|
|
29
|
+
class << self
|
|
30
|
+
attr_writer :logger
|
|
31
|
+
|
|
32
|
+
def logger
|
|
33
|
+
@logger ||= default_logger
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
def default_logger
|
|
39
|
+
logger = Logger.new($stderr)
|
|
40
|
+
logger.formatter = proc { |severity, _datetime, _progname, msg| "#{msg}\n" }
|
|
41
|
+
logger
|
|
42
|
+
end
|
|
43
|
+
end
|
|
12
44
|
CONTENT_TYPES = [
|
|
13
45
|
'application/x.atom+xml',
|
|
14
46
|
'application/atom+xml',
|
|
@@ -25,7 +57,7 @@ class Feedbag
|
|
|
25
57
|
end
|
|
26
58
|
|
|
27
59
|
def self.find(url, options = {})
|
|
28
|
-
new(options: options).find(url,
|
|
60
|
+
new(options: options).find(url, options)
|
|
29
61
|
end
|
|
30
62
|
|
|
31
63
|
def initialize(options: nil)
|
|
@@ -34,9 +66,23 @@ class Feedbag
|
|
|
34
66
|
@options["User-Agent"] ||= "Feedbag/#{VERSION}"
|
|
35
67
|
end
|
|
36
68
|
|
|
69
|
+
# Normalize a URL to handle non-ASCII characters (IRIs)
|
|
70
|
+
# This converts internationalized URLs to valid ASCII URIs
|
|
71
|
+
def self.normalize_url(url)
|
|
72
|
+
return url if url.nil? || url.empty?
|
|
73
|
+
if defined?(Addressable::URI)
|
|
74
|
+
Addressable::URI.parse(url).normalize.to_s
|
|
75
|
+
else
|
|
76
|
+
url
|
|
77
|
+
end
|
|
78
|
+
rescue Addressable::URI::InvalidURIError
|
|
79
|
+
url
|
|
80
|
+
end
|
|
81
|
+
|
|
37
82
|
def feed?(url)
|
|
38
|
-
#
|
|
39
|
-
|
|
83
|
+
# Normalize URL to handle non-ASCII characters
|
|
84
|
+
normalized_url = Feedbag.normalize_url(url)
|
|
85
|
+
url_uri = URI.parse(normalized_url)
|
|
40
86
|
url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
|
|
41
87
|
url << "?#{url_uri.query}" if url_uri.query
|
|
42
88
|
|
|
@@ -52,7 +98,9 @@ class Feedbag
|
|
|
52
98
|
end
|
|
53
99
|
|
|
54
100
|
def find(url, options = {})
|
|
55
|
-
|
|
101
|
+
# Normalize URL to handle non-ASCII characters
|
|
102
|
+
normalized_url = Feedbag.normalize_url(url)
|
|
103
|
+
url_uri = URI.parse(normalized_url)
|
|
56
104
|
url = nil
|
|
57
105
|
if url_uri.scheme.nil?
|
|
58
106
|
url = "http://#{url_uri.to_s}"
|
|
@@ -76,11 +124,11 @@ class Feedbag
|
|
|
76
124
|
# TODO: actually find out timeout. use Terminator?
|
|
77
125
|
# $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
|
|
78
126
|
rescue => ex
|
|
79
|
-
|
|
127
|
+
Feedbag.logger.error "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
|
80
128
|
end
|
|
81
129
|
|
|
82
130
|
begin
|
|
83
|
-
html = URI.open(url,
|
|
131
|
+
html = URI.open(url, @options) do |f|
|
|
84
132
|
content_type = f.content_type.downcase
|
|
85
133
|
if content_type == "application/octet-stream" # open failed
|
|
86
134
|
content_type = f.meta["content-type"].gsub(/;.*$/, '')
|
|
@@ -135,13 +183,13 @@ class Feedbag
|
|
|
135
183
|
end
|
|
136
184
|
end
|
|
137
185
|
rescue Timeout::Error => err
|
|
138
|
-
|
|
186
|
+
Feedbag.logger.error "Timeout error occurred with `#{url}: #{err}'"
|
|
139
187
|
rescue OpenURI::HTTPError => the_error
|
|
140
|
-
|
|
188
|
+
Feedbag.logger.error "Error occurred with `#{url}': #{the_error}"
|
|
141
189
|
rescue SocketError => err
|
|
142
|
-
|
|
190
|
+
Feedbag.logger.error "Socket error occurred with: `#{url}': #{err}"
|
|
143
191
|
rescue => ex
|
|
144
|
-
|
|
192
|
+
Feedbag.logger.error "#{ex.class} error occurred with: `#{url}': #{ex.message}"
|
|
145
193
|
ensure
|
|
146
194
|
return @feeds
|
|
147
195
|
end
|
|
@@ -160,19 +208,24 @@ class Feedbag
|
|
|
160
208
|
# puts "#{feed_url} - #{orig_url}"
|
|
161
209
|
url = feed_url.sub(/^feed:/, '').strip
|
|
162
210
|
|
|
211
|
+
# Normalize URL to handle non-ASCII characters
|
|
212
|
+
url = Feedbag.normalize_url(url)
|
|
213
|
+
|
|
163
214
|
if base_uri
|
|
164
215
|
# url = base_uri + feed_url
|
|
165
|
-
|
|
216
|
+
normalized_base = Feedbag.normalize_url(base_uri)
|
|
217
|
+
url = URI.parse(normalized_base).merge(url).to_s
|
|
166
218
|
end
|
|
167
219
|
|
|
168
220
|
begin
|
|
169
221
|
uri = URI.parse(url)
|
|
170
|
-
rescue
|
|
171
|
-
|
|
172
|
-
|
|
222
|
+
rescue => ex
|
|
223
|
+
Feedbag.logger.error "Error parsing URL `#{url}': #{ex.message}"
|
|
224
|
+
return
|
|
173
225
|
end
|
|
174
226
|
unless uri.absolute?
|
|
175
|
-
|
|
227
|
+
normalized_orig = Feedbag.normalize_url(orig_url)
|
|
228
|
+
orig = URI.parse(normalized_orig)
|
|
176
229
|
url = orig.merge(url).to_s
|
|
177
230
|
end
|
|
178
231
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: feedbag
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.
|
|
4
|
+
version: 1.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David Moreno
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-11-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -30,6 +30,20 @@ dependencies:
|
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
32
|
version: 1.8.2
|
|
33
|
+
- !ruby/object:Gem::Dependency
|
|
34
|
+
name: addressable
|
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.8'
|
|
40
|
+
type: :runtime
|
|
41
|
+
prerelease: false
|
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '2.8'
|
|
33
47
|
- !ruby/object:Gem::Dependency
|
|
34
48
|
name: shoulda
|
|
35
49
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -48,22 +62,22 @@ dependencies:
|
|
|
48
62
|
name: mocha
|
|
49
63
|
requirement: !ruby/object:Gem::Requirement
|
|
50
64
|
requirements:
|
|
51
|
-
- - "~>"
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0.12'
|
|
54
65
|
- - ">="
|
|
55
66
|
- !ruby/object:Gem::Version
|
|
56
67
|
version: 0.12.0
|
|
68
|
+
- - "~>"
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: '0.12'
|
|
57
71
|
type: :development
|
|
58
72
|
prerelease: false
|
|
59
73
|
version_requirements: !ruby/object:Gem::Requirement
|
|
60
74
|
requirements:
|
|
61
|
-
- - "~>"
|
|
62
|
-
- !ruby/object:Gem::Version
|
|
63
|
-
version: '0.12'
|
|
64
75
|
- - ">="
|
|
65
76
|
- !ruby/object:Gem::Version
|
|
66
77
|
version: 0.12.0
|
|
78
|
+
- - "~>"
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
version: '0.12'
|
|
67
81
|
- !ruby/object:Gem::Dependency
|
|
68
82
|
name: webmock
|
|
69
83
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -138,7 +152,7 @@ homepage: http://github.com/damog/feedbag
|
|
|
138
152
|
licenses:
|
|
139
153
|
- MIT
|
|
140
154
|
metadata: {}
|
|
141
|
-
post_install_message:
|
|
155
|
+
post_install_message:
|
|
142
156
|
rdoc_options:
|
|
143
157
|
- "--main"
|
|
144
158
|
- README.markdown
|
|
@@ -155,8 +169,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
155
169
|
- !ruby/object:Gem::Version
|
|
156
170
|
version: '0'
|
|
157
171
|
requirements: []
|
|
158
|
-
rubygems_version: 3.3.
|
|
159
|
-
signing_key:
|
|
172
|
+
rubygems_version: 3.0.3.1
|
|
173
|
+
signing_key:
|
|
160
174
|
specification_version: 4
|
|
161
175
|
summary: RSS/Atom feed auto-discovery tool
|
|
162
176
|
test_files: []
|