feedjira 1.6.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/feedjira.gemspec +4 -3
- data/lib/feedjira.rb +3 -1
- data/lib/feedjira/feed.rb +15 -387
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/feed_spec.rb +26 -596
- metadata +20 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a75fe90b3327d3110c7ba5e5ebe8acdbfc9e4c10
|
4
|
+
data.tar.gz: d4313aca74f021751d583f61c140a18aea2381b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4004f548a5b6a3215294d5b8ebd81496c4603eaab3ba3c2f1b269815010fcbdc12cef8660a10ddd398159a09cb30443131e9c83b6973bf9cad9a746d78728c29
|
7
|
+
data.tar.gz: f426881efa2533cdef5bb34eb0ff57bc86259abfd26eb916fadc79dea1b30f4b95262ffc53d1539494fc306a845b4b68cd35377d79d41bcfd73976a8387fa12d
|
data/CHANGELOG.md
CHANGED
data/feedjira.gemspec
CHANGED
@@ -19,9 +19,10 @@ Gem::Specification.new do |s|
|
|
19
19
|
|
20
20
|
s.platform = Gem::Platform::RUBY
|
21
21
|
|
22
|
-
s.add_dependency 'sax-machine',
|
23
|
-
s.add_dependency '
|
24
|
-
s.add_dependency '
|
22
|
+
s.add_dependency 'sax-machine', '~> 1.0'
|
23
|
+
s.add_dependency 'faraday', '~> 0.9'
|
24
|
+
s.add_dependency 'faraday_middleware', '~> 0.9'
|
25
|
+
s.add_dependency 'loofah', '~> 2.0'
|
25
26
|
|
26
27
|
s.add_development_dependency 'rspec', '~> 3.0'
|
27
28
|
end
|
data/lib/feedjira.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'zlib'
|
2
|
-
require '
|
2
|
+
require 'faraday'
|
3
|
+
require 'faraday_middleware'
|
3
4
|
require 'sax-machine'
|
4
5
|
require 'loofah'
|
5
6
|
|
@@ -27,4 +28,5 @@ require 'feedjira/parser/google_docs_atom'
|
|
27
28
|
|
28
29
|
module Feedjira
|
29
30
|
class NoParserAvailable < StandardError; end
|
31
|
+
class FetchFailure < StandardError; end
|
30
32
|
end
|
data/lib/feedjira/feed.rb
CHANGED
@@ -1,32 +1,9 @@
|
|
1
1
|
module Feedjira
|
2
2
|
class Feed
|
3
|
-
USER_AGENT = 'feedjira http://feedjira.com'
|
4
|
-
|
5
|
-
# Passes raw XML and callbacks to a parser.
|
6
|
-
# === Parameters
|
7
|
-
# [parser<Object>] The parser to pass arguments to - must respond to
|
8
|
-
# `parse` and should return a Feed object.
|
9
|
-
# [xml<String>] The XML that you would like parsed.
|
10
|
-
# === Returns
|
11
|
-
# An instance of the parser feed type.
|
12
3
|
def self.parse_with(parser, xml, &block)
|
13
4
|
parser.parse xml, &block
|
14
5
|
end
|
15
6
|
|
16
|
-
# Takes a raw XML feed and attempts to parse it. If no parser is available a Feedjira::NoParserAvailable exception is raised.
|
17
|
-
# You can pass a block to be called when there's an error during the parsing.
|
18
|
-
# === Parameters
|
19
|
-
# [xml<String>] The XML that you would like parsed.
|
20
|
-
# === Returns
|
21
|
-
# An instance of the determined feed type. By default, one of these:
|
22
|
-
# * Feedjira::Parser::RSSFeedBurner
|
23
|
-
# * Feedjira::Parser::GoogleDocsAtom
|
24
|
-
# * Feedjira::Parser::AtomFeedBurner
|
25
|
-
# * Feedjira::Parser::Atom
|
26
|
-
# * Feedjira::Parser::ITunesRSS
|
27
|
-
# * Feedjira::Parser::RSS
|
28
|
-
# === Raises
|
29
|
-
# Feedjira::NoParserAvailable : If no valid parser classes could be found for the feed.
|
30
7
|
def self.parse(xml, &block)
|
31
8
|
if parser = determine_feed_parser_for_xml(xml)
|
32
9
|
parse_with parser, xml, &block
|
@@ -35,31 +12,15 @@ module Feedjira
|
|
35
12
|
end
|
36
13
|
end
|
37
14
|
|
38
|
-
# Determines the correct parser class to use for parsing the feed.
|
39
|
-
#
|
40
|
-
# === Parameters
|
41
|
-
# [xml<String>] The XML that you would like determine the parser for.
|
42
|
-
# === Returns
|
43
|
-
# The class name of the parser that can handle the XML.
|
44
15
|
def self.determine_feed_parser_for_xml(xml)
|
45
16
|
start_of_doc = xml.slice(0, 2000)
|
46
17
|
feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
|
47
18
|
end
|
48
19
|
|
49
|
-
# Adds a new feed parsing class that will be used for parsing.
|
50
|
-
#
|
51
|
-
# === Parameters
|
52
|
-
# [klass<Constant>] The class/constant that you want to register.
|
53
|
-
# === Returns
|
54
|
-
# A updated array of feed parser class names.
|
55
20
|
def self.add_feed_class(klass)
|
56
21
|
feed_classes.unshift klass
|
57
22
|
end
|
58
23
|
|
59
|
-
# Provides a list of registered feed parsing classes.
|
60
|
-
#
|
61
|
-
# === Returns
|
62
|
-
# A array of class names.
|
63
24
|
def self.feed_classes
|
64
25
|
@feed_classes ||= [
|
65
26
|
Feedjira::Parser::RSSFeedBurner,
|
@@ -71,61 +32,29 @@ module Feedjira
|
|
71
32
|
]
|
72
33
|
end
|
73
34
|
|
74
|
-
# Makes all registered feeds types look for the passed in element to parse.
|
75
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
76
|
-
#
|
77
|
-
# === Parameters
|
78
|
-
# [element_tag<String>] The element tag
|
79
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
80
35
|
def self.add_common_feed_element(element_tag, options = {})
|
81
36
|
feed_classes.each do |k|
|
82
37
|
k.element element_tag, options
|
83
38
|
end
|
84
39
|
end
|
85
40
|
|
86
|
-
# Makes all registered feeds types look for the passed in elements to parse.
|
87
|
-
# This is actually just a call to elements (a SAXMachine call) in the class.
|
88
|
-
#
|
89
|
-
# === Parameters
|
90
|
-
# [element_tag<String>] The element tag
|
91
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
92
41
|
def self.add_common_feed_elements(element_tag, options = {})
|
93
42
|
feed_classes.each do |k|
|
94
43
|
k.elements element_tag, options
|
95
44
|
end
|
96
45
|
end
|
97
46
|
|
98
|
-
# Makes all registered entry types look for the passed in element to parse.
|
99
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
100
|
-
#
|
101
|
-
# === Parameters
|
102
|
-
# [element_tag<String>]
|
103
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
104
47
|
def self.add_common_feed_entry_element(element_tag, options = {})
|
105
48
|
call_on_each_feed_entry :element, element_tag, options
|
106
49
|
end
|
107
50
|
|
108
|
-
# Makes all registered entry types look for the passed in elements to parse.
|
109
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
110
|
-
#
|
111
|
-
# === Parameters
|
112
|
-
# [element_tag<String>]
|
113
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
114
51
|
def self.add_common_feed_entry_elements(element_tag, options = {})
|
115
52
|
call_on_each_feed_entry :elements, element_tag, options
|
116
53
|
end
|
117
54
|
|
118
|
-
# Call a method on all feed entries classes.
|
119
|
-
#
|
120
|
-
# === Parameters
|
121
|
-
# [method<Symbol>] The method name
|
122
|
-
# [parameters<Array>] The method parameters
|
123
55
|
def self.call_on_each_feed_entry(method, *parameters)
|
124
56
|
feed_classes.each do |k|
|
125
|
-
# iterate on the collections defined in the sax collection
|
126
57
|
k.sax_config.collection_elements.each_value do |vl|
|
127
|
-
# vl is a list of CollectionConfig mapped to an attribute name
|
128
|
-
# we'll look for the one set as 'entries' and add the new element
|
129
58
|
vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
|
130
59
|
v.data_class.send(method, *parameters)
|
131
60
|
end
|
@@ -133,325 +62,24 @@ module Feedjira
|
|
133
62
|
end
|
134
63
|
end
|
135
64
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
# * :cookiefile - file to read cookies
|
143
|
-
# * :cookies - contents of cookies header
|
144
|
-
# * :http_authentication - array containing username, then password
|
145
|
-
# * :proxy_url - proxy url
|
146
|
-
# * :proxy_port - proxy port
|
147
|
-
# * :max_redirects - max number of redirections
|
148
|
-
# * :timeout - timeout
|
149
|
-
# * :ssl_verify_host - boolean
|
150
|
-
# * :ssl_verify_peer - boolean
|
151
|
-
# * :ssl_version - the ssl version to use, see OpenSSL::SSL::SSLContext::METHODS for options
|
152
|
-
def self.setup_easy(curl, options={})
|
153
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
|
154
|
-
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
155
|
-
curl.headers["Accept-Language"] = options[:language] if options.has_key?(:language)
|
156
|
-
curl.enable_cookies = options[:enable_cookies] if options.has_key?(:enable_cookies)
|
157
|
-
curl.cookiefile = options[:cookiefile] if options.has_key?(:cookiefile)
|
158
|
-
curl.cookies = options[:cookies] if options.has_key?(:cookies)
|
159
|
-
|
160
|
-
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
161
|
-
curl.proxy_url = options[:proxy_url] if options.has_key?(:proxy_url)
|
162
|
-
curl.proxy_port = options[:proxy_port] if options.has_key?(:proxy_port)
|
163
|
-
curl.max_redirects = options[:max_redirects] if options[:max_redirects]
|
164
|
-
curl.timeout = options[:timeout] if options[:timeout]
|
165
|
-
curl.ssl_verify_host = options[:ssl_verify_host] if options.has_key?(:ssl_verify_host)
|
166
|
-
curl.ssl_verify_peer = options[:ssl_verify_peer] if options.has_key?(:ssl_verify_peer)
|
167
|
-
curl.ssl_version = options[:ssl_version] if options.has_key?(:ssl_version)
|
168
|
-
|
169
|
-
curl.follow_location = true
|
170
|
-
end
|
171
|
-
|
172
|
-
# Fetches and returns the raw XML for each URL provided.
|
173
|
-
#
|
174
|
-
# === Parameters
|
175
|
-
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
176
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
177
|
-
# :if_modified_since - Time object representing when the feed was last updated.
|
178
|
-
# :if_none_match - String that's normally an etag for the request that was stored previously.
|
179
|
-
# :on_success - Block that gets executed after a successful request.
|
180
|
-
# :on_failure - Block that gets executed after a failed request.
|
181
|
-
# * all parameters defined in setup_easy
|
182
|
-
# === Returns
|
183
|
-
# A String of XML if a single URL is passed.
|
184
|
-
#
|
185
|
-
# A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
|
186
|
-
def self.fetch_raw(urls, options = {})
|
187
|
-
url_queue = [*urls]
|
188
|
-
multi = Curl::Multi.new
|
189
|
-
responses = {}
|
190
|
-
url_queue.each do |url|
|
191
|
-
easy = Curl::Easy.new(url) do |curl|
|
192
|
-
setup_easy curl, options
|
193
|
-
|
194
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
195
|
-
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
196
|
-
|
197
|
-
curl.on_success do |c|
|
198
|
-
responses[url] = decode_content(c)
|
199
|
-
end
|
200
|
-
|
201
|
-
curl.on_complete do |c, err|
|
202
|
-
responses[url] = c.response_code unless responses.has_key?(url)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
multi.add(easy)
|
206
|
-
end
|
207
|
-
|
208
|
-
multi.perform
|
209
|
-
urls.is_a?(String) ? responses.values.first : responses
|
210
|
-
end
|
211
|
-
|
212
|
-
# Fetches and returns the parsed XML for each URL provided.
|
213
|
-
#
|
214
|
-
# === Parameters
|
215
|
-
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
216
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
217
|
-
# * :user_agent - String that overrides the default user agent.
|
218
|
-
# * :if_modified_since - Time object representing when the feed was last updated.
|
219
|
-
# * :if_none_match - String, an etag for the request that was stored previously.
|
220
|
-
# * :on_success - Block that gets executed after a successful request.
|
221
|
-
# * :on_failure - Block that gets executed after a failed request.
|
222
|
-
# === Returns
|
223
|
-
# A Feed object if a single URL is passed.
|
224
|
-
#
|
225
|
-
# A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
|
226
|
-
def self.fetch_and_parse(urls, options = {})
|
227
|
-
url_queue = [*urls]
|
228
|
-
multi = Curl::Multi.new
|
229
|
-
responses = {}
|
230
|
-
|
231
|
-
# I broke these down so I would only try to do 30 simultaneously because
|
232
|
-
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
|
233
|
-
url_queue.slice!(0, 30).each do |url|
|
234
|
-
add_url_to_multi(multi, url, url_queue, responses, options)
|
235
|
-
end
|
65
|
+
def self.fetch_and_parse(url)
|
66
|
+
response = connection(url).get
|
67
|
+
raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
|
68
|
+
xml = response.body
|
69
|
+
parser_klass = determine_feed_parser_for_xml xml
|
70
|
+
raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
|
236
71
|
|
237
|
-
|
238
|
-
|
72
|
+
feed = parse_with parser_klass, xml
|
73
|
+
feed.feed_url = url
|
74
|
+
feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
|
75
|
+
feed.last_modified = response.headers['last-modified']
|
76
|
+
feed
|
239
77
|
end
|
240
78
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
# === Returns
|
246
|
-
# A decoded string of XML.
|
247
|
-
def self.decode_content(c)
|
248
|
-
if c.header_str.match(/Content-Encoding: gzip/i)
|
249
|
-
begin
|
250
|
-
gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
|
251
|
-
xml = gz.read
|
252
|
-
gz.close
|
253
|
-
rescue Zlib::GzipFile::Error
|
254
|
-
# Maybe this is not gzipped?
|
255
|
-
xml = c.body_str
|
256
|
-
end
|
257
|
-
elsif c.header_str.match(/Content-Encoding: deflate/i)
|
258
|
-
xml = Zlib::Inflate.inflate(c.body_str)
|
259
|
-
else
|
260
|
-
xml = c.body_str
|
261
|
-
end
|
262
|
-
|
263
|
-
xml
|
264
|
-
end
|
265
|
-
|
266
|
-
# Updates each feed for each Feed object provided.
|
267
|
-
#
|
268
|
-
# === Parameters
|
269
|
-
# [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
|
270
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
271
|
-
# * :on_success - Block that gets executed after a successful request.
|
272
|
-
# * :on_failure - Block that gets executed after a failed request.
|
273
|
-
# * all parameters defined in setup_easy
|
274
|
-
# === Returns
|
275
|
-
# A updated Feed object if a single URL is passed.
|
276
|
-
#
|
277
|
-
# A Hash if multiple Feeds are passed. The key will be the URL, and the value the updated Feed object.
|
278
|
-
def self.update(feeds, options = {})
|
279
|
-
feed_queue = [*feeds]
|
280
|
-
multi = Curl::Multi.new
|
281
|
-
responses = {}
|
282
|
-
|
283
|
-
feed_queue.slice!(0, 30).each do |feed|
|
284
|
-
add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
285
|
-
end
|
286
|
-
|
287
|
-
multi.perform
|
288
|
-
feeds.is_a?(Array) ? responses : responses.values.first
|
289
|
-
end
|
290
|
-
|
291
|
-
# An abstraction for adding a feed by URL to the passed Curb::multi stack.
|
292
|
-
#
|
293
|
-
# === Parameters
|
294
|
-
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
|
295
|
-
# [url<String>] The URL of the feed that you would like to be fetched.
|
296
|
-
# [url_queue<Array>] An array of URLs that are queued for request.
|
297
|
-
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
298
|
-
# [feeds<String> or <Array>] A single feed object, or an array of feed objects.
|
299
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
300
|
-
# * :on_success - Block that gets executed after a successful request.
|
301
|
-
# * :on_failure - Block that gets executed after a failed request.
|
302
|
-
# * all parameters defined in setup_easy
|
303
|
-
# === Returns
|
304
|
-
# The updated Curl::Multi object with the request details added to it's stack.
|
305
|
-
def self.add_url_to_multi(multi, url, url_queue, responses, options)
|
306
|
-
easy = Curl::Easy.new(url) do |curl|
|
307
|
-
setup_easy curl, options
|
308
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
309
|
-
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
310
|
-
|
311
|
-
curl.on_success do |c|
|
312
|
-
xml = decode_content(c)
|
313
|
-
klass = determine_feed_parser_for_xml(xml)
|
314
|
-
|
315
|
-
if klass
|
316
|
-
begin
|
317
|
-
feed = parse_with klass, xml, &on_parser_failure(url)
|
318
|
-
|
319
|
-
feed.feed_url = c.last_effective_url
|
320
|
-
feed.etag = etag_from_header(c.header_str)
|
321
|
-
feed.last_modified = last_modified_from_header(c.header_str)
|
322
|
-
responses[url] = feed
|
323
|
-
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
324
|
-
rescue Exception => e
|
325
|
-
call_on_failure(c, e, options[:on_failure])
|
326
|
-
end
|
327
|
-
else
|
328
|
-
call_on_failure(c, "Can't determine a parser", options[:on_failure])
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
|
-
#
|
333
|
-
# trigger on_failure for 404s
|
334
|
-
#
|
335
|
-
curl.on_complete do |c|
|
336
|
-
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
337
|
-
responses[url] = c.response_code unless responses.has_key?(url)
|
338
|
-
end
|
339
|
-
|
340
|
-
curl.on_redirect do |c|
|
341
|
-
if c.response_code == 304 # it's not modified. this isn't an error condition
|
342
|
-
options[:on_success].call(url, nil) if options.has_key?(:on_success)
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
curl.on_missing do |c|
|
347
|
-
if c.response_code == 404 && options.has_key?(:on_failure)
|
348
|
-
call_on_failure(c, 'Server returned a 404', options[:on_failure])
|
349
|
-
end
|
350
|
-
end
|
351
|
-
|
352
|
-
curl.on_failure do |c, err|
|
353
|
-
responses[url] = c.response_code
|
354
|
-
call_on_failure(c, err, options[:on_failure])
|
355
|
-
end
|
356
|
-
end
|
357
|
-
multi.add(easy)
|
358
|
-
end
|
359
|
-
|
360
|
-
# An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
|
361
|
-
#
|
362
|
-
# === Parameters
|
363
|
-
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
|
364
|
-
# [feed<Feed>] A feed object that you would like to be fetched.
|
365
|
-
# [url_queue<Array>] An array of feed objects that are queued for request.
|
366
|
-
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
367
|
-
# [feeds<String>] or <Array> A single feed object, or an array of feed objects.
|
368
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
369
|
-
# * :on_success - Block that gets executed after a successful request.
|
370
|
-
# * :on_failure - Block that gets executed after a failed request.
|
371
|
-
# * all parameters defined in setup_easy
|
372
|
-
# === Returns
|
373
|
-
# The updated Curl::Multi object with the request details added to it's stack.
|
374
|
-
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
375
|
-
easy = Curl::Easy.new(feed.feed_url) do |curl|
|
376
|
-
setup_easy curl, options
|
377
|
-
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
|
378
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since] if options[:if_modified_since] && (!feed.last_modified || (Time.parse(options[:if_modified_since].to_s) > feed.last_modified))
|
379
|
-
curl.headers["If-None-Match"] = feed.etag if feed.etag
|
380
|
-
|
381
|
-
curl.on_success do |c|
|
382
|
-
begin
|
383
|
-
updated_feed = Feed.parse c.body_str, &on_parser_failure(feed.feed_url)
|
384
|
-
|
385
|
-
updated_feed.feed_url = c.last_effective_url
|
386
|
-
updated_feed.etag = etag_from_header(c.header_str)
|
387
|
-
updated_feed.last_modified = last_modified_from_header(c.header_str)
|
388
|
-
feed.update_from_feed(updated_feed)
|
389
|
-
responses[feed.feed_url] = feed
|
390
|
-
options[:on_success].call(feed) if options.has_key?(:on_success)
|
391
|
-
rescue Exception => e
|
392
|
-
call_on_failure(c, e, options[:on_failure])
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
curl.on_failure do |c, err| # response code 50X
|
397
|
-
responses[feed.feed_url] = c.response_code
|
398
|
-
call_on_failure(c, 'Server returned a 404', options[:on_failure])
|
399
|
-
end
|
400
|
-
|
401
|
-
curl.on_redirect do |c, err| # response code 30X
|
402
|
-
if c.response_code == 304
|
403
|
-
options[:on_success].call(feed) if options.has_key?(:on_success)
|
404
|
-
else
|
405
|
-
responses[feed.feed_url] = c.response_code
|
406
|
-
call_on_failure(c, err, options[:on_failure])
|
407
|
-
end
|
408
|
-
end
|
409
|
-
|
410
|
-
curl.on_complete do |c|
|
411
|
-
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
412
|
-
responses[feed.feed_url] = feed unless responses.has_key?(feed.feed_url)
|
413
|
-
end
|
414
|
-
end
|
415
|
-
multi.add(easy)
|
416
|
-
end
|
417
|
-
|
418
|
-
# Determines the etag from the request headers.
|
419
|
-
#
|
420
|
-
# === Parameters
|
421
|
-
# [header<String>] Raw request header returned from the request
|
422
|
-
# === Returns
|
423
|
-
# A string of the etag or nil if it cannot be found in the headers.
|
424
|
-
def self.etag_from_header(header)
|
425
|
-
header =~ /.*ETag:\s(.*)\r/
|
426
|
-
$1
|
427
|
-
end
|
428
|
-
|
429
|
-
# Determines the last modified date from the request headers.
|
430
|
-
#
|
431
|
-
# === Parameters
|
432
|
-
# [header<String>] Raw request header returned from the request
|
433
|
-
# === Returns
|
434
|
-
# A Time object of the last modified date or nil if it cannot be found in the headers.
|
435
|
-
def self.last_modified_from_header(header)
|
436
|
-
header =~ /.*Last-Modified:\s(.*)\r/
|
437
|
-
Time.parse_safely($1) if $1
|
438
|
-
end
|
439
|
-
|
440
|
-
class << self
|
441
|
-
private
|
442
|
-
|
443
|
-
def on_parser_failure(url)
|
444
|
-
Proc.new { |message| raise "Error while parsing [#{url}] #{message}" }
|
445
|
-
end
|
446
|
-
|
447
|
-
def call_on_failure(c, error, on_failure)
|
448
|
-
if on_failure
|
449
|
-
if on_failure.arity == 2
|
450
|
-
on_failure.call(c, error)
|
451
|
-
else
|
452
|
-
warn "on_failure proc with invalid parameters number #{on_failure.arity} instead of 2, ignoring it"
|
453
|
-
end
|
454
|
-
end
|
79
|
+
def self.connection(url)
|
80
|
+
Faraday.new(url: url) do |conn|
|
81
|
+
conn.use FaradayMiddleware::FollowRedirects, limit: 3
|
82
|
+
conn.adapter :net_http
|
455
83
|
end
|
456
84
|
end
|
457
85
|
end
|
data/lib/feedjira/version.rb
CHANGED
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -9,6 +9,32 @@ class FailParser
|
|
9
9
|
end
|
10
10
|
|
11
11
|
describe Feedjira::Feed do
|
12
|
+
describe '.fetch_and_parse' do
|
13
|
+
it 'raises an error when the fetch fails' do
|
14
|
+
url = 'http://www.example.com/feed.xml'
|
15
|
+
expect {
|
16
|
+
Feedjira::Feed.fetch_and_parse url
|
17
|
+
}.to raise_error Feedjira::FetchFailure
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'raises an error when no parser can be found' do
|
21
|
+
url = 'http://feedjira.com'
|
22
|
+
expect {
|
23
|
+
Feedjira::Feed.fetch_and_parse url
|
24
|
+
}.to raise_error Feedjira::NoParserAvailable
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'fetches and parses the feed' do
|
28
|
+
url = 'http://feedjira.com/blog/feed.xml'
|
29
|
+
feed = Feedjira::Feed.fetch_and_parse url
|
30
|
+
|
31
|
+
expect(feed.class).to eq Feedjira::Parser::Atom
|
32
|
+
expect(feed.entries.count).to eq 3
|
33
|
+
expect(feed.feed_url).to eq url
|
34
|
+
expect(feed.etag).to eq 'a22ad-3190-5037e71966e80'
|
35
|
+
expect(feed.last_modified).to eq 'Sat, 20 Sep 2014 12:34:50 GMT'
|
36
|
+
end
|
37
|
+
end
|
12
38
|
|
13
39
|
describe "#add_common_feed_element" do
|
14
40
|
before(:all) do
|
@@ -151,76 +177,6 @@ describe Feedjira::Feed do
|
|
151
177
|
|
152
178
|
end
|
153
179
|
|
154
|
-
describe "#setup_easy" do
|
155
|
-
class MockCurl
|
156
|
-
attr_accessor :follow_location, :userpwd, :proxy_url, :proxy_port, :max_redirects, :timeout, :ssl_verify_host, :ssl_verify_peer, :ssl_version, :enable_cookies, :cookiefile, :cookies
|
157
|
-
|
158
|
-
def headers
|
159
|
-
@headers ||= {}
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
let(:curl) { MockCurl.new }
|
164
|
-
|
165
|
-
it "sets defaults on curl" do
|
166
|
-
Feedjira::Feed.setup_easy curl
|
167
|
-
|
168
|
-
expect(curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
169
|
-
expect(curl.follow_location).to eq true
|
170
|
-
end
|
171
|
-
|
172
|
-
it "allows user agent over-ride" do
|
173
|
-
Feedjira::Feed.setup_easy(curl, user_agent: '007')
|
174
|
-
expect(curl.headers["User-Agent"]).to eq '007'
|
175
|
-
end
|
176
|
-
|
177
|
-
it "allows to set language" do
|
178
|
-
Feedjira::Feed.setup_easy(curl, language: 'en-US')
|
179
|
-
expect(curl.headers["Accept-Language"]).to eq 'en-US'
|
180
|
-
end
|
181
|
-
|
182
|
-
it "enables compression" do
|
183
|
-
Feedjira::Feed.setup_easy(curl, compress: true)
|
184
|
-
expect(curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
185
|
-
end
|
186
|
-
|
187
|
-
it "enables compression even when you act like you don't want it" do
|
188
|
-
Feedjira::Feed.setup_easy(curl, compress: false)
|
189
|
-
expect(curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
190
|
-
end
|
191
|
-
|
192
|
-
it "sets up http auth" do
|
193
|
-
Feedjira::Feed.setup_easy(curl, http_authentication: ['user', 'pass'])
|
194
|
-
expect(curl.userpwd).to eq 'user:pass'
|
195
|
-
end
|
196
|
-
|
197
|
-
it "passes known options to curl" do
|
198
|
-
known_options = {
|
199
|
-
enable_cookies: true,
|
200
|
-
cookiefile: 'cookies.txt',
|
201
|
-
cookies: 'asdf',
|
202
|
-
proxy_url: 'http://proxy.url.com',
|
203
|
-
proxy_port: '1234',
|
204
|
-
max_redirects: 2,
|
205
|
-
timeout: 500,
|
206
|
-
ssl_verify_host: true,
|
207
|
-
ssl_verify_peer: true,
|
208
|
-
ssl_version: :omg
|
209
|
-
}
|
210
|
-
|
211
|
-
Feedjira::Feed.setup_easy curl, known_options
|
212
|
-
|
213
|
-
known_options.each do |option|
|
214
|
-
key, value = option
|
215
|
-
expect(curl.send(key)).to eq value
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
it "ignores unknown options" do
|
220
|
-
expect { Feedjira::Feed.setup_easy curl, foo: :bar }.to_not raise_error
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
180
|
describe "when adding feed types" do
|
225
181
|
it "should prioritize added types over the built in ones" do
|
226
182
|
feed_text = "Atom asdf"
|
@@ -239,530 +195,4 @@ describe Feedjira::Feed do
|
|
239
195
|
Feedjira::Feed.feed_classes.reject! {|o| o == new_feed_type }
|
240
196
|
end
|
241
197
|
end
|
242
|
-
|
243
|
-
describe '#etag_from_header' do
|
244
|
-
before(:each) do
|
245
|
-
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
246
|
-
end
|
247
|
-
|
248
|
-
it "should return the etag from the header if it exists" do
|
249
|
-
expect(Feedjira::Feed.etag_from_header(@header)).to eq "ziEyTl4q9GH04BR4jgkImd0GvSE"
|
250
|
-
end
|
251
|
-
|
252
|
-
it "should return nil if there is no etag in the header" do
|
253
|
-
expect(Feedjira::Feed.etag_from_header("foo")).to be_nil
|
254
|
-
end
|
255
|
-
|
256
|
-
end
|
257
|
-
|
258
|
-
describe '#last_modified_from_header' do
|
259
|
-
before(:each) do
|
260
|
-
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
261
|
-
end
|
262
|
-
|
263
|
-
it "should return the last modified date from the header if it exists" do
|
264
|
-
expect(Feedjira::Feed.last_modified_from_header(@header)).to eq Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
|
265
|
-
end
|
266
|
-
|
267
|
-
it "should return nil if there is no last modified date in the header" do
|
268
|
-
expect(Feedjira::Feed.last_modified_from_header("foo")).to be_nil
|
269
|
-
end
|
270
|
-
end
|
271
|
-
|
272
|
-
describe "fetching feeds" do
|
273
|
-
before(:each) do
|
274
|
-
@paul_feed = { :xml => load_sample("PaulDixExplainsNothing.xml"), :url => "http://feeds.feedburner.com/PaulDixExplainsNothing" }
|
275
|
-
@trotter_feed = { :xml => load_sample("TrotterCashionHome.xml"), :url => "http://feeds2.feedburner.com/trottercashion" }
|
276
|
-
@invalid_feed = { :xml => 'This feed is invalid', :url => "http://feeds.feedburner.com/InvalidFeed" }
|
277
|
-
end
|
278
|
-
|
279
|
-
describe "#fetch_raw" do
|
280
|
-
before(:each) do
|
281
|
-
@cmock = double('cmock', :header_str => '', :body_str => @paul_feed[:xml] )
|
282
|
-
@multi = double('curl_multi', :add => true, :perform => true)
|
283
|
-
@curl_easy = double('curl_easy')
|
284
|
-
@curl = double('curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
285
|
-
allow(@curl).to receive(:on_success).and_yield(@cmock)
|
286
|
-
|
287
|
-
allow(Curl::Multi).to receive(:new).and_return(@multi)
|
288
|
-
allow(Curl::Easy).to receive(:new).and_yield(@curl).and_return(@curl_easy)
|
289
|
-
end
|
290
|
-
|
291
|
-
it "should set user agent if it's passed as an option" do
|
292
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :user_agent => 'Custom Useragent')
|
293
|
-
expect(@curl.headers['User-Agent']).to eq 'Custom Useragent'
|
294
|
-
end
|
295
|
-
|
296
|
-
it "should set user agent to default if it's not passed as an option" do
|
297
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url])
|
298
|
-
expect(@curl.headers['User-Agent']).to eq Feedjira::Feed::USER_AGENT
|
299
|
-
end
|
300
|
-
|
301
|
-
it "should set if modified since as an option if passed" do
|
302
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :if_modified_since => Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT"))
|
303
|
-
expect(@curl.headers["If-Modified-Since"]).to eq 'Wed, 28 Jan 2009 04:10:32 GMT'
|
304
|
-
end
|
305
|
-
|
306
|
-
it "should set if none match as an option if passed" do
|
307
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
308
|
-
expect(@curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
309
|
-
end
|
310
|
-
|
311
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
312
|
-
expect(@curl).to receive(:userpwd=).with('username:password')
|
313
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :http_authentication => ['username', 'password'])
|
314
|
-
end
|
315
|
-
|
316
|
-
it 'should set accepted encodings' do
|
317
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :compress => true)
|
318
|
-
expect(@curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
319
|
-
end
|
320
|
-
|
321
|
-
it "should return raw xml" do
|
322
|
-
raw_xml = Feedjira::Feed.fetch_raw @paul_feed[:url]
|
323
|
-
expect(raw_xml).to match /^#{Regexp.escape('<?xml version="1.0" encoding="UTF-8"?>')}/
|
324
|
-
end
|
325
|
-
|
326
|
-
it "should take multiple feed urls and return a hash of urls and response xml" do
|
327
|
-
multi = double('curl_multi', :add => true, :perform => true)
|
328
|
-
allow(Curl::Multi).to receive(:new).and_return(multi)
|
329
|
-
|
330
|
-
paul_response = double('paul_response', :header_str => '', :body_str => @paul_feed[:xml] )
|
331
|
-
trotter_response = double('trotter_response', :header_str => '', :body_str => @trotter_feed[:xml] )
|
332
|
-
|
333
|
-
paul_curl = double('paul_curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
334
|
-
allow(paul_curl).to receive(:on_success).and_yield(paul_response)
|
335
|
-
|
336
|
-
trotter_curl = double('trotter_curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
337
|
-
allow(trotter_curl).to receive(:on_success).and_yield(trotter_response)
|
338
|
-
|
339
|
-
expect(Curl::Easy).to receive(:new).with(@paul_feed[:url]).ordered.and_yield(paul_curl)
|
340
|
-
expect(Curl::Easy).to receive(:new).with(@trotter_feed[:url]).ordered.and_yield(trotter_curl)
|
341
|
-
|
342
|
-
results = Feedjira::Feed.fetch_raw([@paul_feed[:url], @trotter_feed[:url]])
|
343
|
-
expect(results.keys).to include(@paul_feed[:url])
|
344
|
-
expect(results.keys).to include(@trotter_feed[:url])
|
345
|
-
expect(results[@paul_feed[:url]]).to match /Paul Dix/
|
346
|
-
expect(results[@trotter_feed[:url]]).to match /Trotter Cashion/
|
347
|
-
end
|
348
|
-
|
349
|
-
it "should always return a hash when passed an array" do
|
350
|
-
results = Feedjira::Feed.fetch_raw([@paul_feed[:url]])
|
351
|
-
expect(results.class).to eq Hash
|
352
|
-
end
|
353
|
-
end
|
354
|
-
|
355
|
-
describe "#add_url_to_multi" do
|
356
|
-
before(:each) do
|
357
|
-
allow_message_expectations_on_nil
|
358
|
-
@multi = Curl::Multi.get([@paul_feed[:url]], {:follow_location => true}, {:pipeline => true})
|
359
|
-
allow(@multi).to receive(:add)
|
360
|
-
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
361
|
-
|
362
|
-
allow(Curl::Easy).to receive(:new).and_yield(@easy_curl)
|
363
|
-
end
|
364
|
-
|
365
|
-
it "should set user agent if it's passed as an option" do
|
366
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :user_agent => 'My cool application')
|
367
|
-
expect(@easy_curl.headers["User-Agent"]).to eq 'My cool application'
|
368
|
-
end
|
369
|
-
|
370
|
-
it "should set user agent to default if it's not passed as an option" do
|
371
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
372
|
-
expect(@easy_curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
373
|
-
end
|
374
|
-
|
375
|
-
it "should set if modified since as an option if passed" do
|
376
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_modified_since => Time.parse_safely("Jan 25 2009 04:10:32 GMT"))
|
377
|
-
expect(@easy_curl.headers["If-Modified-Since"]).to eq 'Sun, 25 Jan 2009 04:10:32 GMT'
|
378
|
-
end
|
379
|
-
|
380
|
-
it 'should set follow location to true' do
|
381
|
-
expect(@easy_curl).to receive(:follow_location=).with(true)
|
382
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
383
|
-
end
|
384
|
-
|
385
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
386
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :http_authentication => ['myusername', 'mypassword'])
|
387
|
-
expect(@easy_curl.userpwd).to eq 'myusername:mypassword'
|
388
|
-
end
|
389
|
-
|
390
|
-
it 'should set accepted encodings' do
|
391
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {:compress => true})
|
392
|
-
expect(@easy_curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
393
|
-
end
|
394
|
-
|
395
|
-
it "should set if_none_match as an option if passed" do
|
396
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
397
|
-
expect(@easy_curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
398
|
-
end
|
399
|
-
|
400
|
-
describe 'on success' do
|
401
|
-
before(:each) do
|
402
|
-
@feed = double('feed', :feed_url= => true, :etag= => true, :last_modified= => true)
|
403
|
-
allow(Feedjira::Feed).to receive(:decode_content).and_return(@paul_feed[:xml])
|
404
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return(Feedjira::Parser::AtomFeedBurner)
|
405
|
-
allow(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@feed)
|
406
|
-
allow(Feedjira::Feed).to receive(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
407
|
-
allow(Feedjira::Feed).to receive(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
|
408
|
-
end
|
409
|
-
|
410
|
-
it 'should decode the response body' do
|
411
|
-
expect(Feedjira::Feed).to receive(:decode_content).with(@easy_curl).and_return(@paul_feed[:xml])
|
412
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
413
|
-
@easy_curl.on_success.call(@easy_curl)
|
414
|
-
end
|
415
|
-
|
416
|
-
it 'should determine the xml parser class' do
|
417
|
-
expect(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).with(@paul_feed[:xml]).and_return(Feedjira::Parser::AtomFeedBurner)
|
418
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
419
|
-
@easy_curl.on_success.call(@easy_curl)
|
420
|
-
end
|
421
|
-
|
422
|
-
it 'should parse the xml' do
|
423
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).
|
424
|
-
with(@paul_feed[:xml]).and_return(@feed)
|
425
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
426
|
-
@easy_curl.on_success.call(@easy_curl)
|
427
|
-
end
|
428
|
-
|
429
|
-
describe 'when a compatible xml parser class is found' do
|
430
|
-
it 'should set the last effective url to the feed url' do
|
431
|
-
expect(@easy_curl).to receive(:last_effective_url).and_return(@paul_feed[:url])
|
432
|
-
expect(@feed).to receive(:feed_url=).with(@paul_feed[:url])
|
433
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
434
|
-
@easy_curl.on_success.call(@easy_curl)
|
435
|
-
end
|
436
|
-
|
437
|
-
it 'should set the etags on the feed' do
|
438
|
-
expect(@feed).to receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
439
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
440
|
-
@easy_curl.on_success.call(@easy_curl)
|
441
|
-
end
|
442
|
-
|
443
|
-
it 'should set the last modified on the feed' do
|
444
|
-
expect(@feed).to receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
|
445
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
446
|
-
@easy_curl.on_success.call(@easy_curl)
|
447
|
-
end
|
448
|
-
|
449
|
-
it 'should add the feed to the responses' do
|
450
|
-
responses = {}
|
451
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
452
|
-
@easy_curl.on_success.call(@easy_curl)
|
453
|
-
|
454
|
-
expect(responses.length).to eq 1
|
455
|
-
expect(responses['http://feeds.feedburner.com/PaulDixExplainsNothing']).to eq @feed
|
456
|
-
end
|
457
|
-
|
458
|
-
it 'should call proc if :on_success option is passed' do
|
459
|
-
success = lambda { |url, feed| }
|
460
|
-
expect(success).to receive(:call).with(@paul_feed[:url], @feed)
|
461
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_success => success })
|
462
|
-
@easy_curl.on_success.call(@easy_curl)
|
463
|
-
end
|
464
|
-
|
465
|
-
describe 'when the parser raises an exception' do
|
466
|
-
it 'invokes the on_failure callback with that exception' do
|
467
|
-
failure = double 'Failure callback', arity: 2
|
468
|
-
expect(failure).to receive(:call).with(@easy_curl, an_instance_of(Hell))
|
469
|
-
|
470
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_raise Hell
|
471
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
472
|
-
|
473
|
-
@easy_curl.on_success.call(@easy_curl)
|
474
|
-
end
|
475
|
-
end
|
476
|
-
|
477
|
-
describe 'when the parser invokes its on_failure callback' do
|
478
|
-
before(:each) do
|
479
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return FailParser
|
480
|
-
end
|
481
|
-
|
482
|
-
it 'invokes the on_failure callback' do
|
483
|
-
failure = double 'Failure callback', arity: 2
|
484
|
-
expect(failure).to receive(:call).with(@easy_curl, an_instance_of(RuntimeError))
|
485
|
-
|
486
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
487
|
-
@easy_curl.on_success.call(@easy_curl)
|
488
|
-
end
|
489
|
-
end
|
490
|
-
end
|
491
|
-
|
492
|
-
describe 'when no compatible xml parser class is found' do
|
493
|
-
it 'invokes the on_failure callback' do
|
494
|
-
failure = double 'Failure callback', arity: 2
|
495
|
-
expect(failure).to receive(:call).with(@easy_curl, "Can't determine a parser")
|
496
|
-
|
497
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return nil
|
498
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
499
|
-
|
500
|
-
@easy_curl.on_success.call(@easy_curl)
|
501
|
-
end
|
502
|
-
end
|
503
|
-
end
|
504
|
-
|
505
|
-
describe 'on failure' do
|
506
|
-
before(:each) do
|
507
|
-
@headers = "HTTP/1.0 500 Something Bad\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
508
|
-
@body = 'Sorry, something broke'
|
509
|
-
|
510
|
-
allow(@easy_curl).to receive(:response_code).and_return(500)
|
511
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
512
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
513
|
-
end
|
514
|
-
|
515
|
-
it 'should call proc if :on_failure option is passed' do
|
516
|
-
failure = double 'Failure callback', arity: 2
|
517
|
-
expect(failure).to receive(:call).with(@easy_curl, nil)
|
518
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
|
519
|
-
@easy_curl.on_failure.call(@easy_curl)
|
520
|
-
end
|
521
|
-
|
522
|
-
it 'should return the http code in the responses' do
|
523
|
-
responses = {}
|
524
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
525
|
-
@easy_curl.on_failure.call(@easy_curl)
|
526
|
-
|
527
|
-
expect(responses.length).to eq 1
|
528
|
-
expect(responses[@paul_feed[:url]]).to eq 500
|
529
|
-
end
|
530
|
-
end
|
531
|
-
|
532
|
-
describe 'on complete for 404s' do
|
533
|
-
before(:each) do
|
534
|
-
@headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
535
|
-
@body = 'Page could not be found.'
|
536
|
-
|
537
|
-
allow(@easy_curl).to receive(:response_code).and_return(404)
|
538
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
539
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
540
|
-
end
|
541
|
-
|
542
|
-
it 'should call proc if :on_failure option is passed' do
|
543
|
-
complete = double 'Failure callback', arity: 2
|
544
|
-
expect(complete).to receive(:call).with(@easy_curl, 'Server returned a 404')
|
545
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => complete })
|
546
|
-
@easy_curl.on_missing.call(@easy_curl)
|
547
|
-
end
|
548
|
-
|
549
|
-
it 'should return the http code in the responses' do
|
550
|
-
responses = {}
|
551
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
552
|
-
@easy_curl.on_complete.call(@easy_curl)
|
553
|
-
|
554
|
-
expect(responses.length).to eq 1
|
555
|
-
expect(responses[@paul_feed[:url]]).to eq 404
|
556
|
-
end
|
557
|
-
end
|
558
|
-
end
|
559
|
-
|
560
|
-
describe "#add_feed_to_multi" do
|
561
|
-
before(:each) do
|
562
|
-
allow_message_expectations_on_nil
|
563
|
-
@multi = Curl::Multi.get([@paul_feed[:url]], {:follow_location => true}, {:pipeline => true})
|
564
|
-
allow(@multi).to receive(:add)
|
565
|
-
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
566
|
-
@feed = Feedjira::Feed.parse(sample_feedburner_atom_feed)
|
567
|
-
|
568
|
-
allow(Curl::Easy).to receive(:new).and_yield(@easy_curl)
|
569
|
-
end
|
570
|
-
|
571
|
-
it "should set user agent if it's passed as an option" do
|
572
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, :user_agent => 'My cool application')
|
573
|
-
expect(@easy_curl.headers["User-Agent"]).to eq 'My cool application'
|
574
|
-
end
|
575
|
-
|
576
|
-
it "should set user agent to default if it's not passed as an option" do
|
577
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
578
|
-
expect(@easy_curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
579
|
-
end
|
580
|
-
|
581
|
-
it "should set if modified since as an option if passed" do
|
582
|
-
modified_time = Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
|
583
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {:if_modified_since => modified_time})
|
584
|
-
expect(modified_time).to be > @feed.last_modified
|
585
|
-
|
586
|
-
expect(@easy_curl.headers["If-Modified-Since"]).to eq modified_time
|
587
|
-
end
|
588
|
-
|
589
|
-
it 'should set follow location to true' do
|
590
|
-
expect(@easy_curl).to receive(:follow_location=).with(true)
|
591
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
592
|
-
end
|
593
|
-
|
594
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
595
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, :http_authentication => ['myusername', 'mypassword'])
|
596
|
-
expect(@easy_curl.userpwd).to eq 'myusername:mypassword'
|
597
|
-
end
|
598
|
-
|
599
|
-
it "should set if_none_match as an option if passed" do
|
600
|
-
@feed.etag = 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
601
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
602
|
-
expect(@easy_curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
603
|
-
end
|
604
|
-
|
605
|
-
describe 'on success' do
|
606
|
-
before(:each) do
|
607
|
-
@new_feed = @feed.clone
|
608
|
-
allow(@feed).to receive(:update_from_feed)
|
609
|
-
allow(Feedjira::Feed).to receive(:decode_content).and_return(@paul_feed[:xml])
|
610
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return(Feedjira::Parser::AtomFeedBurner)
|
611
|
-
allow(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@new_feed)
|
612
|
-
allow(Feedjira::Feed).to receive(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
613
|
-
allow(Feedjira::Feed).to receive(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
|
614
|
-
end
|
615
|
-
|
616
|
-
it 'should parse the updated feed' do
|
617
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@new_feed)
|
618
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
619
|
-
@easy_curl.on_success.call(@easy_curl)
|
620
|
-
end
|
621
|
-
|
622
|
-
it 'should set the last effective url to the feed url' do
|
623
|
-
expect(@easy_curl).to receive(:last_effective_url).and_return(@paul_feed[:url])
|
624
|
-
expect(@new_feed).to receive(:feed_url=).with(@paul_feed[:url])
|
625
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
626
|
-
@easy_curl.on_success.call(@easy_curl)
|
627
|
-
end
|
628
|
-
|
629
|
-
it 'should set the etags on the feed' do
|
630
|
-
expect(@new_feed).to receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
631
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
632
|
-
@easy_curl.on_success.call(@easy_curl)
|
633
|
-
end
|
634
|
-
|
635
|
-
it 'should set the last modified on the feed' do
|
636
|
-
expect(@new_feed).to receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
|
637
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
638
|
-
@easy_curl.on_success.call(@easy_curl)
|
639
|
-
end
|
640
|
-
|
641
|
-
it 'should add the feed to the responses' do
|
642
|
-
responses = {}
|
643
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
|
644
|
-
@easy_curl.on_success.call(@easy_curl)
|
645
|
-
|
646
|
-
expect(responses.length).to eq 1
|
647
|
-
expect(responses['http://feeds.feedburner.com/PaulDixExplainsNothing']).to eq @feed
|
648
|
-
end
|
649
|
-
|
650
|
-
it 'should call proc if :on_success option is passed' do
|
651
|
-
success = lambda { |feed| }
|
652
|
-
expect(success).to receive(:call).with(@feed)
|
653
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
654
|
-
@easy_curl.on_success.call(@easy_curl)
|
655
|
-
end
|
656
|
-
|
657
|
-
it 'should call update from feed on the old feed with the updated feed' do
|
658
|
-
expect(@feed).to receive(:update_from_feed).with(@new_feed)
|
659
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
660
|
-
@easy_curl.on_success.call(@easy_curl)
|
661
|
-
end
|
662
|
-
|
663
|
-
describe 'when the parser invokes its on_failure callback' do
|
664
|
-
before(:each) do
|
665
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return FailParser
|
666
|
-
end
|
667
|
-
|
668
|
-
it 'invokes the on_failure callback' do
|
669
|
-
failure = double 'Failure callback', arity: 2
|
670
|
-
expect(failure).to receive(:call)
|
671
|
-
|
672
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { on_failure: failure })
|
673
|
-
@easy_curl.on_success.call(@easy_curl)
|
674
|
-
end
|
675
|
-
end
|
676
|
-
end
|
677
|
-
|
678
|
-
describe 'on failure' do
|
679
|
-
before(:each) do
|
680
|
-
@headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
681
|
-
@body = 'Page could not be found.'
|
682
|
-
|
683
|
-
allow(@easy_curl).to receive(:response_code).and_return(404)
|
684
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
685
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
686
|
-
end
|
687
|
-
|
688
|
-
it 'should call on success callback if the response code is 304' do
|
689
|
-
success = lambda { |feed| }
|
690
|
-
expect(success).to receive(:call).with(@feed)
|
691
|
-
expect(@easy_curl).to receive(:response_code).and_return(304)
|
692
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
693
|
-
@easy_curl.on_redirect.call(@easy_curl)
|
694
|
-
end
|
695
|
-
|
696
|
-
it 'should return the http code in the responses' do
|
697
|
-
responses = {}
|
698
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
|
699
|
-
@easy_curl.on_failure.call(@easy_curl)
|
700
|
-
|
701
|
-
expect(responses.length).to eq 1
|
702
|
-
expect(responses[@paul_feed[:url]]).to eq 404
|
703
|
-
end
|
704
|
-
end
|
705
|
-
end
|
706
|
-
|
707
|
-
describe "#fetch_and_parse" do
|
708
|
-
it "passes options to multicurl" do
|
709
|
-
options = { user_agent: '007' }
|
710
|
-
|
711
|
-
expect(Feedjira::Feed).to receive(:add_url_to_multi).
|
712
|
-
with(anything, anything, anything, anything, options)
|
713
|
-
|
714
|
-
Feedjira::Feed.fetch_and_parse(sample_rss_feed, options)
|
715
|
-
end
|
716
|
-
end
|
717
|
-
|
718
|
-
describe "#decode_content" do
|
719
|
-
before(:each) do
|
720
|
-
@curl_easy = double('curl_easy', :body_str => '<xml></xml>')
|
721
|
-
end
|
722
|
-
|
723
|
-
it 'should decode the response body using gzip if the Content-Encoding: is gzip' do
|
724
|
-
allow(@curl_easy).to receive(:header_str).and_return('Content-Encoding: gzip')
|
725
|
-
string_io = double('stringio', :read => @curl_easy.body_str, :close => true)
|
726
|
-
expect(StringIO).to receive(:new).and_return(string_io)
|
727
|
-
expect(Zlib::GzipReader).to receive(:new).with(string_io).and_return(string_io)
|
728
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
729
|
-
end
|
730
|
-
|
731
|
-
it 'should decode the response body using gzip if the Content-Encoding: is gzip even when the case is wrong' do
|
732
|
-
allow(@curl_easy).to receive(:header_str).and_return('content-encoding: gzip')
|
733
|
-
string_io = double('stringio', :read => @curl_easy.body_str, :close => true)
|
734
|
-
expect(StringIO).to receive(:new).and_return(string_io)
|
735
|
-
expect(Zlib::GzipReader).to receive(:new).with(string_io).and_return(string_io)
|
736
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
737
|
-
end
|
738
|
-
|
739
|
-
it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
|
740
|
-
allow(@curl_easy).to receive(:header_str).and_return('Content-Encoding: deflate')
|
741
|
-
expect(Zlib::Inflate).to receive(:inflate).with(@curl_easy.body_str)
|
742
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
743
|
-
end
|
744
|
-
|
745
|
-
it 'should deflate the response body using inflate if the Content-Encoding: is deflate event if the case is wrong' do
|
746
|
-
allow(@curl_easy).to receive(:header_str).and_return('content-encoding: deflate')
|
747
|
-
expect(Zlib::Inflate).to receive(:inflate).with(@curl_easy.body_str)
|
748
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
749
|
-
end
|
750
|
-
|
751
|
-
it 'should return the response body if it is not encoded' do
|
752
|
-
allow(@curl_easy).to receive(:header_str).and_return('')
|
753
|
-
expect(Feedjira::Feed.decode_content(@curl_easy)).to eq '<xml></xml>'
|
754
|
-
end
|
755
|
-
end
|
756
|
-
|
757
|
-
describe "#update" do
|
758
|
-
it "passes options to multicurl" do
|
759
|
-
options = { user_agent: '007' }
|
760
|
-
|
761
|
-
expect(Feedjira::Feed).to receive(:add_feed_to_multi).
|
762
|
-
with(anything, anything, anything, anything, options)
|
763
|
-
|
764
|
-
Feedjira::Feed.update([nil], options)
|
765
|
-
end
|
766
|
-
end
|
767
|
-
end
|
768
198
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedjira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: sax-machine
|
@@ -28,19 +28,33 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
31
|
+
name: faraday
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
34
|
- - "~>"
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: '0.
|
36
|
+
version: '0.9'
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
41
|
- - "~>"
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0.
|
43
|
+
version: '0.9'
|
44
|
+
- !ruby/object:Gem::Dependency
|
45
|
+
name: faraday_middleware
|
46
|
+
requirement: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - "~>"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0.9'
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0.9'
|
44
58
|
- !ruby/object:Gem::Dependency
|
45
59
|
name: loofah
|
46
60
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,7 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
187
|
version: '0'
|
174
188
|
requirements: []
|
175
189
|
rubyforge_project:
|
176
|
-
rubygems_version: 2.
|
190
|
+
rubygems_version: 2.4.6
|
177
191
|
signing_key:
|
178
192
|
specification_version: 4
|
179
193
|
summary: A feed fetching and parsing library
|