feedjira 1.6.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/feedjira.gemspec +4 -3
- data/lib/feedjira.rb +3 -1
- data/lib/feedjira/feed.rb +15 -387
- data/lib/feedjira/version.rb +1 -1
- data/spec/feedjira/feed_spec.rb +26 -596
- metadata +20 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a75fe90b3327d3110c7ba5e5ebe8acdbfc9e4c10
|
4
|
+
data.tar.gz: d4313aca74f021751d583f61c140a18aea2381b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4004f548a5b6a3215294d5b8ebd81496c4603eaab3ba3c2f1b269815010fcbdc12cef8660a10ddd398159a09cb30443131e9c83b6973bf9cad9a746d78728c29
|
7
|
+
data.tar.gz: f426881efa2533cdef5bb34eb0ff57bc86259abfd26eb916fadc79dea1b30f4b95262ffc53d1539494fc306a845b4b68cd35377d79d41bcfd73976a8387fa12d
|
data/CHANGELOG.md
CHANGED
data/feedjira.gemspec
CHANGED
@@ -19,9 +19,10 @@ Gem::Specification.new do |s|
|
|
19
19
|
|
20
20
|
s.platform = Gem::Platform::RUBY
|
21
21
|
|
22
|
-
s.add_dependency 'sax-machine',
|
23
|
-
s.add_dependency '
|
24
|
-
s.add_dependency '
|
22
|
+
s.add_dependency 'sax-machine', '~> 1.0'
|
23
|
+
s.add_dependency 'faraday', '~> 0.9'
|
24
|
+
s.add_dependency 'faraday_middleware', '~> 0.9'
|
25
|
+
s.add_dependency 'loofah', '~> 2.0'
|
25
26
|
|
26
27
|
s.add_development_dependency 'rspec', '~> 3.0'
|
27
28
|
end
|
data/lib/feedjira.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'zlib'
|
2
|
-
require '
|
2
|
+
require 'faraday'
|
3
|
+
require 'faraday_middleware'
|
3
4
|
require 'sax-machine'
|
4
5
|
require 'loofah'
|
5
6
|
|
@@ -27,4 +28,5 @@ require 'feedjira/parser/google_docs_atom'
|
|
27
28
|
|
28
29
|
module Feedjira
|
29
30
|
class NoParserAvailable < StandardError; end
|
31
|
+
class FetchFailure < StandardError; end
|
30
32
|
end
|
data/lib/feedjira/feed.rb
CHANGED
@@ -1,32 +1,9 @@
|
|
1
1
|
module Feedjira
|
2
2
|
class Feed
|
3
|
-
USER_AGENT = 'feedjira http://feedjira.com'
|
4
|
-
|
5
|
-
# Passes raw XML and callbacks to a parser.
|
6
|
-
# === Parameters
|
7
|
-
# [parser<Object>] The parser to pass arguments to - must respond to
|
8
|
-
# `parse` and should return a Feed object.
|
9
|
-
# [xml<String>] The XML that you would like parsed.
|
10
|
-
# === Returns
|
11
|
-
# An instance of the parser feed type.
|
12
3
|
def self.parse_with(parser, xml, &block)
|
13
4
|
parser.parse xml, &block
|
14
5
|
end
|
15
6
|
|
16
|
-
# Takes a raw XML feed and attempts to parse it. If no parser is available a Feedjira::NoParserAvailable exception is raised.
|
17
|
-
# You can pass a block to be called when there's an error during the parsing.
|
18
|
-
# === Parameters
|
19
|
-
# [xml<String>] The XML that you would like parsed.
|
20
|
-
# === Returns
|
21
|
-
# An instance of the determined feed type. By default, one of these:
|
22
|
-
# * Feedjira::Parser::RSSFeedBurner
|
23
|
-
# * Feedjira::Parser::GoogleDocsAtom
|
24
|
-
# * Feedjira::Parser::AtomFeedBurner
|
25
|
-
# * Feedjira::Parser::Atom
|
26
|
-
# * Feedjira::Parser::ITunesRSS
|
27
|
-
# * Feedjira::Parser::RSS
|
28
|
-
# === Raises
|
29
|
-
# Feedjira::NoParserAvailable : If no valid parser classes could be found for the feed.
|
30
7
|
def self.parse(xml, &block)
|
31
8
|
if parser = determine_feed_parser_for_xml(xml)
|
32
9
|
parse_with parser, xml, &block
|
@@ -35,31 +12,15 @@ module Feedjira
|
|
35
12
|
end
|
36
13
|
end
|
37
14
|
|
38
|
-
# Determines the correct parser class to use for parsing the feed.
|
39
|
-
#
|
40
|
-
# === Parameters
|
41
|
-
# [xml<String>] The XML that you would like determine the parser for.
|
42
|
-
# === Returns
|
43
|
-
# The class name of the parser that can handle the XML.
|
44
15
|
def self.determine_feed_parser_for_xml(xml)
|
45
16
|
start_of_doc = xml.slice(0, 2000)
|
46
17
|
feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
|
47
18
|
end
|
48
19
|
|
49
|
-
# Adds a new feed parsing class that will be used for parsing.
|
50
|
-
#
|
51
|
-
# === Parameters
|
52
|
-
# [klass<Constant>] The class/constant that you want to register.
|
53
|
-
# === Returns
|
54
|
-
# A updated array of feed parser class names.
|
55
20
|
def self.add_feed_class(klass)
|
56
21
|
feed_classes.unshift klass
|
57
22
|
end
|
58
23
|
|
59
|
-
# Provides a list of registered feed parsing classes.
|
60
|
-
#
|
61
|
-
# === Returns
|
62
|
-
# A array of class names.
|
63
24
|
def self.feed_classes
|
64
25
|
@feed_classes ||= [
|
65
26
|
Feedjira::Parser::RSSFeedBurner,
|
@@ -71,61 +32,29 @@ module Feedjira
|
|
71
32
|
]
|
72
33
|
end
|
73
34
|
|
74
|
-
# Makes all registered feeds types look for the passed in element to parse.
|
75
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
76
|
-
#
|
77
|
-
# === Parameters
|
78
|
-
# [element_tag<String>] The element tag
|
79
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
80
35
|
def self.add_common_feed_element(element_tag, options = {})
|
81
36
|
feed_classes.each do |k|
|
82
37
|
k.element element_tag, options
|
83
38
|
end
|
84
39
|
end
|
85
40
|
|
86
|
-
# Makes all registered feeds types look for the passed in elements to parse.
|
87
|
-
# This is actually just a call to elements (a SAXMachine call) in the class.
|
88
|
-
#
|
89
|
-
# === Parameters
|
90
|
-
# [element_tag<String>] The element tag
|
91
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
92
41
|
def self.add_common_feed_elements(element_tag, options = {})
|
93
42
|
feed_classes.each do |k|
|
94
43
|
k.elements element_tag, options
|
95
44
|
end
|
96
45
|
end
|
97
46
|
|
98
|
-
# Makes all registered entry types look for the passed in element to parse.
|
99
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
100
|
-
#
|
101
|
-
# === Parameters
|
102
|
-
# [element_tag<String>]
|
103
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
104
47
|
def self.add_common_feed_entry_element(element_tag, options = {})
|
105
48
|
call_on_each_feed_entry :element, element_tag, options
|
106
49
|
end
|
107
50
|
|
108
|
-
# Makes all registered entry types look for the passed in elements to parse.
|
109
|
-
# This is actually just a call to element (a SAXMachine call) in the class.
|
110
|
-
#
|
111
|
-
# === Parameters
|
112
|
-
# [element_tag<String>]
|
113
|
-
# [options<Hash>] Valid keys are same as with SAXMachine
|
114
51
|
def self.add_common_feed_entry_elements(element_tag, options = {})
|
115
52
|
call_on_each_feed_entry :elements, element_tag, options
|
116
53
|
end
|
117
54
|
|
118
|
-
# Call a method on all feed entries classes.
|
119
|
-
#
|
120
|
-
# === Parameters
|
121
|
-
# [method<Symbol>] The method name
|
122
|
-
# [parameters<Array>] The method parameters
|
123
55
|
def self.call_on_each_feed_entry(method, *parameters)
|
124
56
|
feed_classes.each do |k|
|
125
|
-
# iterate on the collections defined in the sax collection
|
126
57
|
k.sax_config.collection_elements.each_value do |vl|
|
127
|
-
# vl is a list of CollectionConfig mapped to an attribute name
|
128
|
-
# we'll look for the one set as 'entries' and add the new element
|
129
58
|
vl.find_all{|v| (v.accessor == 'entries') && (v.data_class.class == Class)}.each do |v|
|
130
59
|
v.data_class.send(method, *parameters)
|
131
60
|
end
|
@@ -133,325 +62,24 @@ module Feedjira
|
|
133
62
|
end
|
134
63
|
end
|
135
64
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
# * :cookiefile - file to read cookies
|
143
|
-
# * :cookies - contents of cookies header
|
144
|
-
# * :http_authentication - array containing username, then password
|
145
|
-
# * :proxy_url - proxy url
|
146
|
-
# * :proxy_port - proxy port
|
147
|
-
# * :max_redirects - max number of redirections
|
148
|
-
# * :timeout - timeout
|
149
|
-
# * :ssl_verify_host - boolean
|
150
|
-
# * :ssl_verify_peer - boolean
|
151
|
-
# * :ssl_version - the ssl version to use, see OpenSSL::SSL::SSLContext::METHODS for options
|
152
|
-
def self.setup_easy(curl, options={})
|
153
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
|
154
|
-
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
155
|
-
curl.headers["Accept-Language"] = options[:language] if options.has_key?(:language)
|
156
|
-
curl.enable_cookies = options[:enable_cookies] if options.has_key?(:enable_cookies)
|
157
|
-
curl.cookiefile = options[:cookiefile] if options.has_key?(:cookiefile)
|
158
|
-
curl.cookies = options[:cookies] if options.has_key?(:cookies)
|
159
|
-
|
160
|
-
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
161
|
-
curl.proxy_url = options[:proxy_url] if options.has_key?(:proxy_url)
|
162
|
-
curl.proxy_port = options[:proxy_port] if options.has_key?(:proxy_port)
|
163
|
-
curl.max_redirects = options[:max_redirects] if options[:max_redirects]
|
164
|
-
curl.timeout = options[:timeout] if options[:timeout]
|
165
|
-
curl.ssl_verify_host = options[:ssl_verify_host] if options.has_key?(:ssl_verify_host)
|
166
|
-
curl.ssl_verify_peer = options[:ssl_verify_peer] if options.has_key?(:ssl_verify_peer)
|
167
|
-
curl.ssl_version = options[:ssl_version] if options.has_key?(:ssl_version)
|
168
|
-
|
169
|
-
curl.follow_location = true
|
170
|
-
end
|
171
|
-
|
172
|
-
# Fetches and returns the raw XML for each URL provided.
|
173
|
-
#
|
174
|
-
# === Parameters
|
175
|
-
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
176
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
177
|
-
# :if_modified_since - Time object representing when the feed was last updated.
|
178
|
-
# :if_none_match - String that's normally an etag for the request that was stored previously.
|
179
|
-
# :on_success - Block that gets executed after a successful request.
|
180
|
-
# :on_failure - Block that gets executed after a failed request.
|
181
|
-
# * all parameters defined in setup_easy
|
182
|
-
# === Returns
|
183
|
-
# A String of XML if a single URL is passed.
|
184
|
-
#
|
185
|
-
# A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
|
186
|
-
def self.fetch_raw(urls, options = {})
|
187
|
-
url_queue = [*urls]
|
188
|
-
multi = Curl::Multi.new
|
189
|
-
responses = {}
|
190
|
-
url_queue.each do |url|
|
191
|
-
easy = Curl::Easy.new(url) do |curl|
|
192
|
-
setup_easy curl, options
|
193
|
-
|
194
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
195
|
-
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
196
|
-
|
197
|
-
curl.on_success do |c|
|
198
|
-
responses[url] = decode_content(c)
|
199
|
-
end
|
200
|
-
|
201
|
-
curl.on_complete do |c, err|
|
202
|
-
responses[url] = c.response_code unless responses.has_key?(url)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
multi.add(easy)
|
206
|
-
end
|
207
|
-
|
208
|
-
multi.perform
|
209
|
-
urls.is_a?(String) ? responses.values.first : responses
|
210
|
-
end
|
211
|
-
|
212
|
-
# Fetches and returns the parsed XML for each URL provided.
|
213
|
-
#
|
214
|
-
# === Parameters
|
215
|
-
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
|
216
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
217
|
-
# * :user_agent - String that overrides the default user agent.
|
218
|
-
# * :if_modified_since - Time object representing when the feed was last updated.
|
219
|
-
# * :if_none_match - String, an etag for the request that was stored previously.
|
220
|
-
# * :on_success - Block that gets executed after a successful request.
|
221
|
-
# * :on_failure - Block that gets executed after a failed request.
|
222
|
-
# === Returns
|
223
|
-
# A Feed object if a single URL is passed.
|
224
|
-
#
|
225
|
-
# A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
|
226
|
-
def self.fetch_and_parse(urls, options = {})
|
227
|
-
url_queue = [*urls]
|
228
|
-
multi = Curl::Multi.new
|
229
|
-
responses = {}
|
230
|
-
|
231
|
-
# I broke these down so I would only try to do 30 simultaneously because
|
232
|
-
# I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
|
233
|
-
url_queue.slice!(0, 30).each do |url|
|
234
|
-
add_url_to_multi(multi, url, url_queue, responses, options)
|
235
|
-
end
|
65
|
+
def self.fetch_and_parse(url)
|
66
|
+
response = connection(url).get
|
67
|
+
raise FetchFailure.new("Fetch failed - #{response.status}") unless response.success?
|
68
|
+
xml = response.body
|
69
|
+
parser_klass = determine_feed_parser_for_xml xml
|
70
|
+
raise NoParserAvailable.new("No valid parser for XML.") unless parser_klass
|
236
71
|
|
237
|
-
|
238
|
-
|
72
|
+
feed = parse_with parser_klass, xml
|
73
|
+
feed.feed_url = url
|
74
|
+
feed.etag = response.headers['etag'].to_s.gsub(/"/, '')
|
75
|
+
feed.last_modified = response.headers['last-modified']
|
76
|
+
feed
|
239
77
|
end
|
240
78
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
# === Returns
|
246
|
-
# A decoded string of XML.
|
247
|
-
def self.decode_content(c)
|
248
|
-
if c.header_str.match(/Content-Encoding: gzip/i)
|
249
|
-
begin
|
250
|
-
gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
|
251
|
-
xml = gz.read
|
252
|
-
gz.close
|
253
|
-
rescue Zlib::GzipFile::Error
|
254
|
-
# Maybe this is not gzipped?
|
255
|
-
xml = c.body_str
|
256
|
-
end
|
257
|
-
elsif c.header_str.match(/Content-Encoding: deflate/i)
|
258
|
-
xml = Zlib::Inflate.inflate(c.body_str)
|
259
|
-
else
|
260
|
-
xml = c.body_str
|
261
|
-
end
|
262
|
-
|
263
|
-
xml
|
264
|
-
end
|
265
|
-
|
266
|
-
# Updates each feed for each Feed object provided.
|
267
|
-
#
|
268
|
-
# === Parameters
|
269
|
-
# [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
|
270
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
271
|
-
# * :on_success - Block that gets executed after a successful request.
|
272
|
-
# * :on_failure - Block that gets executed after a failed request.
|
273
|
-
# * all parameters defined in setup_easy
|
274
|
-
# === Returns
|
275
|
-
# A updated Feed object if a single URL is passed.
|
276
|
-
#
|
277
|
-
# A Hash if multiple Feeds are passed. The key will be the URL, and the value the updated Feed object.
|
278
|
-
def self.update(feeds, options = {})
|
279
|
-
feed_queue = [*feeds]
|
280
|
-
multi = Curl::Multi.new
|
281
|
-
responses = {}
|
282
|
-
|
283
|
-
feed_queue.slice!(0, 30).each do |feed|
|
284
|
-
add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
285
|
-
end
|
286
|
-
|
287
|
-
multi.perform
|
288
|
-
feeds.is_a?(Array) ? responses : responses.values.first
|
289
|
-
end
|
290
|
-
|
291
|
-
# An abstraction for adding a feed by URL to the passed Curb::multi stack.
|
292
|
-
#
|
293
|
-
# === Parameters
|
294
|
-
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
|
295
|
-
# [url<String>] The URL of the feed that you would like to be fetched.
|
296
|
-
# [url_queue<Array>] An array of URLs that are queued for request.
|
297
|
-
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
298
|
-
# [feeds<String> or <Array>] A single feed object, or an array of feed objects.
|
299
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
300
|
-
# * :on_success - Block that gets executed after a successful request.
|
301
|
-
# * :on_failure - Block that gets executed after a failed request.
|
302
|
-
# * all parameters defined in setup_easy
|
303
|
-
# === Returns
|
304
|
-
# The updated Curl::Multi object with the request details added to it's stack.
|
305
|
-
def self.add_url_to_multi(multi, url, url_queue, responses, options)
|
306
|
-
easy = Curl::Easy.new(url) do |curl|
|
307
|
-
setup_easy curl, options
|
308
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
309
|
-
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
310
|
-
|
311
|
-
curl.on_success do |c|
|
312
|
-
xml = decode_content(c)
|
313
|
-
klass = determine_feed_parser_for_xml(xml)
|
314
|
-
|
315
|
-
if klass
|
316
|
-
begin
|
317
|
-
feed = parse_with klass, xml, &on_parser_failure(url)
|
318
|
-
|
319
|
-
feed.feed_url = c.last_effective_url
|
320
|
-
feed.etag = etag_from_header(c.header_str)
|
321
|
-
feed.last_modified = last_modified_from_header(c.header_str)
|
322
|
-
responses[url] = feed
|
323
|
-
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
324
|
-
rescue Exception => e
|
325
|
-
call_on_failure(c, e, options[:on_failure])
|
326
|
-
end
|
327
|
-
else
|
328
|
-
call_on_failure(c, "Can't determine a parser", options[:on_failure])
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
|
-
#
|
333
|
-
# trigger on_failure for 404s
|
334
|
-
#
|
335
|
-
curl.on_complete do |c|
|
336
|
-
add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
|
337
|
-
responses[url] = c.response_code unless responses.has_key?(url)
|
338
|
-
end
|
339
|
-
|
340
|
-
curl.on_redirect do |c|
|
341
|
-
if c.response_code == 304 # it's not modified. this isn't an error condition
|
342
|
-
options[:on_success].call(url, nil) if options.has_key?(:on_success)
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
curl.on_missing do |c|
|
347
|
-
if c.response_code == 404 && options.has_key?(:on_failure)
|
348
|
-
call_on_failure(c, 'Server returned a 404', options[:on_failure])
|
349
|
-
end
|
350
|
-
end
|
351
|
-
|
352
|
-
curl.on_failure do |c, err|
|
353
|
-
responses[url] = c.response_code
|
354
|
-
call_on_failure(c, err, options[:on_failure])
|
355
|
-
end
|
356
|
-
end
|
357
|
-
multi.add(easy)
|
358
|
-
end
|
359
|
-
|
360
|
-
# An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
|
361
|
-
#
|
362
|
-
# === Parameters
|
363
|
-
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
|
364
|
-
# [feed<Feed>] A feed object that you would like to be fetched.
|
365
|
-
# [url_queue<Array>] An array of feed objects that are queued for request.
|
366
|
-
# [responses<Hash>] Existing responses that you want the response from the request added to.
|
367
|
-
# [feeds<String>] or <Array> A single feed object, or an array of feed objects.
|
368
|
-
# [options<Hash>] Valid keys for this argument as as followed:
|
369
|
-
# * :on_success - Block that gets executed after a successful request.
|
370
|
-
# * :on_failure - Block that gets executed after a failed request.
|
371
|
-
# * all parameters defined in setup_easy
|
372
|
-
# === Returns
|
373
|
-
# The updated Curl::Multi object with the request details added to it's stack.
|
374
|
-
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
|
375
|
-
easy = Curl::Easy.new(feed.feed_url) do |curl|
|
376
|
-
setup_easy curl, options
|
377
|
-
curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
|
378
|
-
curl.headers["If-Modified-Since"] = options[:if_modified_since] if options[:if_modified_since] && (!feed.last_modified || (Time.parse(options[:if_modified_since].to_s) > feed.last_modified))
|
379
|
-
curl.headers["If-None-Match"] = feed.etag if feed.etag
|
380
|
-
|
381
|
-
curl.on_success do |c|
|
382
|
-
begin
|
383
|
-
updated_feed = Feed.parse c.body_str, &on_parser_failure(feed.feed_url)
|
384
|
-
|
385
|
-
updated_feed.feed_url = c.last_effective_url
|
386
|
-
updated_feed.etag = etag_from_header(c.header_str)
|
387
|
-
updated_feed.last_modified = last_modified_from_header(c.header_str)
|
388
|
-
feed.update_from_feed(updated_feed)
|
389
|
-
responses[feed.feed_url] = feed
|
390
|
-
options[:on_success].call(feed) if options.has_key?(:on_success)
|
391
|
-
rescue Exception => e
|
392
|
-
call_on_failure(c, e, options[:on_failure])
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
curl.on_failure do |c, err| # response code 50X
|
397
|
-
responses[feed.feed_url] = c.response_code
|
398
|
-
call_on_failure(c, 'Server returned a 404', options[:on_failure])
|
399
|
-
end
|
400
|
-
|
401
|
-
curl.on_redirect do |c, err| # response code 30X
|
402
|
-
if c.response_code == 304
|
403
|
-
options[:on_success].call(feed) if options.has_key?(:on_success)
|
404
|
-
else
|
405
|
-
responses[feed.feed_url] = c.response_code
|
406
|
-
call_on_failure(c, err, options[:on_failure])
|
407
|
-
end
|
408
|
-
end
|
409
|
-
|
410
|
-
curl.on_complete do |c|
|
411
|
-
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
412
|
-
responses[feed.feed_url] = feed unless responses.has_key?(feed.feed_url)
|
413
|
-
end
|
414
|
-
end
|
415
|
-
multi.add(easy)
|
416
|
-
end
|
417
|
-
|
418
|
-
# Determines the etag from the request headers.
|
419
|
-
#
|
420
|
-
# === Parameters
|
421
|
-
# [header<String>] Raw request header returned from the request
|
422
|
-
# === Returns
|
423
|
-
# A string of the etag or nil if it cannot be found in the headers.
|
424
|
-
def self.etag_from_header(header)
|
425
|
-
header =~ /.*ETag:\s(.*)\r/
|
426
|
-
$1
|
427
|
-
end
|
428
|
-
|
429
|
-
# Determines the last modified date from the request headers.
|
430
|
-
#
|
431
|
-
# === Parameters
|
432
|
-
# [header<String>] Raw request header returned from the request
|
433
|
-
# === Returns
|
434
|
-
# A Time object of the last modified date or nil if it cannot be found in the headers.
|
435
|
-
def self.last_modified_from_header(header)
|
436
|
-
header =~ /.*Last-Modified:\s(.*)\r/
|
437
|
-
Time.parse_safely($1) if $1
|
438
|
-
end
|
439
|
-
|
440
|
-
class << self
|
441
|
-
private
|
442
|
-
|
443
|
-
def on_parser_failure(url)
|
444
|
-
Proc.new { |message| raise "Error while parsing [#{url}] #{message}" }
|
445
|
-
end
|
446
|
-
|
447
|
-
def call_on_failure(c, error, on_failure)
|
448
|
-
if on_failure
|
449
|
-
if on_failure.arity == 2
|
450
|
-
on_failure.call(c, error)
|
451
|
-
else
|
452
|
-
warn "on_failure proc with invalid parameters number #{on_failure.arity} instead of 2, ignoring it"
|
453
|
-
end
|
454
|
-
end
|
79
|
+
def self.connection(url)
|
80
|
+
Faraday.new(url: url) do |conn|
|
81
|
+
conn.use FaradayMiddleware::FollowRedirects, limit: 3
|
82
|
+
conn.adapter :net_http
|
455
83
|
end
|
456
84
|
end
|
457
85
|
end
|
data/lib/feedjira/version.rb
CHANGED
data/spec/feedjira/feed_spec.rb
CHANGED
@@ -9,6 +9,32 @@ class FailParser
|
|
9
9
|
end
|
10
10
|
|
11
11
|
describe Feedjira::Feed do
|
12
|
+
describe '.fetch_and_parse' do
|
13
|
+
it 'raises an error when the fetch fails' do
|
14
|
+
url = 'http://www.example.com/feed.xml'
|
15
|
+
expect {
|
16
|
+
Feedjira::Feed.fetch_and_parse url
|
17
|
+
}.to raise_error Feedjira::FetchFailure
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'raises an error when no parser can be found' do
|
21
|
+
url = 'http://feedjira.com'
|
22
|
+
expect {
|
23
|
+
Feedjira::Feed.fetch_and_parse url
|
24
|
+
}.to raise_error Feedjira::NoParserAvailable
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'fetches and parses the feed' do
|
28
|
+
url = 'http://feedjira.com/blog/feed.xml'
|
29
|
+
feed = Feedjira::Feed.fetch_and_parse url
|
30
|
+
|
31
|
+
expect(feed.class).to eq Feedjira::Parser::Atom
|
32
|
+
expect(feed.entries.count).to eq 3
|
33
|
+
expect(feed.feed_url).to eq url
|
34
|
+
expect(feed.etag).to eq 'a22ad-3190-5037e71966e80'
|
35
|
+
expect(feed.last_modified).to eq 'Sat, 20 Sep 2014 12:34:50 GMT'
|
36
|
+
end
|
37
|
+
end
|
12
38
|
|
13
39
|
describe "#add_common_feed_element" do
|
14
40
|
before(:all) do
|
@@ -151,76 +177,6 @@ describe Feedjira::Feed do
|
|
151
177
|
|
152
178
|
end
|
153
179
|
|
154
|
-
describe "#setup_easy" do
|
155
|
-
class MockCurl
|
156
|
-
attr_accessor :follow_location, :userpwd, :proxy_url, :proxy_port, :max_redirects, :timeout, :ssl_verify_host, :ssl_verify_peer, :ssl_version, :enable_cookies, :cookiefile, :cookies
|
157
|
-
|
158
|
-
def headers
|
159
|
-
@headers ||= {}
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
let(:curl) { MockCurl.new }
|
164
|
-
|
165
|
-
it "sets defaults on curl" do
|
166
|
-
Feedjira::Feed.setup_easy curl
|
167
|
-
|
168
|
-
expect(curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
169
|
-
expect(curl.follow_location).to eq true
|
170
|
-
end
|
171
|
-
|
172
|
-
it "allows user agent over-ride" do
|
173
|
-
Feedjira::Feed.setup_easy(curl, user_agent: '007')
|
174
|
-
expect(curl.headers["User-Agent"]).to eq '007'
|
175
|
-
end
|
176
|
-
|
177
|
-
it "allows to set language" do
|
178
|
-
Feedjira::Feed.setup_easy(curl, language: 'en-US')
|
179
|
-
expect(curl.headers["Accept-Language"]).to eq 'en-US'
|
180
|
-
end
|
181
|
-
|
182
|
-
it "enables compression" do
|
183
|
-
Feedjira::Feed.setup_easy(curl, compress: true)
|
184
|
-
expect(curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
185
|
-
end
|
186
|
-
|
187
|
-
it "enables compression even when you act like you don't want it" do
|
188
|
-
Feedjira::Feed.setup_easy(curl, compress: false)
|
189
|
-
expect(curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
190
|
-
end
|
191
|
-
|
192
|
-
it "sets up http auth" do
|
193
|
-
Feedjira::Feed.setup_easy(curl, http_authentication: ['user', 'pass'])
|
194
|
-
expect(curl.userpwd).to eq 'user:pass'
|
195
|
-
end
|
196
|
-
|
197
|
-
it "passes known options to curl" do
|
198
|
-
known_options = {
|
199
|
-
enable_cookies: true,
|
200
|
-
cookiefile: 'cookies.txt',
|
201
|
-
cookies: 'asdf',
|
202
|
-
proxy_url: 'http://proxy.url.com',
|
203
|
-
proxy_port: '1234',
|
204
|
-
max_redirects: 2,
|
205
|
-
timeout: 500,
|
206
|
-
ssl_verify_host: true,
|
207
|
-
ssl_verify_peer: true,
|
208
|
-
ssl_version: :omg
|
209
|
-
}
|
210
|
-
|
211
|
-
Feedjira::Feed.setup_easy curl, known_options
|
212
|
-
|
213
|
-
known_options.each do |option|
|
214
|
-
key, value = option
|
215
|
-
expect(curl.send(key)).to eq value
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
it "ignores unknown options" do
|
220
|
-
expect { Feedjira::Feed.setup_easy curl, foo: :bar }.to_not raise_error
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
180
|
describe "when adding feed types" do
|
225
181
|
it "should prioritize added types over the built in ones" do
|
226
182
|
feed_text = "Atom asdf"
|
@@ -239,530 +195,4 @@ describe Feedjira::Feed do
|
|
239
195
|
Feedjira::Feed.feed_classes.reject! {|o| o == new_feed_type }
|
240
196
|
end
|
241
197
|
end
|
242
|
-
|
243
|
-
describe '#etag_from_header' do
|
244
|
-
before(:each) do
|
245
|
-
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
246
|
-
end
|
247
|
-
|
248
|
-
it "should return the etag from the header if it exists" do
|
249
|
-
expect(Feedjira::Feed.etag_from_header(@header)).to eq "ziEyTl4q9GH04BR4jgkImd0GvSE"
|
250
|
-
end
|
251
|
-
|
252
|
-
it "should return nil if there is no etag in the header" do
|
253
|
-
expect(Feedjira::Feed.etag_from_header("foo")).to be_nil
|
254
|
-
end
|
255
|
-
|
256
|
-
end
|
257
|
-
|
258
|
-
describe '#last_modified_from_header' do
|
259
|
-
before(:each) do
|
260
|
-
@header = "HTTP/1.0 200 OK\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\nETag: ziEyTl4q9GH04BR4jgkImd0GvSE\r\nP3P: CP=\"ALL DSP COR NID CUR OUR NOR\"\r\nConnection: close\r\nContent-Type: text/xml;charset=utf-8\r\n\r\n"
|
261
|
-
end
|
262
|
-
|
263
|
-
it "should return the last modified date from the header if it exists" do
|
264
|
-
expect(Feedjira::Feed.last_modified_from_header(@header)).to eq Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
|
265
|
-
end
|
266
|
-
|
267
|
-
it "should return nil if there is no last modified date in the header" do
|
268
|
-
expect(Feedjira::Feed.last_modified_from_header("foo")).to be_nil
|
269
|
-
end
|
270
|
-
end
|
271
|
-
|
272
|
-
describe "fetching feeds" do
|
273
|
-
before(:each) do
|
274
|
-
@paul_feed = { :xml => load_sample("PaulDixExplainsNothing.xml"), :url => "http://feeds.feedburner.com/PaulDixExplainsNothing" }
|
275
|
-
@trotter_feed = { :xml => load_sample("TrotterCashionHome.xml"), :url => "http://feeds2.feedburner.com/trottercashion" }
|
276
|
-
@invalid_feed = { :xml => 'This feed is invalid', :url => "http://feeds.feedburner.com/InvalidFeed" }
|
277
|
-
end
|
278
|
-
|
279
|
-
describe "#fetch_raw" do
|
280
|
-
before(:each) do
|
281
|
-
@cmock = double('cmock', :header_str => '', :body_str => @paul_feed[:xml] )
|
282
|
-
@multi = double('curl_multi', :add => true, :perform => true)
|
283
|
-
@curl_easy = double('curl_easy')
|
284
|
-
@curl = double('curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
285
|
-
allow(@curl).to receive(:on_success).and_yield(@cmock)
|
286
|
-
|
287
|
-
allow(Curl::Multi).to receive(:new).and_return(@multi)
|
288
|
-
allow(Curl::Easy).to receive(:new).and_yield(@curl).and_return(@curl_easy)
|
289
|
-
end
|
290
|
-
|
291
|
-
it "should set user agent if it's passed as an option" do
|
292
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :user_agent => 'Custom Useragent')
|
293
|
-
expect(@curl.headers['User-Agent']).to eq 'Custom Useragent'
|
294
|
-
end
|
295
|
-
|
296
|
-
it "should set user agent to default if it's not passed as an option" do
|
297
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url])
|
298
|
-
expect(@curl.headers['User-Agent']).to eq Feedjira::Feed::USER_AGENT
|
299
|
-
end
|
300
|
-
|
301
|
-
it "should set if modified since as an option if passed" do
|
302
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :if_modified_since => Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT"))
|
303
|
-
expect(@curl.headers["If-Modified-Since"]).to eq 'Wed, 28 Jan 2009 04:10:32 GMT'
|
304
|
-
end
|
305
|
-
|
306
|
-
it "should set if none match as an option if passed" do
|
307
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
308
|
-
expect(@curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
309
|
-
end
|
310
|
-
|
311
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
312
|
-
expect(@curl).to receive(:userpwd=).with('username:password')
|
313
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :http_authentication => ['username', 'password'])
|
314
|
-
end
|
315
|
-
|
316
|
-
it 'should set accepted encodings' do
|
317
|
-
Feedjira::Feed.fetch_raw(@paul_feed[:url], :compress => true)
|
318
|
-
expect(@curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
319
|
-
end
|
320
|
-
|
321
|
-
it "should return raw xml" do
|
322
|
-
raw_xml = Feedjira::Feed.fetch_raw @paul_feed[:url]
|
323
|
-
expect(raw_xml).to match /^#{Regexp.escape('<?xml version="1.0" encoding="UTF-8"?>')}/
|
324
|
-
end
|
325
|
-
|
326
|
-
it "should take multiple feed urls and return a hash of urls and response xml" do
|
327
|
-
multi = double('curl_multi', :add => true, :perform => true)
|
328
|
-
allow(Curl::Multi).to receive(:new).and_return(multi)
|
329
|
-
|
330
|
-
paul_response = double('paul_response', :header_str => '', :body_str => @paul_feed[:xml] )
|
331
|
-
trotter_response = double('trotter_response', :header_str => '', :body_str => @trotter_feed[:xml] )
|
332
|
-
|
333
|
-
paul_curl = double('paul_curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
334
|
-
allow(paul_curl).to receive(:on_success).and_yield(paul_response)
|
335
|
-
|
336
|
-
trotter_curl = double('trotter_curl', :headers => {}, :follow_location= => true, :on_failure => true, :on_complete => true)
|
337
|
-
allow(trotter_curl).to receive(:on_success).and_yield(trotter_response)
|
338
|
-
|
339
|
-
expect(Curl::Easy).to receive(:new).with(@paul_feed[:url]).ordered.and_yield(paul_curl)
|
340
|
-
expect(Curl::Easy).to receive(:new).with(@trotter_feed[:url]).ordered.and_yield(trotter_curl)
|
341
|
-
|
342
|
-
results = Feedjira::Feed.fetch_raw([@paul_feed[:url], @trotter_feed[:url]])
|
343
|
-
expect(results.keys).to include(@paul_feed[:url])
|
344
|
-
expect(results.keys).to include(@trotter_feed[:url])
|
345
|
-
expect(results[@paul_feed[:url]]).to match /Paul Dix/
|
346
|
-
expect(results[@trotter_feed[:url]]).to match /Trotter Cashion/
|
347
|
-
end
|
348
|
-
|
349
|
-
it "should always return a hash when passed an array" do
|
350
|
-
results = Feedjira::Feed.fetch_raw([@paul_feed[:url]])
|
351
|
-
expect(results.class).to eq Hash
|
352
|
-
end
|
353
|
-
end
|
354
|
-
|
355
|
-
describe "#add_url_to_multi" do
|
356
|
-
before(:each) do
|
357
|
-
allow_message_expectations_on_nil
|
358
|
-
@multi = Curl::Multi.get([@paul_feed[:url]], {:follow_location => true}, {:pipeline => true})
|
359
|
-
allow(@multi).to receive(:add)
|
360
|
-
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
361
|
-
|
362
|
-
allow(Curl::Easy).to receive(:new).and_yield(@easy_curl)
|
363
|
-
end
|
364
|
-
|
365
|
-
it "should set user agent if it's passed as an option" do
|
366
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :user_agent => 'My cool application')
|
367
|
-
expect(@easy_curl.headers["User-Agent"]).to eq 'My cool application'
|
368
|
-
end
|
369
|
-
|
370
|
-
it "should set user agent to default if it's not passed as an option" do
|
371
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
372
|
-
expect(@easy_curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
373
|
-
end
|
374
|
-
|
375
|
-
it "should set if modified since as an option if passed" do
|
376
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_modified_since => Time.parse_safely("Jan 25 2009 04:10:32 GMT"))
|
377
|
-
expect(@easy_curl.headers["If-Modified-Since"]).to eq 'Sun, 25 Jan 2009 04:10:32 GMT'
|
378
|
-
end
|
379
|
-
|
380
|
-
it 'should set follow location to true' do
|
381
|
-
expect(@easy_curl).to receive(:follow_location=).with(true)
|
382
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
383
|
-
end
|
384
|
-
|
385
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
386
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :http_authentication => ['myusername', 'mypassword'])
|
387
|
-
expect(@easy_curl.userpwd).to eq 'myusername:mypassword'
|
388
|
-
end
|
389
|
-
|
390
|
-
it 'should set accepted encodings' do
|
391
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {:compress => true})
|
392
|
-
expect(@easy_curl.headers["Accept-encoding"]).to eq 'gzip, deflate'
|
393
|
-
end
|
394
|
-
|
395
|
-
it "should set if_none_match as an option if passed" do
|
396
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
397
|
-
expect(@easy_curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
398
|
-
end
|
399
|
-
|
400
|
-
describe 'on success' do
|
401
|
-
before(:each) do
|
402
|
-
@feed = double('feed', :feed_url= => true, :etag= => true, :last_modified= => true)
|
403
|
-
allow(Feedjira::Feed).to receive(:decode_content).and_return(@paul_feed[:xml])
|
404
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return(Feedjira::Parser::AtomFeedBurner)
|
405
|
-
allow(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@feed)
|
406
|
-
allow(Feedjira::Feed).to receive(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
407
|
-
allow(Feedjira::Feed).to receive(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
|
408
|
-
end
|
409
|
-
|
410
|
-
it 'should decode the response body' do
|
411
|
-
expect(Feedjira::Feed).to receive(:decode_content).with(@easy_curl).and_return(@paul_feed[:xml])
|
412
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
413
|
-
@easy_curl.on_success.call(@easy_curl)
|
414
|
-
end
|
415
|
-
|
416
|
-
it 'should determine the xml parser class' do
|
417
|
-
expect(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).with(@paul_feed[:xml]).and_return(Feedjira::Parser::AtomFeedBurner)
|
418
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
419
|
-
@easy_curl.on_success.call(@easy_curl)
|
420
|
-
end
|
421
|
-
|
422
|
-
it 'should parse the xml' do
|
423
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).
|
424
|
-
with(@paul_feed[:xml]).and_return(@feed)
|
425
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
426
|
-
@easy_curl.on_success.call(@easy_curl)
|
427
|
-
end
|
428
|
-
|
429
|
-
describe 'when a compatible xml parser class is found' do
|
430
|
-
it 'should set the last effective url to the feed url' do
|
431
|
-
expect(@easy_curl).to receive(:last_effective_url).and_return(@paul_feed[:url])
|
432
|
-
expect(@feed).to receive(:feed_url=).with(@paul_feed[:url])
|
433
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
434
|
-
@easy_curl.on_success.call(@easy_curl)
|
435
|
-
end
|
436
|
-
|
437
|
-
it 'should set the etags on the feed' do
|
438
|
-
expect(@feed).to receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
439
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
440
|
-
@easy_curl.on_success.call(@easy_curl)
|
441
|
-
end
|
442
|
-
|
443
|
-
it 'should set the last modified on the feed' do
|
444
|
-
expect(@feed).to receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
|
445
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
446
|
-
@easy_curl.on_success.call(@easy_curl)
|
447
|
-
end
|
448
|
-
|
449
|
-
it 'should add the feed to the responses' do
|
450
|
-
responses = {}
|
451
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
452
|
-
@easy_curl.on_success.call(@easy_curl)
|
453
|
-
|
454
|
-
expect(responses.length).to eq 1
|
455
|
-
expect(responses['http://feeds.feedburner.com/PaulDixExplainsNothing']).to eq @feed
|
456
|
-
end
|
457
|
-
|
458
|
-
it 'should call proc if :on_success option is passed' do
|
459
|
-
success = lambda { |url, feed| }
|
460
|
-
expect(success).to receive(:call).with(@paul_feed[:url], @feed)
|
461
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_success => success })
|
462
|
-
@easy_curl.on_success.call(@easy_curl)
|
463
|
-
end
|
464
|
-
|
465
|
-
describe 'when the parser raises an exception' do
|
466
|
-
it 'invokes the on_failure callback with that exception' do
|
467
|
-
failure = double 'Failure callback', arity: 2
|
468
|
-
expect(failure).to receive(:call).with(@easy_curl, an_instance_of(Hell))
|
469
|
-
|
470
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_raise Hell
|
471
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
472
|
-
|
473
|
-
@easy_curl.on_success.call(@easy_curl)
|
474
|
-
end
|
475
|
-
end
|
476
|
-
|
477
|
-
describe 'when the parser invokes its on_failure callback' do
|
478
|
-
before(:each) do
|
479
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return FailParser
|
480
|
-
end
|
481
|
-
|
482
|
-
it 'invokes the on_failure callback' do
|
483
|
-
failure = double 'Failure callback', arity: 2
|
484
|
-
expect(failure).to receive(:call).with(@easy_curl, an_instance_of(RuntimeError))
|
485
|
-
|
486
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
487
|
-
@easy_curl.on_success.call(@easy_curl)
|
488
|
-
end
|
489
|
-
end
|
490
|
-
end
|
491
|
-
|
492
|
-
describe 'when no compatible xml parser class is found' do
|
493
|
-
it 'invokes the on_failure callback' do
|
494
|
-
failure = double 'Failure callback', arity: 2
|
495
|
-
expect(failure).to receive(:call).with(@easy_curl, "Can't determine a parser")
|
496
|
-
|
497
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return nil
|
498
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { on_failure: failure })
|
499
|
-
|
500
|
-
@easy_curl.on_success.call(@easy_curl)
|
501
|
-
end
|
502
|
-
end
|
503
|
-
end
|
504
|
-
|
505
|
-
describe 'on failure' do
|
506
|
-
before(:each) do
|
507
|
-
@headers = "HTTP/1.0 500 Something Bad\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
508
|
-
@body = 'Sorry, something broke'
|
509
|
-
|
510
|
-
allow(@easy_curl).to receive(:response_code).and_return(500)
|
511
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
512
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
513
|
-
end
|
514
|
-
|
515
|
-
it 'should call proc if :on_failure option is passed' do
|
516
|
-
failure = double 'Failure callback', arity: 2
|
517
|
-
expect(failure).to receive(:call).with(@easy_curl, nil)
|
518
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
|
519
|
-
@easy_curl.on_failure.call(@easy_curl)
|
520
|
-
end
|
521
|
-
|
522
|
-
it 'should return the http code in the responses' do
|
523
|
-
responses = {}
|
524
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
525
|
-
@easy_curl.on_failure.call(@easy_curl)
|
526
|
-
|
527
|
-
expect(responses.length).to eq 1
|
528
|
-
expect(responses[@paul_feed[:url]]).to eq 500
|
529
|
-
end
|
530
|
-
end
|
531
|
-
|
532
|
-
describe 'on complete for 404s' do
|
533
|
-
before(:each) do
|
534
|
-
@headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
535
|
-
@body = 'Page could not be found.'
|
536
|
-
|
537
|
-
allow(@easy_curl).to receive(:response_code).and_return(404)
|
538
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
539
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
540
|
-
end
|
541
|
-
|
542
|
-
it 'should call proc if :on_failure option is passed' do
|
543
|
-
complete = double 'Failure callback', arity: 2
|
544
|
-
expect(complete).to receive(:call).with(@easy_curl, 'Server returned a 404')
|
545
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => complete })
|
546
|
-
@easy_curl.on_missing.call(@easy_curl)
|
547
|
-
end
|
548
|
-
|
549
|
-
it 'should return the http code in the responses' do
|
550
|
-
responses = {}
|
551
|
-
Feedjira::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
552
|
-
@easy_curl.on_complete.call(@easy_curl)
|
553
|
-
|
554
|
-
expect(responses.length).to eq 1
|
555
|
-
expect(responses[@paul_feed[:url]]).to eq 404
|
556
|
-
end
|
557
|
-
end
|
558
|
-
end
|
559
|
-
|
560
|
-
describe "#add_feed_to_multi" do
|
561
|
-
before(:each) do
|
562
|
-
allow_message_expectations_on_nil
|
563
|
-
@multi = Curl::Multi.get([@paul_feed[:url]], {:follow_location => true}, {:pipeline => true})
|
564
|
-
allow(@multi).to receive(:add)
|
565
|
-
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
566
|
-
@feed = Feedjira::Feed.parse(sample_feedburner_atom_feed)
|
567
|
-
|
568
|
-
allow(Curl::Easy).to receive(:new).and_yield(@easy_curl)
|
569
|
-
end
|
570
|
-
|
571
|
-
it "should set user agent if it's passed as an option" do
|
572
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, :user_agent => 'My cool application')
|
573
|
-
expect(@easy_curl.headers["User-Agent"]).to eq 'My cool application'
|
574
|
-
end
|
575
|
-
|
576
|
-
it "should set user agent to default if it's not passed as an option" do
|
577
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
578
|
-
expect(@easy_curl.headers["User-Agent"]).to eq Feedjira::Feed::USER_AGENT
|
579
|
-
end
|
580
|
-
|
581
|
-
it "should set if modified since as an option if passed" do
|
582
|
-
modified_time = Time.parse_safely("Wed, 28 Jan 2009 04:10:32 GMT")
|
583
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {:if_modified_since => modified_time})
|
584
|
-
expect(modified_time).to be > @feed.last_modified
|
585
|
-
|
586
|
-
expect(@easy_curl.headers["If-Modified-Since"]).to eq modified_time
|
587
|
-
end
|
588
|
-
|
589
|
-
it 'should set follow location to true' do
|
590
|
-
expect(@easy_curl).to receive(:follow_location=).with(true)
|
591
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
592
|
-
end
|
593
|
-
|
594
|
-
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
595
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, :http_authentication => ['myusername', 'mypassword'])
|
596
|
-
expect(@easy_curl.userpwd).to eq 'myusername:mypassword'
|
597
|
-
end
|
598
|
-
|
599
|
-
it "should set if_none_match as an option if passed" do
|
600
|
-
@feed.etag = 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
601
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
602
|
-
expect(@easy_curl.headers["If-None-Match"]).to eq 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
603
|
-
end
|
604
|
-
|
605
|
-
describe 'on success' do
|
606
|
-
before(:each) do
|
607
|
-
@new_feed = @feed.clone
|
608
|
-
allow(@feed).to receive(:update_from_feed)
|
609
|
-
allow(Feedjira::Feed).to receive(:decode_content).and_return(@paul_feed[:xml])
|
610
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return(Feedjira::Parser::AtomFeedBurner)
|
611
|
-
allow(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@new_feed)
|
612
|
-
allow(Feedjira::Feed).to receive(:etag_from_header).and_return('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
613
|
-
allow(Feedjira::Feed).to receive(:last_modified_from_header).and_return('Wed, 28 Jan 2009 04:10:32 GMT')
|
614
|
-
end
|
615
|
-
|
616
|
-
it 'should parse the updated feed' do
|
617
|
-
expect(Feedjira::Parser::AtomFeedBurner).to receive(:parse).and_return(@new_feed)
|
618
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
619
|
-
@easy_curl.on_success.call(@easy_curl)
|
620
|
-
end
|
621
|
-
|
622
|
-
it 'should set the last effective url to the feed url' do
|
623
|
-
expect(@easy_curl).to receive(:last_effective_url).and_return(@paul_feed[:url])
|
624
|
-
expect(@new_feed).to receive(:feed_url=).with(@paul_feed[:url])
|
625
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
626
|
-
@easy_curl.on_success.call(@easy_curl)
|
627
|
-
end
|
628
|
-
|
629
|
-
it 'should set the etags on the feed' do
|
630
|
-
expect(@new_feed).to receive(:etag=).with('ziEyTl4q9GH04BR4jgkImd0GvSE')
|
631
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
632
|
-
@easy_curl.on_success.call(@easy_curl)
|
633
|
-
end
|
634
|
-
|
635
|
-
it 'should set the last modified on the feed' do
|
636
|
-
expect(@new_feed).to receive(:last_modified=).with('Wed, 28 Jan 2009 04:10:32 GMT')
|
637
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
638
|
-
@easy_curl.on_success.call(@easy_curl)
|
639
|
-
end
|
640
|
-
|
641
|
-
it 'should add the feed to the responses' do
|
642
|
-
responses = {}
|
643
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
|
644
|
-
@easy_curl.on_success.call(@easy_curl)
|
645
|
-
|
646
|
-
expect(responses.length).to eq 1
|
647
|
-
expect(responses['http://feeds.feedburner.com/PaulDixExplainsNothing']).to eq @feed
|
648
|
-
end
|
649
|
-
|
650
|
-
it 'should call proc if :on_success option is passed' do
|
651
|
-
success = lambda { |feed| }
|
652
|
-
expect(success).to receive(:call).with(@feed)
|
653
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
654
|
-
@easy_curl.on_success.call(@easy_curl)
|
655
|
-
end
|
656
|
-
|
657
|
-
it 'should call update from feed on the old feed with the updated feed' do
|
658
|
-
expect(@feed).to receive(:update_from_feed).with(@new_feed)
|
659
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
660
|
-
@easy_curl.on_success.call(@easy_curl)
|
661
|
-
end
|
662
|
-
|
663
|
-
describe 'when the parser invokes its on_failure callback' do
|
664
|
-
before(:each) do
|
665
|
-
allow(Feedjira::Feed).to receive(:determine_feed_parser_for_xml).and_return FailParser
|
666
|
-
end
|
667
|
-
|
668
|
-
it 'invokes the on_failure callback' do
|
669
|
-
failure = double 'Failure callback', arity: 2
|
670
|
-
expect(failure).to receive(:call)
|
671
|
-
|
672
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { on_failure: failure })
|
673
|
-
@easy_curl.on_success.call(@easy_curl)
|
674
|
-
end
|
675
|
-
end
|
676
|
-
end
|
677
|
-
|
678
|
-
describe 'on failure' do
|
679
|
-
before(:each) do
|
680
|
-
@headers = "HTTP/1.0 404 Not Found\r\nDate: Thu, 29 Jan 2009 03:55:24 GMT\r\nServer: Apache\r\nX-FB-Host: chi-write6\r\nLast-Modified: Wed, 28 Jan 2009 04:10:32 GMT\r\n"
|
681
|
-
@body = 'Page could not be found.'
|
682
|
-
|
683
|
-
allow(@easy_curl).to receive(:response_code).and_return(404)
|
684
|
-
allow(@easy_curl).to receive(:header_str).and_return(@headers)
|
685
|
-
allow(@easy_curl).to receive(:body_str).and_return(@body)
|
686
|
-
end
|
687
|
-
|
688
|
-
it 'should call on success callback if the response code is 304' do
|
689
|
-
success = lambda { |feed| }
|
690
|
-
expect(success).to receive(:call).with(@feed)
|
691
|
-
expect(@easy_curl).to receive(:response_code).and_return(304)
|
692
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
693
|
-
@easy_curl.on_redirect.call(@easy_curl)
|
694
|
-
end
|
695
|
-
|
696
|
-
it 'should return the http code in the responses' do
|
697
|
-
responses = {}
|
698
|
-
Feedjira::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
|
699
|
-
@easy_curl.on_failure.call(@easy_curl)
|
700
|
-
|
701
|
-
expect(responses.length).to eq 1
|
702
|
-
expect(responses[@paul_feed[:url]]).to eq 404
|
703
|
-
end
|
704
|
-
end
|
705
|
-
end
|
706
|
-
|
707
|
-
describe "#fetch_and_parse" do
|
708
|
-
it "passes options to multicurl" do
|
709
|
-
options = { user_agent: '007' }
|
710
|
-
|
711
|
-
expect(Feedjira::Feed).to receive(:add_url_to_multi).
|
712
|
-
with(anything, anything, anything, anything, options)
|
713
|
-
|
714
|
-
Feedjira::Feed.fetch_and_parse(sample_rss_feed, options)
|
715
|
-
end
|
716
|
-
end
|
717
|
-
|
718
|
-
describe "#decode_content" do
|
719
|
-
before(:each) do
|
720
|
-
@curl_easy = double('curl_easy', :body_str => '<xml></xml>')
|
721
|
-
end
|
722
|
-
|
723
|
-
it 'should decode the response body using gzip if the Content-Encoding: is gzip' do
|
724
|
-
allow(@curl_easy).to receive(:header_str).and_return('Content-Encoding: gzip')
|
725
|
-
string_io = double('stringio', :read => @curl_easy.body_str, :close => true)
|
726
|
-
expect(StringIO).to receive(:new).and_return(string_io)
|
727
|
-
expect(Zlib::GzipReader).to receive(:new).with(string_io).and_return(string_io)
|
728
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
729
|
-
end
|
730
|
-
|
731
|
-
it 'should decode the response body using gzip if the Content-Encoding: is gzip even when the case is wrong' do
|
732
|
-
allow(@curl_easy).to receive(:header_str).and_return('content-encoding: gzip')
|
733
|
-
string_io = double('stringio', :read => @curl_easy.body_str, :close => true)
|
734
|
-
expect(StringIO).to receive(:new).and_return(string_io)
|
735
|
-
expect(Zlib::GzipReader).to receive(:new).with(string_io).and_return(string_io)
|
736
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
737
|
-
end
|
738
|
-
|
739
|
-
it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
|
740
|
-
allow(@curl_easy).to receive(:header_str).and_return('Content-Encoding: deflate')
|
741
|
-
expect(Zlib::Inflate).to receive(:inflate).with(@curl_easy.body_str)
|
742
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
743
|
-
end
|
744
|
-
|
745
|
-
it 'should deflate the response body using inflate if the Content-Encoding: is deflate event if the case is wrong' do
|
746
|
-
allow(@curl_easy).to receive(:header_str).and_return('content-encoding: deflate')
|
747
|
-
expect(Zlib::Inflate).to receive(:inflate).with(@curl_easy.body_str)
|
748
|
-
Feedjira::Feed.decode_content(@curl_easy)
|
749
|
-
end
|
750
|
-
|
751
|
-
it 'should return the response body if it is not encoded' do
|
752
|
-
allow(@curl_easy).to receive(:header_str).and_return('')
|
753
|
-
expect(Feedjira::Feed.decode_content(@curl_easy)).to eq '<xml></xml>'
|
754
|
-
end
|
755
|
-
end
|
756
|
-
|
757
|
-
describe "#update" do
|
758
|
-
it "passes options to multicurl" do
|
759
|
-
options = { user_agent: '007' }
|
760
|
-
|
761
|
-
expect(Feedjira::Feed).to receive(:add_feed_to_multi).
|
762
|
-
with(anything, anything, anything, anything, options)
|
763
|
-
|
764
|
-
Feedjira::Feed.update([nil], options)
|
765
|
-
end
|
766
|
-
end
|
767
|
-
end
|
768
198
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedjira
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Paul Dix
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: sax-machine
|
@@ -28,19 +28,33 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
31
|
+
name: faraday
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
requirements:
|
34
34
|
- - "~>"
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version: '0.
|
36
|
+
version: '0.9'
|
37
37
|
type: :runtime
|
38
38
|
prerelease: false
|
39
39
|
version_requirements: !ruby/object:Gem::Requirement
|
40
40
|
requirements:
|
41
41
|
- - "~>"
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '0.
|
43
|
+
version: '0.9'
|
44
|
+
- !ruby/object:Gem::Dependency
|
45
|
+
name: faraday_middleware
|
46
|
+
requirement: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - "~>"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0.9'
|
51
|
+
type: :runtime
|
52
|
+
prerelease: false
|
53
|
+
version_requirements: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - "~>"
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0.9'
|
44
58
|
- !ruby/object:Gem::Dependency
|
45
59
|
name: loofah
|
46
60
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,7 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
187
|
version: '0'
|
174
188
|
requirements: []
|
175
189
|
rubyforge_project:
|
176
|
-
rubygems_version: 2.
|
190
|
+
rubygems_version: 2.4.6
|
177
191
|
signing_key:
|
178
192
|
specification_version: 4
|
179
193
|
summary: A feed fetching and parsing library
|