simple-rss 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +321 -0
- data/lib/simple-rss.rb +490 -4
- data/simple-rss.gemspec +4 -4
- data/test/base/enumerable_test.rb +101 -0
- data/test/base/feed_merging_and_diffing_test.rb +140 -0
- data/test/base/fetch_integration_test.rb +25 -0
- data/test/base/fetch_test.rb +90 -0
- data/test/base/filtering_and_validation_test.rb +187 -0
- data/test/base/hash_xml_serialization_test.rb +142 -0
- data/test/base/json_serialization_test.rb +81 -0
- data/test/base/media_and_enclosure_helpers_test.rb +84 -0
- metadata +13 -5
- data/README.markdown +0 -47
data/lib/simple-rss.rb
CHANGED
|
@@ -3,15 +3,25 @@
|
|
|
3
3
|
require "cgi"
|
|
4
4
|
require "time"
|
|
5
5
|
|
|
6
|
-
class SimpleRSS
|
|
7
|
-
|
|
6
|
+
class SimpleRSS # rubocop:disable Metrics/ClassLength
|
|
7
|
+
# @rbs skip
|
|
8
|
+
include Enumerable
|
|
9
|
+
|
|
10
|
+
# @rbs!
|
|
11
|
+
# include Enumerable[Hash[Symbol, untyped]]
|
|
12
|
+
|
|
13
|
+
VERSION = "2.2.0".freeze
|
|
8
14
|
|
|
9
15
|
# @rbs @items: Array[Hash[Symbol, untyped]]
|
|
10
16
|
# @rbs @source: String
|
|
11
17
|
# @rbs @options: Hash[Symbol, untyped]
|
|
18
|
+
# @rbs @etag: String?
|
|
19
|
+
# @rbs @last_modified: String?
|
|
12
20
|
|
|
13
21
|
attr_reader :items #: Array[Hash[Symbol, untyped]]
|
|
14
22
|
attr_reader :source #: String
|
|
23
|
+
attr_reader :etag #: String?
|
|
24
|
+
attr_reader :last_modified #: String?
|
|
15
25
|
alias entries items #: Array[Hash[Symbol, untyped]]
|
|
16
26
|
|
|
17
27
|
@@feed_tags = %i[
|
|
@@ -42,6 +52,9 @@ class SimpleRSS
|
|
|
42
52
|
media:title media:thumbnail#url media:thumbnail#height media:thumbnail#width
|
|
43
53
|
media:credit media:credit#role
|
|
44
54
|
media:category media:category#scheme
|
|
55
|
+
media:description
|
|
56
|
+
enclosure#url enclosure#type enclosure#length
|
|
57
|
+
itunes:duration itunes:image#href
|
|
45
58
|
]
|
|
46
59
|
|
|
47
60
|
# @rbs (untyped, ?Hash[Symbol, untyped]) -> void
|
|
@@ -60,6 +73,159 @@ class SimpleRSS
|
|
|
60
73
|
end
|
|
61
74
|
alias feed channel
|
|
62
75
|
|
|
76
|
+
# Iterate over all items in the feed
|
|
77
|
+
#
|
|
78
|
+
# @rbs () { (Hash[Symbol, untyped]) -> void } -> self
|
|
79
|
+
# | () -> Enumerator[Hash[Symbol, untyped], self]
|
|
80
|
+
def each(&block)
|
|
81
|
+
return enum_for(:each) unless block
|
|
82
|
+
|
|
83
|
+
items.each(&block)
|
|
84
|
+
self
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# Access an item by index
|
|
88
|
+
#
|
|
89
|
+
# @rbs (Integer) -> Hash[Symbol, untyped]?
|
|
90
|
+
def [](index)
|
|
91
|
+
items[index]
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Get the n most recent items, sorted by date
|
|
95
|
+
#
|
|
96
|
+
# @rbs (?Integer) -> Array[Hash[Symbol, untyped]]
|
|
97
|
+
# Returns up to +count+ items ordered newest first.
#
# An item's date is the first value set among :pubDate, :updated and
# :published (the same fields items_since consults). Items whose date is
# missing or is not a Time are ranked as the epoch, so they sort last
# instead of raising when compared against real Time values.
def latest(count = 10)
  epoch = Time.at(0)

  ranked = items.sort_by do |item|
    date = item[:pubDate] || item[:updated] || item[:published]
    date.is_a?(Time) ? date : epoch
  end

  ranked.reverse.first(count)
end
|
|
100
|
+
|
|
101
|
+
# @rbs () -> Symbol
|
|
102
|
+
def feed_type
|
|
103
|
+
atom_namespaced_feed = source.match?(/<(atom:)?feed\b[^>]*xmlns(:\w+)?=['"][^'"]*atom/i)
|
|
104
|
+
return :atom if atom_namespaced_feed
|
|
105
|
+
return :rss2 if source.match?(/<rss[^>]*version=['"]2/i)
|
|
106
|
+
return :rss1 if source.match?(/<rdf:RDF/i)
|
|
107
|
+
return :rss09 if source.match?(/<rss[^>]*version=['"]0\.9/i)
|
|
108
|
+
|
|
109
|
+
:unknown
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# @rbs () -> bool
|
|
113
|
+
def valid?
|
|
114
|
+
return false if items.empty?
|
|
115
|
+
|
|
116
|
+
title_value = instance_variable_get(:@title)
|
|
117
|
+
link_value = instance_variable_get(:@link)
|
|
118
|
+
return true if title_value || link_value
|
|
119
|
+
|
|
120
|
+
false
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# @rbs (Time) -> Array[Hash[Symbol, untyped]]
|
|
124
|
+
def items_since(time)
|
|
125
|
+
items.select do |item|
|
|
126
|
+
item_date = item[:pubDate] || item[:updated] || item[:published]
|
|
127
|
+
item_date.is_a?(Time) && item_date > time
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# @rbs (String) -> Array[Hash[Symbol, untyped]]
|
|
132
|
+
def items_by_category(name)
|
|
133
|
+
query = name.to_s.downcase
|
|
134
|
+
|
|
135
|
+
items.select do |item|
|
|
136
|
+
category = item[:category]
|
|
137
|
+
next false if category.nil?
|
|
138
|
+
|
|
139
|
+
category_matches_query?(category, query)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @rbs (String) -> Array[Hash[Symbol, untyped]]
|
|
144
|
+
def search(query)
|
|
145
|
+
pattern = Regexp.new(Regexp.escape(query.to_s), Regexp::IGNORECASE)
|
|
146
|
+
|
|
147
|
+
items.select do |item|
|
|
148
|
+
searchable_fields(item).any? { |field| field.to_s.match?(pattern) }
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# @rbs (*SimpleRSS) -> Array[Hash[Symbol, untyped]]
|
|
153
|
+
def merge(*feeds)
|
|
154
|
+
all_items = [items, *feeds.map(&:items)].flatten
|
|
155
|
+
dedupe_items(sorted_items_by_date(all_items))
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# @rbs (SimpleRSS) -> Hash[Symbol, Array[Hash[Symbol, untyped]]]
|
|
159
|
+
def diff(other)
|
|
160
|
+
other_keys = keyed_item_set(other.items)
|
|
161
|
+
current_keys = keyed_item_set(items)
|
|
162
|
+
|
|
163
|
+
{
|
|
164
|
+
added: select_new_keyed_items(other.items, current_keys),
|
|
165
|
+
removed: select_new_keyed_items(items, other_keys)
|
|
166
|
+
}
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# @rbs () -> self
|
|
170
|
+
def dedupe
|
|
171
|
+
@items = dedupe_items(items)
|
|
172
|
+
self
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# @rbs () -> Array[Hash[Symbol, untyped]]
|
|
176
|
+
def enclosures
|
|
177
|
+
items.filter_map do |item|
|
|
178
|
+
enclosure_url = item[:enclosure_url]
|
|
179
|
+
next if blank_value?(enclosure_url)
|
|
180
|
+
|
|
181
|
+
{
|
|
182
|
+
url: enclosure_url,
|
|
183
|
+
type: item[:enclosure_type],
|
|
184
|
+
length: item[:enclosure_length],
|
|
185
|
+
item: item
|
|
186
|
+
}
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# @rbs () -> Array[String]
|
|
191
|
+
def images
|
|
192
|
+
items.flat_map { |item| item_image_urls(item) }.uniq
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# @rbs (?Hash[Symbol, untyped]) -> Hash[Symbol, untyped]
|
|
196
|
+
def as_json(_options = {})
|
|
197
|
+
hash = {} #: Hash[Symbol, untyped]
|
|
198
|
+
|
|
199
|
+
@@feed_tags.each do |tag|
|
|
200
|
+
tag_cleaned = clean_tag(tag)
|
|
201
|
+
value = instance_variable_get("@#{tag_cleaned}")
|
|
202
|
+
hash[tag_cleaned] = serialize_value(value) if value
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
hash[:items] = items.map do |item|
|
|
206
|
+
item.transform_values { |v| serialize_value(v) }
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
hash
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# @rbs (*untyped) -> String
|
|
213
|
+
def to_json(*)
|
|
214
|
+
require "json"
|
|
215
|
+
JSON.generate(as_json)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
alias to_hash as_json
|
|
219
|
+
|
|
220
|
+
# @rbs (?format: Symbol) -> String
|
|
221
|
+
def to_xml(format: :rss2)
|
|
222
|
+
case format
|
|
223
|
+
when :rss2 then to_rss2_xml
|
|
224
|
+
when :atom then to_atom_xml
|
|
225
|
+
else raise ArgumentError, "Unknown format: #{format}. Supported: :rss2, :atom"
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
|
|
63
229
|
class << self
|
|
64
230
|
# @rbs () -> Array[Symbol]
|
|
65
231
|
def feed_tags
|
|
@@ -87,6 +253,100 @@ class SimpleRSS
|
|
|
87
253
|
def parse(source, options = {})
|
|
88
254
|
new source, options
|
|
89
255
|
end
|
|
256
|
+
|
|
257
|
+
# @rbs (untyped, ?Hash[Symbol, untyped]) -> bool
|
|
258
|
+
def valid?(source, options = {})
|
|
259
|
+
parse(source, options)
|
|
260
|
+
true
|
|
261
|
+
rescue StandardError
|
|
262
|
+
false
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# @rbs (*SimpleRSS) -> Array[Hash[Symbol, untyped]]
|
|
266
|
+
def merge(*feeds)
|
|
267
|
+
first_feed = feeds.first
|
|
268
|
+
return [] if first_feed.nil?
|
|
269
|
+
|
|
270
|
+
first_feed.merge(*feeds.drop(1))
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Fetch and parse a feed from a URL
|
|
274
|
+
# Returns nil if conditional GET returns 304 Not Modified
|
|
275
|
+
#
|
|
276
|
+
# @rbs (String, ?Hash[Symbol, untyped]) -> SimpleRSS?
|
|
277
|
+
# Fetches +url+ and parses the response body as a feed.
#
# Returns nil when the server answers 304 Not Modified. Raises
# SimpleRSSError for any other non-success response. On success the parsed
# feed is returned with @etag and @last_modified set from the response's
# ETag and Last-Modified headers.
#
# +options+ is passed both to the HTTP helpers and to parse.
def fetch(url, options = {})
  require "net/http"
  require "uri"

  response = perform_fetch(URI.parse(url), options)
  return nil if response.is_a?(Net::HTTPNotModified)

  raise SimpleRSSError, "HTTP #{response.code}: #{response.message}" unless response.is_a?(Net::HTTPSuccess)

  # A successful response can carry no body at all (e.g. 204); hand the
  # parser an empty string rather than calling force_encoding on nil.
  body = (response.body || +"").force_encoding(Encoding::UTF_8)

  feed = parse(body, options)
  feed.instance_variable_set(:@etag, response["ETag"])
  feed.instance_variable_set(:@last_modified, response["Last-Modified"])
  feed
end
|
|
294
|
+
|
|
295
|
+
private
|
|
296
|
+
|
|
297
|
+
# @rbs (untyped, Hash[Symbol, untyped]) -> untyped
|
|
298
|
+
def perform_fetch(uri, options)
|
|
299
|
+
http = build_http(uri, options)
|
|
300
|
+
request = build_request(uri, options)
|
|
301
|
+
|
|
302
|
+
response = http.request(request)
|
|
303
|
+
handle_redirect(response, options) || response
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
# @rbs (untyped, Hash[Symbol, untyped]) -> untyped
|
|
307
|
+
def build_http(uri, options)
|
|
308
|
+
host = uri.host || raise(SimpleRSSError, "Invalid URL: missing host")
|
|
309
|
+
http = Net::HTTP.new(host, uri.port)
|
|
310
|
+
http.use_ssl = uri.scheme == "https"
|
|
311
|
+
|
|
312
|
+
timeout = options[:timeout]
|
|
313
|
+
if timeout
|
|
314
|
+
http.open_timeout = timeout
|
|
315
|
+
http.read_timeout = timeout
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
http
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
# @rbs (untyped, Hash[Symbol, untyped]) -> untyped
|
|
322
|
+
def build_request(uri, options)
|
|
323
|
+
request = Net::HTTP::Get.new(uri)
|
|
324
|
+
request["User-Agent"] = "SimpleRSS/#{VERSION}"
|
|
325
|
+
|
|
326
|
+
# Conditional GET headers
|
|
327
|
+
request["If-None-Match"] = options[:etag] if options[:etag]
|
|
328
|
+
request["If-Modified-Since"] = options[:last_modified] if options[:last_modified]
|
|
329
|
+
|
|
330
|
+
# Custom headers
|
|
331
|
+
options[:headers]&.each { |key, value| request[key] = value }
|
|
332
|
+
|
|
333
|
+
request
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
# @rbs (untyped, Hash[Symbol, untyped]) -> untyped
|
|
337
|
+
# Follows an HTTP redirect by issuing a new fetch for the Location target.
#
# Returns nil when +response+ is not a redirection, when
# options[:follow_redirects] is false, or when there is no Location header.
# Otherwise returns whatever perform_fetch returns for the target.
# Raises SimpleRSSError once more than 5 redirects have been followed; the
# running count travels in options[:_redirects].
def handle_redirect(response, options)
  return nil unless response.is_a?(Net::HTTPRedirection)
  return nil if options[:follow_redirects] == false

  location = response["Location"]
  return nil unless location

  redirects = (options[:_redirects] || 0) + 1
  raise SimpleRSSError, "Too many redirects" if redirects > 5

  # Location may be relative (e.g. "/feed.xml"). Resolve it against the URI
  # of the request that produced this response when that URI is known;
  # parsing it on its own would give a URI without a host.
  base = response.uri
  target = base ? URI.join(base.to_s, location) : URI.parse(location)

  perform_fetch(target, options.merge(_redirects: redirects))
end
|
|
90
350
|
end
|
|
91
351
|
|
|
92
352
|
DATE_TAGS = %i[pubDate lastBuildDate published updated expirationDate modified dc:date].freeze
|
|
@@ -143,11 +403,33 @@ class SimpleRSS
|
|
|
143
403
|
|
|
144
404
|
parse_item_tag(item, tag, match[3], match[2])
|
|
145
405
|
end
|
|
146
|
-
item.define_singleton_method(:method_missing) { |name,
|
|
406
|
+
item.define_singleton_method(:method_missing) { |name, *_args| self[name] }
|
|
407
|
+
add_item_media_helpers(item)
|
|
147
408
|
@items << item
|
|
148
409
|
end
|
|
149
410
|
end
|
|
150
411
|
|
|
412
|
+
# @rbs (Hash[Symbol, untyped]) -> void
|
|
413
|
+
# Defines two singleton helpers on +item+:
#
#   has_media? - true when any of the media URL fields is non-blank
#   media_url  - the first non-blank media URL field, or nil
#
# Fields are consulted in this order: :media_content_url,
# :media_thumbnail_url, :enclosure_url, :itunes_image_href. The list lives
# in one place so the two helpers cannot drift apart.
def add_item_media_helpers(item)
  media_keys = %i[media_content_url media_thumbnail_url enclosure_url itunes_image_href].freeze

  # has_media? is answered by media_url, which is defined just below.
  item.define_singleton_method(:has_media?) { !media_url.nil? }

  item.define_singleton_method(:media_url) do
    # nil.to_s is "", so a single blank check covers nil as well.
    values_at(*media_keys).find { |value| !value.to_s.strip.empty? }
  end
end
|
|
432
|
+
|
|
151
433
|
# @rbs (Hash[Symbol, untyped], Symbol, String?, String?) -> void
|
|
152
434
|
def parse_item_tag(item, tag, content, item_attrs = nil)
|
|
153
435
|
return if content.nil?
|
|
@@ -265,6 +547,210 @@ class SimpleRSS
|
|
|
265
547
|
tag.to_s.tr(":", "_").intern
|
|
266
548
|
end
|
|
267
549
|
|
|
550
|
+
# @rbs (untyped, String) -> bool
|
|
551
|
+
# True when +query+ occurs, case-insensitively on the category side, in
# +category+. +category+ may be a single value or an Array of values; for an
# Array, one matching element is enough. +query+ is compared as given.
def category_matches_query?(category, query)
  candidates = category.is_a?(Array) ? category : [category]
  candidates.any? { |candidate| candidate.to_s.downcase.include?(query) }
end
|
|
556
|
+
|
|
557
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[untyped]
|
|
558
|
+
def searchable_fields(item)
|
|
559
|
+
[item[:title], item[:description], item[:summary], item[:content]]
|
|
560
|
+
end
|
|
561
|
+
|
|
562
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Set[String]
|
|
563
|
+
def keyed_item_set(item_list)
|
|
564
|
+
item_list.each_with_object(Set.new) do |item, keys|
|
|
565
|
+
key = item_key(item)
|
|
566
|
+
next if key.nil?
|
|
567
|
+
|
|
568
|
+
keys.add(key)
|
|
569
|
+
end
|
|
570
|
+
end
|
|
571
|
+
|
|
572
|
+
# @rbs (Array[Hash[Symbol, untyped]], Set[String]) -> Array[Hash[Symbol, untyped]]
|
|
573
|
+
def select_new_keyed_items(item_list, known_keys)
|
|
574
|
+
item_list.select do |item|
|
|
575
|
+
key = item_key(item)
|
|
576
|
+
!key.nil? && !known_keys.include?(key)
|
|
577
|
+
end
|
|
578
|
+
end
|
|
579
|
+
|
|
580
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Array[Hash[Symbol, untyped]]
|
|
581
|
+
def sorted_items_by_date(item_list)
|
|
582
|
+
keyed_items, unkeyed_items = item_list.partition { |item| !item_key(item).nil? }
|
|
583
|
+
keyed_items.sort_by { |item| item_date(item) || Time.at(0) }.reverse + unkeyed_items
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Array[Hash[Symbol, untyped]]
|
|
587
|
+
# Returns a new array with duplicate items removed, keeping the first
# occurrence of each key as computed by item_key. Items for which item_key
# returns nil are always kept. Input order is preserved.
def dedupe_items(item_list)
  seen = Set.new

  item_list.select do |entry|
    identity = item_key(entry)
    # Set#add? returns nil when the key was already present.
    identity.nil? || !seen.add?(identity).nil?
  end
end
|
|
604
|
+
|
|
605
|
+
# @rbs (Hash[Symbol, untyped]) -> String?
|
|
606
|
+
def item_key(item)
|
|
607
|
+
key = item[:guid] || item[:id] || item[:link]
|
|
608
|
+
return nil if key.nil?
|
|
609
|
+
|
|
610
|
+
key.to_s
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
# @rbs (Hash[Symbol, untyped]) -> Time?
|
|
614
|
+
# Returns the item's date: the first truthy value among :pubDate, :updated
# and :published, but only when that value is a Time. Returns nil otherwise.
def item_date(item)
  candidate = %i[pubDate updated published].lazy.map { |field| item[field] }.find(&:itself)
  candidate if candidate.is_a?(Time)
end
|
|
620
|
+
|
|
621
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[String]
|
|
622
|
+
def item_image_urls(item)
|
|
623
|
+
[item[:media_thumbnail_url], item[:media_content_url], item[:itunes_image_href]]
|
|
624
|
+
.compact
|
|
625
|
+
.reject { |url| blank_value?(url) }
|
|
626
|
+
end
|
|
627
|
+
|
|
628
|
+
# @rbs (untyped) -> bool
|
|
629
|
+
def blank_value?(value)
|
|
630
|
+
value.to_s.strip.empty?
|
|
631
|
+
end
|
|
632
|
+
|
|
633
|
+
# @rbs (untyped) -> untyped
|
|
634
|
+
# Converts a value for serialization: Time instances become ISO 8601
# strings; every other value is returned unchanged.
def serialize_value(value)
  value.is_a?(Time) ? value.iso8601 : value
end
|
|
640
|
+
|
|
641
|
+
# @rbs (String?) -> String
|
|
642
|
+
# Escapes the five XML special characters (& < > ' ") in +text+ as entity
# references. nil yields an empty string; any other value is converted with
# to_s first.
def escape_xml(text)
  return "" if text.nil?

  entities = {
    "&" => "&amp;",
    "<" => "&lt;",
    ">" => "&gt;",
    "'" => "&apos;",
    '"' => "&quot;"
  }

  # One pass over the input, so an "&" produced by a replacement is never
  # escaped a second time.
  text.to_s.gsub(/[&<>'"]/, entities)
end
|
|
652
|
+
|
|
653
|
+
# @rbs (Array[String], String, untyped) -> void
|
|
654
|
+
def add_xml_element(elements, tag, value)
|
|
655
|
+
elements << "<#{tag}>#{escape_xml(value)}</#{tag}>" if value
|
|
656
|
+
end
|
|
657
|
+
|
|
658
|
+
# @rbs (Array[String], String, untyped, Symbol) -> void
|
|
659
|
+
def add_xml_time_element(elements, tag, value, format)
|
|
660
|
+
return unless value.is_a?(Time)
|
|
661
|
+
|
|
662
|
+
formatted = format == :rfc2822 ? value.rfc2822 : value.iso8601
|
|
663
|
+
elements << "<#{tag}>#{formatted}</#{tag}>"
|
|
664
|
+
end
|
|
665
|
+
|
|
666
|
+
# @rbs () -> String
|
|
667
|
+
def to_rss2_xml
|
|
668
|
+
xml = ['<?xml version="1.0" encoding="UTF-8"?>', '<rss version="2.0">', "<channel>"]
|
|
669
|
+
xml.concat(rss2_channel_elements)
|
|
670
|
+
items.each { |item| xml.concat(rss2_item_elements(item)) }
|
|
671
|
+
xml << "</channel>"
|
|
672
|
+
xml << "</rss>"
|
|
673
|
+
xml.join("\n")
|
|
674
|
+
end
|
|
675
|
+
|
|
676
|
+
# @rbs () -> Array[String]
|
|
677
|
+
def rss2_channel_elements
|
|
678
|
+
elements = [] #: Array[String]
|
|
679
|
+
add_xml_element(elements, "title", instance_variable_get(:@title))
|
|
680
|
+
add_xml_element(elements, "link", instance_variable_get(:@link))
|
|
681
|
+
add_xml_element(elements, "description", instance_variable_get(:@description))
|
|
682
|
+
add_xml_element(elements, "language", instance_variable_get(:@language))
|
|
683
|
+
add_xml_time_element(elements, "pubDate", instance_variable_get(:@pubDate), :rfc2822)
|
|
684
|
+
add_xml_time_element(elements, "lastBuildDate", instance_variable_get(:@lastBuildDate), :rfc2822)
|
|
685
|
+
add_xml_element(elements, "generator", instance_variable_get(:@generator))
|
|
686
|
+
elements
|
|
687
|
+
end
|
|
688
|
+
|
|
689
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[String]
|
|
690
|
+
# Builds the RSS 2.0 markup lines for one item, from "<item>" through
# "</item>". A child element is emitted only for fields that are set;
# pubDate is emitted only when it is a Time. Text fields go through
# escape_xml, except the description, which is wrapped in CDATA.
def rss2_item_elements(item)
  elements = ["<item>"] #: Array[String]
  elements << "<title>#{escape_xml(item[:title])}</title>" if item[:title]
  elements << "<link>#{escape_xml(item[:link])}</link>" if item[:link]

  if item[:description]
    # A literal "]]>" would close the CDATA section early; split it across
    # two adjacent CDATA sections so the text survives intact.
    description = item[:description].to_s.gsub("]]>", "]]]]><![CDATA[>")
    elements << "<description><![CDATA[#{description}]]></description>"
  end

  elements << "<pubDate>#{item[:pubDate].rfc2822}</pubDate>" if item[:pubDate].is_a?(Time)
  elements << "<guid>#{escape_xml(item[:guid])}</guid>" if item[:guid]
  elements << "<author>#{escape_xml(item[:author])}</author>" if item[:author]

  if item[:category]
    # A category may be a single value or an Array of values; emit one
    # element per value instead of the Array's string form.
    categories = item[:category].is_a?(Array) ? item[:category] : [item[:category]]
    categories.each { |category| elements << "<category>#{escape_xml(category)}</category>" }
  end

  elements << "</item>"
  elements
end
|
|
702
|
+
|
|
703
|
+
# @rbs () -> String
|
|
704
|
+
def to_atom_xml
|
|
705
|
+
xml = ['<?xml version="1.0" encoding="UTF-8"?>', '<feed xmlns="http://www.w3.org/2005/Atom">']
|
|
706
|
+
xml.concat(atom_feed_elements)
|
|
707
|
+
items.each { |item| xml.concat(atom_entry_elements(item)) }
|
|
708
|
+
xml << "</feed>"
|
|
709
|
+
xml.join("\n")
|
|
710
|
+
end
|
|
711
|
+
|
|
712
|
+
# @rbs () -> Array[String]
|
|
713
|
+
# Builds the feed-level Atom markup lines (title, link, id, updated,
# subtitle, author, generator) from the feed's instance variables. Elements
# whose source value is unset are omitted.
def atom_feed_elements
  elements = [] #: Array[String]
  title_val = instance_variable_get(:@title)
  link_val = instance_variable_get(:@link)
  # The id falls back to the link, but must not depend on a link being
  # present: a feed with an @id and no @link still gets its <id>.
  id_val = instance_variable_get(:@id) || link_val

  add_xml_element(elements, "title", title_val)
  elements << "<link href=\"#{escape_xml(link_val)}\" rel=\"alternate\"/>" if link_val
  elements << "<id>#{escape_xml(id_val)}</id>" if id_val
  add_xml_time_element(elements, "updated", instance_variable_get(:@updated), :iso8601)
  add_xml_element(elements, "subtitle", instance_variable_get(:@subtitle))

  author_val = instance_variable_get(:@author)
  elements << "<author><name>#{escape_xml(author_val)}</name></author>" if author_val

  add_xml_element(elements, "generator", instance_variable_get(:@generator))
  elements
end
|
|
728
|
+
|
|
729
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[String]
|
|
730
|
+
# Builds the Atom markup lines for one item, from "<entry>" through
# "</entry>". A child element is emitted only for fields that are set. The
# id falls back from :id to :guid to :link, and the summary falls back from
# :summary to :description. Summary and content are wrapped in CDATA; other
# text goes through escape_xml. The <published> element is delegated to
# atom_entry_published.
def atom_entry_elements(item)
  # A literal "]]>" would close a CDATA section early; split it across two
  # adjacent CDATA sections so the text survives intact.
  cdata = ->(text) { "<![CDATA[#{text.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>" }

  elements = ["<entry>"] #: Array[String]
  elements << "<title>#{escape_xml(item[:title])}</title>" if item[:title]
  elements << "<link href=\"#{escape_xml(item[:link])}\" rel=\"alternate\"/>" if item[:link]

  entry_id = item[:id] || item[:guid] || item[:link]
  elements << "<id>#{escape_xml(entry_id)}</id>" if entry_id

  elements << "<updated>#{item[:updated].iso8601}</updated>" if item[:updated].is_a?(Time)
  atom_entry_published(elements, item)

  summary = item[:summary] || item[:description]
  elements << "<summary>#{cdata.call(summary)}</summary>" if summary
  elements << "<content>#{cdata.call(item[:content])}</content>" if item[:content]
  elements << "<author><name>#{escape_xml(item[:author])}</name></author>" if item[:author]

  if item[:category]
    # A category may be a single value or an Array of values; emit one
    # element per value instead of the Array's string form.
    categories = item[:category].is_a?(Array) ? item[:category] : [item[:category]]
    categories.each { |category| elements << "<category term=\"#{escape_xml(category)}\"/>" }
  end

  elements << "</entry>"
  elements
end
|
|
744
|
+
|
|
745
|
+
# @rbs (Array[String], Hash[Symbol, untyped]) -> void
|
|
746
|
+
def atom_entry_published(elements, item)
|
|
747
|
+
if item[:published].is_a?(Time)
|
|
748
|
+
elements << "<published>#{item[:published].iso8601}</published>"
|
|
749
|
+
elsif item[:pubDate].is_a?(Time)
|
|
750
|
+
elements << "<published>#{item[:pubDate].iso8601}</published>"
|
|
751
|
+
end
|
|
752
|
+
end
|
|
753
|
+
|
|
268
754
|
# @rbs (String) -> String
|
|
269
755
|
def unescape(content)
|
|
270
756
|
result = if content =~ %r{([^-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]]%)}
|
|
@@ -277,5 +763,5 @@ class SimpleRSS
|
|
|
277
763
|
end
|
|
278
764
|
end
|
|
279
765
|
|
|
280
|
-
class SimpleRSSError < StandardError
|
|
766
|
+
class SimpleRSSError < StandardError # rubocop:disable Style/OneClassPerFile
|
|
281
767
|
end
|
data/simple-rss.gemspec
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "simple-rss"
|
|
3
|
-
s.version = "2.
|
|
3
|
+
s.version = "2.2.0"
|
|
4
4
|
s.date = "2025-12-28"
|
|
5
5
|
s.summary = "A simple, flexible, extensible, and liberal RSS and Atom reader for Ruby. It is designed to be backwards compatible with the standard RSS parser, but will never do RSS generation."
|
|
6
6
|
s.email = "lucas@rufy.com"
|
|
7
|
-
s.homepage = "
|
|
7
|
+
s.homepage = "https://github.com/cardmagic/simple-rss"
|
|
8
8
|
s.description = "A simple, flexible, extensible, and liberal RSS and Atom reader for Ruby. It is designed to be backwards compatible with the standard RSS parser, but will never do RSS generation."
|
|
9
9
|
s.authors = ["Lucas Carlson"]
|
|
10
|
-
s.files = Dir["lib/**/*", "test/**/*", "LICENSE", "README.
|
|
11
|
-
s.
|
|
10
|
+
s.files = Dir["lib/**/*", "test/**/*", "LICENSE", "README.md", "Rakefile", "simple-rss.gemspec"]
|
|
11
|
+
s.required_ruby_version = ">= 3.1"
|
|
12
12
|
s.add_development_dependency "rake"
|
|
13
13
|
s.add_development_dependency "rdoc"
|
|
14
14
|
s.add_development_dependency "test-unit"
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class EnumerableTest < Test::Unit::TestCase
|
|
4
|
+
def setup
|
|
5
|
+
@rss20 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss20.xml")
|
|
6
|
+
@atom = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/atom.xml")
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def test_includes_enumerable
|
|
10
|
+
assert_includes SimpleRSS.included_modules, Enumerable
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_each_iterates_over_items
|
|
14
|
+
titles = @rss20.map { |item| item[:title] }
|
|
15
|
+
assert_equal @rss20.items.map { |i| i[:title] }, titles
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def test_each_returns_enumerator_without_block
|
|
19
|
+
enumerator = @rss20.each
|
|
20
|
+
assert_kind_of Enumerator, enumerator
|
|
21
|
+
assert_equal @rss20.items.size, enumerator.count
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def test_each_returns_self_with_block
|
|
25
|
+
count = 0
|
|
26
|
+
result = @rss20.each { |_item| count += 1 }
|
|
27
|
+
assert_equal @rss20, result
|
|
28
|
+
assert_equal @rss20.items.size, count
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_enumerable_map
|
|
32
|
+
titles = @rss20.map { |item| item[:title] }
|
|
33
|
+
assert_equal @rss20.items.map { |i| i[:title] }, titles
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_enumerable_select
|
|
37
|
+
items_with_link = @rss20.select { |item| item[:link] }
|
|
38
|
+
assert_equal @rss20.items.select { |i| i[:link] }, items_with_link
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def test_enumerable_first
|
|
42
|
+
assert_equal @rss20.items.first, @rss20.first
|
|
43
|
+
assert_equal @rss20.items.first(3), @rss20.first(3)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def test_enumerable_count
|
|
47
|
+
assert_equal @rss20.items.size, @rss20.count
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def test_index_accessor
|
|
51
|
+
assert_equal @rss20.items[0], @rss20[0]
|
|
52
|
+
assert_equal @rss20.items[5], @rss20[5]
|
|
53
|
+
assert_equal @rss20.items[-1], @rss20[-1]
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def test_index_accessor_out_of_bounds
|
|
57
|
+
assert_nil @rss20[100]
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def test_latest_returns_sorted_items
|
|
61
|
+
latest = @rss20.latest(3)
|
|
62
|
+
assert_equal 3, latest.size
|
|
63
|
+
|
|
64
|
+
dates = latest.map { |item| item[:pubDate] }
|
|
65
|
+
assert_equal dates, dates.sort.reverse
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def test_latest_default_count
|
|
69
|
+
latest = @rss20.latest
|
|
70
|
+
assert latest.size <= 10
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def test_latest_with_atom_uses_updated
|
|
74
|
+
latest = @atom.latest(1)
|
|
75
|
+
assert_equal 1, latest.size
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def test_latest_handles_missing_dates
|
|
79
|
+
rss_with_missing_dates = SimpleRSS.parse <<~RSS
|
|
80
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
81
|
+
<rss version="2.0">
|
|
82
|
+
<channel>
|
|
83
|
+
<title>Test Feed</title>
|
|
84
|
+
<link>http://example.com</link>
|
|
85
|
+
<item>
|
|
86
|
+
<title>No Date</title>
|
|
87
|
+
</item>
|
|
88
|
+
<item>
|
|
89
|
+
<title>Has Date</title>
|
|
90
|
+
<pubDate>Wed, 24 Aug 2005 13:33:34 GMT</pubDate>
|
|
91
|
+
</item>
|
|
92
|
+
</channel>
|
|
93
|
+
</rss>
|
|
94
|
+
RSS
|
|
95
|
+
|
|
96
|
+
latest = rss_with_missing_dates.latest(2)
|
|
97
|
+
assert_equal 2, latest.size
|
|
98
|
+
assert_equal "Has Date", latest.first[:title]
|
|
99
|
+
assert_equal "No Date", latest.last[:title]
|
|
100
|
+
end
|
|
101
|
+
end
|