simple-rss 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/simple-rss.rb +222 -4
- data/simple-rss.gemspec +2 -2
- data/test/base/feed_merging_and_diffing_test.rb +140 -0
- data/test/base/fetch_integration_test.rb +25 -0
- data/test/base/fetch_test.rb +0 -27
- data/test/base/filtering_and_validation_test.rb +187 -0
- data/test/base/media_and_enclosure_helpers_test.rb +84 -0
- metadata +7 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0b914acfc63bfc4e787b6a0373e6c4fbbcec1b953a6517a2709a70b96e5993a6
|
|
4
|
+
data.tar.gz: 26e9dddcca6e05b34e8ef6da52a3654fadb6932ac9558f9ee8e70117e99f1e9f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0bbb1967e261cec7c2fdb1bb00fd511562a4647043c40f5b7b6e56aedc5d8d8f003727988ffd2fb77fb7291b397d41008e40eb3a14d156889168423d90f934c5
|
|
7
|
+
data.tar.gz: c1428ee431c4bfd718a573d2d89e5121e9cef46cbd1ce5ade80b21b9bc98b939f35dcdd133287fb70de83c3a00b7a5099d97a2f12699c6224a194c874a42e215
|
data/lib/simple-rss.rb
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
require "cgi"
|
|
4
4
|
require "time"
|
|
5
5
|
|
|
6
|
-
class SimpleRSS
|
|
6
|
+
class SimpleRSS # rubocop:disable Metrics/ClassLength
|
|
7
7
|
# @rbs skip
|
|
8
8
|
include Enumerable
|
|
9
9
|
|
|
10
10
|
# @rbs!
|
|
11
11
|
# include Enumerable[Hash[Symbol, untyped]]
|
|
12
12
|
|
|
13
|
-
VERSION = "2.1.0".freeze
|
|
13
|
+
VERSION = "2.2.0".freeze
|
|
14
14
|
|
|
15
15
|
# @rbs @items: Array[Hash[Symbol, untyped]]
|
|
16
16
|
# @rbs @source: String
|
|
@@ -52,6 +52,9 @@ class SimpleRSS
|
|
|
52
52
|
media:title media:thumbnail#url media:thumbnail#height media:thumbnail#width
|
|
53
53
|
media:credit media:credit#role
|
|
54
54
|
media:category media:category#scheme
|
|
55
|
+
media:description
|
|
56
|
+
enclosure#url enclosure#type enclosure#length
|
|
57
|
+
itunes:duration itunes:image#href
|
|
55
58
|
]
|
|
56
59
|
|
|
57
60
|
# @rbs (untyped, ?Hash[Symbol, untyped]) -> void
|
|
@@ -95,6 +98,100 @@ class SimpleRSS
|
|
|
95
98
|
items.sort_by { |item| item[:pubDate] || item[:updated] || Time.at(0) }.reverse.first(count)
|
|
96
99
|
end
|
|
97
100
|
|
|
101
|
+
# @rbs () -> Symbol
|
|
102
|
+
def feed_type
|
|
103
|
+
atom_namespaced_feed = source.match?(/<(atom:)?feed\b[^>]*xmlns(:\w+)?=['"][^'"]*atom/i)
|
|
104
|
+
return :atom if atom_namespaced_feed
|
|
105
|
+
return :rss2 if source.match?(/<rss[^>]*version=['"]2/i)
|
|
106
|
+
return :rss1 if source.match?(/<rdf:RDF/i)
|
|
107
|
+
return :rss09 if source.match?(/<rss[^>]*version=['"]0\.9/i)
|
|
108
|
+
|
|
109
|
+
:unknown
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# @rbs () -> bool
|
|
113
|
+
def valid?
|
|
114
|
+
return false if items.empty?
|
|
115
|
+
|
|
116
|
+
title_value = instance_variable_get(:@title)
|
|
117
|
+
link_value = instance_variable_get(:@link)
|
|
118
|
+
return true if title_value || link_value
|
|
119
|
+
|
|
120
|
+
false
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# @rbs (Time) -> Array[Hash[Symbol, untyped]]
|
|
124
|
+
def items_since(time)
|
|
125
|
+
items.select do |item|
|
|
126
|
+
item_date = item[:pubDate] || item[:updated] || item[:published]
|
|
127
|
+
item_date.is_a?(Time) && item_date > time
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# @rbs (String) -> Array[Hash[Symbol, untyped]]
|
|
132
|
+
def items_by_category(name)
|
|
133
|
+
query = name.to_s.downcase
|
|
134
|
+
|
|
135
|
+
items.select do |item|
|
|
136
|
+
category = item[:category]
|
|
137
|
+
next false if category.nil?
|
|
138
|
+
|
|
139
|
+
category_matches_query?(category, query)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @rbs (String) -> Array[Hash[Symbol, untyped]]
|
|
144
|
+
def search(query)
|
|
145
|
+
pattern = Regexp.new(Regexp.escape(query.to_s), Regexp::IGNORECASE)
|
|
146
|
+
|
|
147
|
+
items.select do |item|
|
|
148
|
+
searchable_fields(item).any? { |field| field.to_s.match?(pattern) }
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
# @rbs (*SimpleRSS) -> Array[Hash[Symbol, untyped]]
|
|
153
|
+
def merge(*feeds)
|
|
154
|
+
all_items = [items, *feeds.map(&:items)].flatten
|
|
155
|
+
dedupe_items(sorted_items_by_date(all_items))
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# @rbs (SimpleRSS) -> Hash[Symbol, Array[Hash[Symbol, untyped]]]
|
|
159
|
+
def diff(other)
|
|
160
|
+
other_keys = keyed_item_set(other.items)
|
|
161
|
+
current_keys = keyed_item_set(items)
|
|
162
|
+
|
|
163
|
+
{
|
|
164
|
+
added: select_new_keyed_items(other.items, current_keys),
|
|
165
|
+
removed: select_new_keyed_items(items, other_keys)
|
|
166
|
+
}
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# @rbs () -> self
|
|
170
|
+
def dedupe
|
|
171
|
+
@items = dedupe_items(items)
|
|
172
|
+
self
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# @rbs () -> Array[Hash[Symbol, untyped]]
|
|
176
|
+
def enclosures
|
|
177
|
+
items.filter_map do |item|
|
|
178
|
+
enclosure_url = item[:enclosure_url]
|
|
179
|
+
next if blank_value?(enclosure_url)
|
|
180
|
+
|
|
181
|
+
{
|
|
182
|
+
url: enclosure_url,
|
|
183
|
+
type: item[:enclosure_type],
|
|
184
|
+
length: item[:enclosure_length],
|
|
185
|
+
item: item
|
|
186
|
+
}
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
# @rbs () -> Array[String]
|
|
191
|
+
def images
|
|
192
|
+
items.flat_map { |item| item_image_urls(item) }.uniq
|
|
193
|
+
end
|
|
194
|
+
|
|
98
195
|
# @rbs (?Hash[Symbol, untyped]) -> Hash[Symbol, untyped]
|
|
99
196
|
def as_json(_options = {})
|
|
100
197
|
hash = {} #: Hash[Symbol, untyped]
|
|
@@ -157,6 +254,22 @@ class SimpleRSS
|
|
|
157
254
|
new source, options
|
|
158
255
|
end
|
|
159
256
|
|
|
257
|
+
# @rbs (untyped, ?Hash[Symbol, untyped]) -> bool
|
|
258
|
+
def valid?(source, options = {})
|
|
259
|
+
parse(source, options)
|
|
260
|
+
true
|
|
261
|
+
rescue StandardError
|
|
262
|
+
false
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
# @rbs (*SimpleRSS) -> Array[Hash[Symbol, untyped]]
|
|
266
|
+
def merge(*feeds)
|
|
267
|
+
first_feed = feeds.first
|
|
268
|
+
return [] if first_feed.nil?
|
|
269
|
+
|
|
270
|
+
first_feed.merge(*feeds.drop(1))
|
|
271
|
+
end
|
|
272
|
+
|
|
160
273
|
# Fetch and parse a feed from a URL
|
|
161
274
|
# Returns nil if conditional GET returns 304 Not Modified
|
|
162
275
|
#
|
|
@@ -290,11 +403,33 @@ class SimpleRSS
|
|
|
290
403
|
|
|
291
404
|
parse_item_tag(item, tag, match[3], match[2])
|
|
292
405
|
end
|
|
293
|
-
item.define_singleton_method(:method_missing) { |name,
|
|
406
|
+
item.define_singleton_method(:method_missing) { |name, *_args| self[name] }
|
|
407
|
+
add_item_media_helpers(item)
|
|
294
408
|
@items << item
|
|
295
409
|
end
|
|
296
410
|
end
|
|
297
411
|
|
|
412
|
+
# @rbs (Hash[Symbol, untyped]) -> void
|
|
413
|
+
def add_item_media_helpers(item)
|
|
414
|
+
item.define_singleton_method(:has_media?) do
|
|
415
|
+
[
|
|
416
|
+
self[:media_content_url],
|
|
417
|
+
self[:media_thumbnail_url],
|
|
418
|
+
self[:enclosure_url],
|
|
419
|
+
self[:itunes_image_href]
|
|
420
|
+
].any? { |value| !value.nil? && !value.to_s.strip.empty? }
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
item.define_singleton_method(:media_url) do
|
|
424
|
+
[
|
|
425
|
+
self[:media_content_url],
|
|
426
|
+
self[:media_thumbnail_url],
|
|
427
|
+
self[:enclosure_url],
|
|
428
|
+
self[:itunes_image_href]
|
|
429
|
+
].find { |value| !value.nil? && !value.to_s.strip.empty? }
|
|
430
|
+
end
|
|
431
|
+
end
|
|
432
|
+
|
|
298
433
|
# @rbs (Hash[Symbol, untyped], Symbol, String?, String?) -> void
|
|
299
434
|
def parse_item_tag(item, tag, content, item_attrs = nil)
|
|
300
435
|
return if content.nil?
|
|
@@ -412,6 +547,89 @@ class SimpleRSS
|
|
|
412
547
|
tag.to_s.tr(":", "_").intern
|
|
413
548
|
end
|
|
414
549
|
|
|
550
|
+
# @rbs (untyped, String) -> bool
|
|
551
|
+
def category_matches_query?(category, query)
|
|
552
|
+
return category.any? { |value| value.to_s.downcase.include?(query) } if category.is_a?(Array)
|
|
553
|
+
|
|
554
|
+
category.to_s.downcase.include?(query)
|
|
555
|
+
end
|
|
556
|
+
|
|
557
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[untyped]
|
|
558
|
+
def searchable_fields(item)
|
|
559
|
+
[item[:title], item[:description], item[:summary], item[:content]]
|
|
560
|
+
end
|
|
561
|
+
|
|
562
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Set[String]
|
|
563
|
+
def keyed_item_set(item_list)
|
|
564
|
+
item_list.each_with_object(Set.new) do |item, keys|
|
|
565
|
+
key = item_key(item)
|
|
566
|
+
next if key.nil?
|
|
567
|
+
|
|
568
|
+
keys.add(key)
|
|
569
|
+
end
|
|
570
|
+
end
|
|
571
|
+
|
|
572
|
+
# @rbs (Array[Hash[Symbol, untyped]], Set[String]) -> Array[Hash[Symbol, untyped]]
|
|
573
|
+
def select_new_keyed_items(item_list, known_keys)
|
|
574
|
+
item_list.select do |item|
|
|
575
|
+
key = item_key(item)
|
|
576
|
+
!key.nil? && !known_keys.include?(key)
|
|
577
|
+
end
|
|
578
|
+
end
|
|
579
|
+
|
|
580
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Array[Hash[Symbol, untyped]]
|
|
581
|
+
def sorted_items_by_date(item_list)
|
|
582
|
+
keyed_items, unkeyed_items = item_list.partition { |item| !item_key(item).nil? }
|
|
583
|
+
keyed_items.sort_by { |item| item_date(item) || Time.at(0) }.reverse + unkeyed_items
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
# @rbs (Array[Hash[Symbol, untyped]]) -> Array[Hash[Symbol, untyped]]
|
|
587
|
+
def dedupe_items(item_list)
|
|
588
|
+
seen_keys = Set.new
|
|
589
|
+
|
|
590
|
+
item_list.each_with_object([]) do |item, unique_items|
|
|
591
|
+
key = item_key(item)
|
|
592
|
+
|
|
593
|
+
if key.nil?
|
|
594
|
+
unique_items << item
|
|
595
|
+
next
|
|
596
|
+
end
|
|
597
|
+
|
|
598
|
+
next if seen_keys.include?(key)
|
|
599
|
+
|
|
600
|
+
seen_keys.add(key)
|
|
601
|
+
unique_items << item
|
|
602
|
+
end
|
|
603
|
+
end
|
|
604
|
+
|
|
605
|
+
# @rbs (Hash[Symbol, untyped]) -> String?
|
|
606
|
+
def item_key(item)
|
|
607
|
+
key = item[:guid] || item[:id] || item[:link]
|
|
608
|
+
return nil if key.nil?
|
|
609
|
+
|
|
610
|
+
key.to_s
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
# @rbs (Hash[Symbol, untyped]) -> Time?
|
|
614
|
+
def item_date(item)
|
|
615
|
+
date = item[:pubDate] || item[:updated] || item[:published]
|
|
616
|
+
return date if date.is_a?(Time)
|
|
617
|
+
|
|
618
|
+
nil
|
|
619
|
+
end
|
|
620
|
+
|
|
621
|
+
# @rbs (Hash[Symbol, untyped]) -> Array[String]
|
|
622
|
+
def item_image_urls(item)
|
|
623
|
+
[item[:media_thumbnail_url], item[:media_content_url], item[:itunes_image_href]]
|
|
624
|
+
.compact
|
|
625
|
+
.reject { |url| blank_value?(url) }
|
|
626
|
+
end
|
|
627
|
+
|
|
628
|
+
# @rbs (untyped) -> bool
|
|
629
|
+
def blank_value?(value)
|
|
630
|
+
value.to_s.strip.empty?
|
|
631
|
+
end
|
|
632
|
+
|
|
415
633
|
# @rbs (untyped) -> untyped
|
|
416
634
|
def serialize_value(value)
|
|
417
635
|
case value
|
|
@@ -545,5 +763,5 @@ class SimpleRSS
|
|
|
545
763
|
end
|
|
546
764
|
end
|
|
547
765
|
|
|
548
|
-
class SimpleRSSError < StandardError
|
|
766
|
+
class SimpleRSSError < StandardError # rubocop:disable Style/OneClassPerFile
|
|
549
767
|
end
|
data/simple-rss.gemspec
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "simple-rss"
|
|
3
|
-
s.version = "2.1.0"
|
|
4
|
-
s.date = "2025-12-
|
|
3
|
+
s.version = "2.2.0"
|
|
4
|
+
s.date = "2025-12-28"
|
|
5
5
|
s.summary = "A simple, flexible, extensible, and liberal RSS and Atom reader for Ruby. It is designed to be backwards compatible with the standard RSS parser, but will never do RSS generation."
|
|
6
6
|
s.email = "lucas@rufy.com"
|
|
7
7
|
s.homepage = "https://github.com/cardmagic/simple-rss"
|
data/test/base/feed_merging_and_diffing_test.rb
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class FeedMergingAndDiffingTest < Test::Unit::TestCase
|
|
4
|
+
def setup
|
|
5
|
+
@feed_one = SimpleRSS.parse <<~XML
|
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
7
|
+
<rss version="2.0">
|
|
8
|
+
<channel>
|
|
9
|
+
<title>Feed One</title>
|
|
10
|
+
<item>
|
|
11
|
+
<guid>shared-guid</guid>
|
|
12
|
+
<title>Shared (older)</title>
|
|
13
|
+
<pubDate>Mon, 01 Jan 2024 10:00:00 UTC</pubDate>
|
|
14
|
+
</item>
|
|
15
|
+
<item>
|
|
16
|
+
<guid>one-guid</guid>
|
|
17
|
+
<title>Only One</title>
|
|
18
|
+
<pubDate>Mon, 01 Jan 2024 11:00:00 UTC</pubDate>
|
|
19
|
+
</item>
|
|
20
|
+
<item>
|
|
21
|
+
<title>Unidentified One</title>
|
|
22
|
+
</item>
|
|
23
|
+
</channel>
|
|
24
|
+
</rss>
|
|
25
|
+
XML
|
|
26
|
+
|
|
27
|
+
@feed_two = SimpleRSS.parse <<~XML
|
|
28
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
29
|
+
<rss version="2.0">
|
|
30
|
+
<channel>
|
|
31
|
+
<title>Feed Two</title>
|
|
32
|
+
<item>
|
|
33
|
+
<guid>shared-guid</guid>
|
|
34
|
+
<title>Shared (newer)</title>
|
|
35
|
+
<pubDate>Mon, 01 Jan 2024 12:00:00 UTC</pubDate>
|
|
36
|
+
</item>
|
|
37
|
+
<item>
|
|
38
|
+
<link>https://example.com/two-only</link>
|
|
39
|
+
<title>Only Two</title>
|
|
40
|
+
<pubDate>Mon, 01 Jan 2024 13:00:00 UTC</pubDate>
|
|
41
|
+
</item>
|
|
42
|
+
<item>
|
|
43
|
+
<title>Unidentified Two</title>
|
|
44
|
+
</item>
|
|
45
|
+
</channel>
|
|
46
|
+
</rss>
|
|
47
|
+
XML
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def test_merge_dedupes_and_sorts_newest_first
|
|
51
|
+
merged = @feed_one.merge(@feed_two)
|
|
52
|
+
titles = merged.map { |item| item[:title] }
|
|
53
|
+
|
|
54
|
+
assert_equal [
|
|
55
|
+
"Only Two",
|
|
56
|
+
"Shared (newer)",
|
|
57
|
+
"Only One",
|
|
58
|
+
"Unidentified One",
|
|
59
|
+
"Unidentified Two"
|
|
60
|
+
], titles
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def test_class_merge_combines_multiple_feeds
|
|
64
|
+
merged = SimpleRSS.merge(@feed_one, @feed_two)
|
|
65
|
+
|
|
66
|
+
assert_equal 5, merged.size
|
|
67
|
+
assert_equal "Only Two", merged.first[:title]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def test_diff_reports_added_and_removed_items
|
|
71
|
+
old_feed = SimpleRSS.parse <<~XML
|
|
72
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
73
|
+
<rss version="2.0">
|
|
74
|
+
<channel>
|
|
75
|
+
<title>Old Feed</title>
|
|
76
|
+
<item>
|
|
77
|
+
<guid>stay</guid>
|
|
78
|
+
<title>Stay</title>
|
|
79
|
+
</item>
|
|
80
|
+
<item>
|
|
81
|
+
<guid>remove</guid>
|
|
82
|
+
<title>Remove</title>
|
|
83
|
+
</item>
|
|
84
|
+
</channel>
|
|
85
|
+
</rss>
|
|
86
|
+
XML
|
|
87
|
+
|
|
88
|
+
new_feed = SimpleRSS.parse <<~XML
|
|
89
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
90
|
+
<rss version="2.0">
|
|
91
|
+
<channel>
|
|
92
|
+
<title>New Feed</title>
|
|
93
|
+
<item>
|
|
94
|
+
<guid>stay</guid>
|
|
95
|
+
<title>Stay</title>
|
|
96
|
+
</item>
|
|
97
|
+
<item>
|
|
98
|
+
<guid>add</guid>
|
|
99
|
+
<title>Add</title>
|
|
100
|
+
</item>
|
|
101
|
+
</channel>
|
|
102
|
+
</rss>
|
|
103
|
+
XML
|
|
104
|
+
|
|
105
|
+
diff = old_feed.diff(new_feed)
|
|
106
|
+
|
|
107
|
+
assert_equal(["Add"], diff[:added].map { |item| item[:title] })
|
|
108
|
+
assert_equal(["Remove"], diff[:removed].map { |item| item[:title] })
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def test_dedupe_mutates_items_and_keeps_unidentified_entries
|
|
112
|
+
feed = SimpleRSS.parse <<~XML
|
|
113
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
114
|
+
<rss version="2.0">
|
|
115
|
+
<channel>
|
|
116
|
+
<title>Dedupe Feed</title>
|
|
117
|
+
<item>
|
|
118
|
+
<guid>duplicate</guid>
|
|
119
|
+
<title>First duplicate</title>
|
|
120
|
+
</item>
|
|
121
|
+
<item>
|
|
122
|
+
<guid>duplicate</guid>
|
|
123
|
+
<title>Second duplicate</title>
|
|
124
|
+
</item>
|
|
125
|
+
<item>
|
|
126
|
+
<title>Unidentified One</title>
|
|
127
|
+
</item>
|
|
128
|
+
<item>
|
|
129
|
+
<title>Unidentified Two</title>
|
|
130
|
+
</item>
|
|
131
|
+
</channel>
|
|
132
|
+
</rss>
|
|
133
|
+
XML
|
|
134
|
+
|
|
135
|
+
result = feed.dedupe
|
|
136
|
+
|
|
137
|
+
assert_same feed, result
|
|
138
|
+
assert_equal(["First duplicate", "Unidentified One", "Unidentified Two"], feed.items.map { |item| item[:title] })
|
|
139
|
+
end
|
|
140
|
+
end
|
data/test/base/fetch_integration_test.rb
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
# Integration tests that require network access
|
|
4
|
+
# These are skipped by default, run with NETWORK_TESTS=1
|
|
5
|
+
class FetchIntegrationTest < Test::Unit::TestCase
|
|
6
|
+
def test_fetch_real_feed
|
|
7
|
+
omit unless ENV["NETWORK_TESTS"]
|
|
8
|
+
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
|
|
9
|
+
assert_kind_of SimpleRSS, rss
|
|
10
|
+
assert rss.title
|
|
11
|
+
assert rss.items.any?
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_fetch_stores_caching_headers
|
|
15
|
+
omit unless ENV["NETWORK_TESTS"]
|
|
16
|
+
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
|
|
17
|
+
assert(rss.etag || rss.last_modified, "Expected ETag or Last-Modified header")
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def test_fetch_follows_redirect
|
|
21
|
+
omit unless ENV["NETWORK_TESTS"]
|
|
22
|
+
rss = SimpleRSS.fetch("https://github.com/cardmagic/simple-rss/commits/master.atom", timeout: 10)
|
|
23
|
+
assert_kind_of SimpleRSS, rss
|
|
24
|
+
end
|
|
25
|
+
end
|
data/test/base/fetch_test.rb
CHANGED
|
@@ -88,30 +88,3 @@ class FetchTest < Test::Unit::TestCase
|
|
|
88
88
|
end
|
|
89
89
|
end
|
|
90
90
|
end
|
|
91
|
-
|
|
92
|
-
# Integration tests that require network access
|
|
93
|
-
# These are skipped by default, run with NETWORK_TESTS=1
|
|
94
|
-
class FetchIntegrationTest < Test::Unit::TestCase
|
|
95
|
-
def test_fetch_real_feed
|
|
96
|
-
omit unless ENV["NETWORK_TESTS"]
|
|
97
|
-
# Use a reliable, long-lived RSS feed
|
|
98
|
-
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
|
|
99
|
-
assert_kind_of SimpleRSS, rss
|
|
100
|
-
assert rss.title
|
|
101
|
-
assert rss.items.any?
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def test_fetch_stores_caching_headers
|
|
105
|
-
omit unless ENV["NETWORK_TESTS"]
|
|
106
|
-
rss = SimpleRSS.fetch("https://feeds.bbci.co.uk/news/rss.xml", timeout: 10)
|
|
107
|
-
# At least one of these should be present for most feeds
|
|
108
|
-
assert(rss.etag || rss.last_modified, "Expected ETag or Last-Modified header")
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def test_fetch_follows_redirect
|
|
112
|
-
omit unless ENV["NETWORK_TESTS"]
|
|
113
|
-
# GitHub raw URLs often redirect
|
|
114
|
-
rss = SimpleRSS.fetch("https://github.com/cardmagic/simple-rss/commits/master.atom", timeout: 10)
|
|
115
|
-
assert_kind_of SimpleRSS, rss
|
|
116
|
-
end
|
|
117
|
-
end
|
data/test/base/filtering_and_validation_test.rb
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class FilteringAndValidationTest < Test::Unit::TestCase
|
|
4
|
+
def setup
|
|
5
|
+
@rss09 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss09.rdf")
|
|
6
|
+
@rss20 = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/rss20.xml")
|
|
7
|
+
@atom = SimpleRSS.parse open(File.dirname(__FILE__) + "/../data/atom.xml")
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def test_feed_type_for_known_formats
|
|
11
|
+
assert_equal :rss1, @rss09.feed_type
|
|
12
|
+
assert_equal :rss2, @rss20.feed_type
|
|
13
|
+
assert_equal :atom, @atom.feed_type
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_feed_type_unknown_for_non_standard_feed
|
|
17
|
+
feed = SimpleRSS.parse <<~XML
|
|
18
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
19
|
+
<feed>
|
|
20
|
+
<title>Unknown Feed</title>
|
|
21
|
+
<entry>
|
|
22
|
+
<title>Post</title>
|
|
23
|
+
</entry>
|
|
24
|
+
</feed>
|
|
25
|
+
XML
|
|
26
|
+
|
|
27
|
+
assert_equal :unknown, feed.feed_type
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def test_class_valid_returns_true_for_well_formed_feed
|
|
31
|
+
xml = <<~XML
|
|
32
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
33
|
+
<rss version="2.0">
|
|
34
|
+
<channel>
|
|
35
|
+
<title>Valid Feed</title>
|
|
36
|
+
<link>http://example.com</link>
|
|
37
|
+
<item>
|
|
38
|
+
<title>Post</title>
|
|
39
|
+
</item>
|
|
40
|
+
</channel>
|
|
41
|
+
</rss>
|
|
42
|
+
XML
|
|
43
|
+
|
|
44
|
+
assert_equal true, SimpleRSS.valid?(xml)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_class_valid_returns_false_for_invalid_feed
|
|
48
|
+
invalid_xml = open(File.dirname(__FILE__) + "/../data/not-rss.xml").read
|
|
49
|
+
|
|
50
|
+
assert_equal false, SimpleRSS.valid?(invalid_xml)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def test_class_valid_returns_false_when_source_read_fails
|
|
54
|
+
unreadable_source = Object.new
|
|
55
|
+
unreadable_source.define_singleton_method(:read) do
|
|
56
|
+
raise IOError, "stream closed"
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
assert_equal false, SimpleRSS.valid?(unreadable_source)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def test_instance_valid_requires_metadata_and_items
|
|
63
|
+
valid_feed = SimpleRSS.parse <<~XML
|
|
64
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
65
|
+
<rss version="2.0">
|
|
66
|
+
<channel>
|
|
67
|
+
<title>Valid Feed</title>
|
|
68
|
+
<item>
|
|
69
|
+
<title>Post</title>
|
|
70
|
+
</item>
|
|
71
|
+
</channel>
|
|
72
|
+
</rss>
|
|
73
|
+
XML
|
|
74
|
+
|
|
75
|
+
invalid_feed = SimpleRSS.parse <<~XML
|
|
76
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
77
|
+
<rss version="2.0">
|
|
78
|
+
<channel>
|
|
79
|
+
<description>No title and no link</description>
|
|
80
|
+
<item>
|
|
81
|
+
<description>Body only</description>
|
|
82
|
+
</item>
|
|
83
|
+
</channel>
|
|
84
|
+
</rss>
|
|
85
|
+
XML
|
|
86
|
+
|
|
87
|
+
empty_feed = SimpleRSS.parse <<~XML
|
|
88
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
89
|
+
<rss version="2.0">
|
|
90
|
+
<channel>
|
|
91
|
+
<title>No Items</title>
|
|
92
|
+
</channel>
|
|
93
|
+
</rss>
|
|
94
|
+
XML
|
|
95
|
+
|
|
96
|
+
assert_equal true, valid_feed.valid?
|
|
97
|
+
assert_equal false, invalid_feed.valid?
|
|
98
|
+
assert_equal false, empty_feed.valid?
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def test_items_since_filters_by_date
|
|
102
|
+
threshold = Time.parse("Wed Aug 24 13:30:00 UTC 2005")
|
|
103
|
+
|
|
104
|
+
filtered = @rss20.items_since(threshold)
|
|
105
|
+
|
|
106
|
+
assert_equal 1, filtered.size
|
|
107
|
+
assert_operator filtered.first[:pubDate], :>, threshold
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def test_items_by_category_matches_strings_and_arrays
|
|
111
|
+
feed_with_string_category = SimpleRSS.parse <<~XML
|
|
112
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
113
|
+
<rss version="2.0">
|
|
114
|
+
<channel>
|
|
115
|
+
<title>String Category Feed</title>
|
|
116
|
+
<item>
|
|
117
|
+
<title>Ruby News</title>
|
|
118
|
+
<category>Technology</category>
|
|
119
|
+
</item>
|
|
120
|
+
<item>
|
|
121
|
+
<title>Sports News</title>
|
|
122
|
+
<category>Sports</category>
|
|
123
|
+
</item>
|
|
124
|
+
</channel>
|
|
125
|
+
</rss>
|
|
126
|
+
XML
|
|
127
|
+
|
|
128
|
+
feed_with_array_category = SimpleRSS.parse(
|
|
129
|
+
<<~XML,
|
|
130
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
131
|
+
<rss version="2.0">
|
|
132
|
+
<channel>
|
|
133
|
+
<title>Array Category Feed</title>
|
|
134
|
+
<item>
|
|
135
|
+
<title>Dev Update</title>
|
|
136
|
+
<category>Technology</category>
|
|
137
|
+
<category>Ruby</category>
|
|
138
|
+
</item>
|
|
139
|
+
</channel>
|
|
140
|
+
</rss>
|
|
141
|
+
XML
|
|
142
|
+
array_tags: [:category]
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
string_results = feed_with_string_category.items_by_category("tech")
|
|
146
|
+
array_results = feed_with_array_category.items_by_category("ruby")
|
|
147
|
+
|
|
148
|
+
assert_equal 1, string_results.size
|
|
149
|
+
assert_equal "Ruby News", string_results.first[:title]
|
|
150
|
+
assert_equal 1, array_results.size
|
|
151
|
+
assert_equal "Dev Update", array_results.first[:title]
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def test_search_matches_title_description_summary_and_content
|
|
155
|
+
feed = SimpleRSS.parse <<~XML
|
|
156
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
157
|
+
<rss version="2.0">
|
|
158
|
+
<channel>
|
|
159
|
+
<title>Search Feed</title>
|
|
160
|
+
<item>
|
|
161
|
+
<title>Ruby Patterns</title>
|
|
162
|
+
<description>Language design</description>
|
|
163
|
+
</item>
|
|
164
|
+
<item>
|
|
165
|
+
<title>Other Topic</title>
|
|
166
|
+
<description>Talks about BREAKING updates</description>
|
|
167
|
+
</item>
|
|
168
|
+
<item>
|
|
169
|
+
<title>Third Topic</title>
|
|
170
|
+
<summary>A quick ruby summary</summary>
|
|
171
|
+
</item>
|
|
172
|
+
<item>
|
|
173
|
+
<title>Fourth Topic</title>
|
|
174
|
+
<content>Deep dive into Ruby internals</content>
|
|
175
|
+
</item>
|
|
176
|
+
</channel>
|
|
177
|
+
</rss>
|
|
178
|
+
XML
|
|
179
|
+
|
|
180
|
+
ruby_results = feed.search("ruby")
|
|
181
|
+
breaking_results = feed.search("breaking")
|
|
182
|
+
|
|
183
|
+
assert_equal 3, ruby_results.size
|
|
184
|
+
assert_equal 1, breaking_results.size
|
|
185
|
+
assert_equal "Other Topic", breaking_results.first[:title]
|
|
186
|
+
end
|
|
187
|
+
end
|
data/test/base/media_and_enclosure_helpers_test.rb
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class MediaAndEnclosureHelpersTest < Test::Unit::TestCase
|
|
4
|
+
def setup
|
|
5
|
+
@feed = SimpleRSS.parse <<~XML
|
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
7
|
+
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
|
8
|
+
<channel>
|
|
9
|
+
<title>Podcast Feed</title>
|
|
10
|
+
<item>
|
|
11
|
+
<title>Episode 1</title>
|
|
12
|
+
<enclosure url="https://example.com/audio-1.mp3" type="audio/mpeg" length="12345" />
|
|
13
|
+
<media:content url="https://example.com/image-1.jpg" type="image/jpeg" />
|
|
14
|
+
<media:description>Episode image</media:description>
|
|
15
|
+
<media:thumbnail url="https://example.com/thumb-1.jpg" />
|
|
16
|
+
<itunes:duration>00:42:00</itunes:duration>
|
|
17
|
+
<itunes:image href="https://example.com/itunes-1.jpg" />
|
|
18
|
+
</item>
|
|
19
|
+
<item>
|
|
20
|
+
<title>Episode 2</title>
|
|
21
|
+
<media:thumbnail url="https://example.com/thumb-2.jpg" />
|
|
22
|
+
</item>
|
|
23
|
+
<item>
|
|
24
|
+
<title>Episode 3</title>
|
|
25
|
+
<enclosure url="https://example.com/audio-3.mp3" type="audio/mpeg" length="999" />
|
|
26
|
+
</item>
|
|
27
|
+
<item>
|
|
28
|
+
<title>Episode 4</title>
|
|
29
|
+
<enclosure url="" type="audio/mpeg" length="111" />
|
|
30
|
+
<media:thumbnail url="" />
|
|
31
|
+
<itunes:image href="" />
|
|
32
|
+
</item>
|
|
33
|
+
</channel>
|
|
34
|
+
</rss>
|
|
35
|
+
XML
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def test_enclosures_extracts_podcast_enclosures
|
|
39
|
+
enclosures = @feed.enclosures
|
|
40
|
+
|
|
41
|
+
assert_equal 2, enclosures.size
|
|
42
|
+
assert_equal "https://example.com/audio-1.mp3", enclosures.first[:url]
|
|
43
|
+
assert_equal "audio/mpeg", enclosures.first[:type]
|
|
44
|
+
assert_equal "12345", enclosures.first[:length]
|
|
45
|
+
assert_equal "Episode 1", enclosures.first[:item][:title]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def test_images_collects_unique_media_urls
|
|
49
|
+
images = @feed.images
|
|
50
|
+
|
|
51
|
+
assert_equal [
|
|
52
|
+
"https://example.com/thumb-1.jpg",
|
|
53
|
+
"https://example.com/image-1.jpg",
|
|
54
|
+
"https://example.com/itunes-1.jpg",
|
|
55
|
+
"https://example.com/thumb-2.jpg"
|
|
56
|
+
], images
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def test_item_media_helpers
|
|
60
|
+
first_item = @feed.items.first
|
|
61
|
+
second_item = @feed.items[1]
|
|
62
|
+
third_item = @feed.items[2]
|
|
63
|
+
fourth_item = @feed.items.last
|
|
64
|
+
|
|
65
|
+
assert_equal true, first_item.has_media?
|
|
66
|
+
assert_equal "https://example.com/image-1.jpg", first_item.media_url
|
|
67
|
+
|
|
68
|
+
assert_equal true, second_item.has_media?
|
|
69
|
+
assert_equal "https://example.com/thumb-2.jpg", second_item.media_url
|
|
70
|
+
|
|
71
|
+
assert_equal true, third_item.has_media?
|
|
72
|
+
assert_equal "https://example.com/audio-3.mp3", third_item.media_url
|
|
73
|
+
|
|
74
|
+
assert_equal false, fourth_item.has_media?
|
|
75
|
+
assert_nil fourth_item.media_url
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def test_media_description_and_itunes_duration_are_parsed
|
|
79
|
+
first_item = @feed.items.first
|
|
80
|
+
|
|
81
|
+
assert_equal "Episode image", first_item[:media_description]
|
|
82
|
+
assert_equal "00:42:00", first_item[:itunes_duration]
|
|
83
|
+
end
|
|
84
|
+
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: simple-rss
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.1.0
|
|
4
|
+
version: 2.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Lucas Carlson
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2025-12-
|
|
10
|
+
date: 2025-12-28 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: rake
|
|
@@ -70,10 +70,14 @@ files:
|
|
|
70
70
|
- test/base/encoding_test.rb
|
|
71
71
|
- test/base/enumerable_test.rb
|
|
72
72
|
- test/base/feed_attributes_test.rb
|
|
73
|
+
- test/base/feed_merging_and_diffing_test.rb
|
|
74
|
+
- test/base/fetch_integration_test.rb
|
|
73
75
|
- test/base/fetch_test.rb
|
|
76
|
+
- test/base/filtering_and_validation_test.rb
|
|
74
77
|
- test/base/hash_xml_serialization_test.rb
|
|
75
78
|
- test/base/item_attributes_test.rb
|
|
76
79
|
- test/base/json_serialization_test.rb
|
|
80
|
+
- test/base/media_and_enclosure_helpers_test.rb
|
|
77
81
|
- test/data/atom.xml
|
|
78
82
|
- test/data/atom_with_entry_attrs.xml
|
|
79
83
|
- test/data/atom_with_feed_attrs.xml
|
|
@@ -102,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
102
106
|
- !ruby/object:Gem::Version
|
|
103
107
|
version: '0'
|
|
104
108
|
requirements: []
|
|
105
|
-
rubygems_version: 4.0.
|
|
109
|
+
rubygems_version: 4.0.6
|
|
106
110
|
specification_version: 4
|
|
107
111
|
summary: A simple, flexible, extensible, and liberal RSS and Atom reader for Ruby.
|
|
108
112
|
It is designed to be backwards compatible with the standard RSS parser, but will
|