simple-rss 1.3.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/simple-rss.rb CHANGED
@@ -1,160 +1,547 @@
1
- require 'cgi'
2
- require 'time'
1
+ # rbs_inline: enabled
2
+
3
+ require "cgi"
4
+ require "time"
3
5
 
4
6
  class SimpleRSS
5
- VERSION = "1.3.3"
6
-
7
- attr_reader :items, :source
8
- alias :entries :items
9
-
10
- @@feed_tags = [
11
- :id,
12
- :title, :subtitle, :link,
13
- :description,
14
- :author, :webMaster, :managingEditor, :contributor,
15
- :pubDate, :lastBuildDate, :updated, :'dc:date',
16
- :generator, :language, :docs, :cloud,
17
- :ttl, :skipHours, :skipDays,
18
- :image, :logo, :icon, :rating,
19
- :rights, :copyright,
20
- :textInput, :'feedburner:browserFriendly',
21
- :'itunes:author', :'itunes:category'
22
- ]
23
-
24
- @@item_tags = [
25
- :id,
26
- :title, :link, :'link+alternate', :'link+self', :'link+edit', :'link+replies',
27
- :author, :contributor,
28
- :description, :summary, :content, :'content:encoded', :comments,
29
- :pubDate, :published, :updated, :expirationDate, :modified, :'dc:date',
30
- :category, :guid,
31
- :'trackback:ping', :'trackback:about',
32
- :'dc:creator', :'dc:title', :'dc:subject', :'dc:rights', :'dc:publisher',
33
- :'feedburner:origLink',
34
- :'media:content#url', :'media:content#type', :'media:content#height', :'media:content#width', :'media:content#duration',
35
- :'media:title', :'media:thumbnail#url', :'media:thumbnail#height', :'media:thumbnail#width',
36
- :'media:credit', :'media:credit#role',
37
- :'media:category', :'media:category#scheme'
38
- ]
39
-
40
- def initialize(source, options={})
41
- @source = source.respond_to?(:read) ? source.read : source.to_s
42
- @items = Array.new
43
- @options = Hash.new.update(options)
44
-
45
- parse
46
- end
47
-
48
- def channel() self end
49
- alias :feed :channel
50
-
51
- class << self
52
- def feed_tags
53
- @@feed_tags
54
- end
55
- def feed_tags=(ft)
56
- @@feed_tags = ft
57
- end
58
-
59
- def item_tags
60
- @@item_tags
61
- end
62
- def item_tags=(it)
63
- @@item_tags = it
64
- end
65
-
66
- # The strict attribute is for compatibility with Ruby's standard RSS parser
67
- def parse(source, options={})
68
- new source, options
69
- end
70
- end
71
-
72
- private
73
-
74
- def parse
75
- raise SimpleRSSError, "Poorly formatted feed" unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi
76
-
77
- # Feed's title and link
78
- feed_content = $1 if @source =~ %r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi
79
-
80
- @@feed_tags.each do |tag|
81
- if feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
82
- nil
83
- elsif feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
84
- nil
85
- elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
86
- nil
87
- elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
88
- nil
89
- end
90
-
91
- if $2 || $3
92
- tag_cleaned = clean_tag(tag)
93
- instance_variable_set("@#{ tag_cleaned }", clean_content(tag, $2, $3))
94
- self.class.class_eval("attr_reader :#{ tag_cleaned }")
95
- end
96
- end
97
-
98
- # RSS items' title, link, and description
99
- @source.scan( %r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi ) do |match|
100
- item = Hash.new
101
- @@item_tags.each do |tag|
102
- if tag.to_s.include?("+")
103
- tag_data = tag.to_s.split("+")
104
- tag = tag_data[0]
105
- rel = tag_data[1]
106
- if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
107
- nil
108
- elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)/\s*>}mi
109
- nil
110
- end
111
- item[clean_tag("#{tag}+#{rel}")] = clean_content(tag, $3, $4) if $3 || $4
112
- elsif tag.to_s.include?("#")
113
- tag_data = tag.to_s.split("#")
114
- tag = tag_data[0]
115
- attrib = tag_data[1]
116
- if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
117
- nil
118
- elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)/\s*>}mi
119
- nil
120
- end
121
- item[clean_tag("#{tag}_#{attrib}")] = clean_content(tag, attrib, $3) if $3
122
- else
123
- if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
124
- nil
125
- elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)/\s*>}mi
126
- nil
127
- end
128
- item[clean_tag(tag)] = clean_content(tag, $2, $3) if $2 || $3
129
- end
130
- end
131
- def item.method_missing(name, *args) self[name] end
132
- @items << item
133
- end
134
- end
135
-
136
- def clean_content(tag, attrs, content)
137
- content = content.to_s
138
- case tag
139
- when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date'
140
- Time.parse(content) rescue unescape(content)
141
- when :author, :contributor, :skipHours, :skipDays
142
- unescape(content.gsub(/<.*?>/,''))
143
- else
144
- content.empty? && "#{attrs} " =~ /href=['"]?([^'"]*)['" ]/mi ? $1.strip : unescape(content)
145
- end
146
- end
147
-
148
- def clean_tag(tag)
149
- tag.to_s.gsub(':','_').intern
150
- end
151
-
7
+ # @rbs skip
8
+ include Enumerable
9
+
10
+ # @rbs!
11
+ # include Enumerable[Hash[Symbol, untyped]]
12
+
13
+ VERSION = "2.1.0".freeze
14
+
15
+ # @rbs @items: Array[Hash[Symbol, untyped]]
16
+ # @rbs @source: String
17
+ # @rbs @options: Hash[Symbol, untyped]
18
+ # @rbs @etag: String?
19
+ # @rbs @last_modified: String?
20
+
21
+ attr_reader :items #: Array[Hash[Symbol, untyped]]
22
+ attr_reader :source #: String
23
+ attr_reader :etag #: String?
24
+ attr_reader :last_modified #: String?
25
+ alias entries items #: Array[Hash[Symbol, untyped]]
26
+
27
+ @@feed_tags = %i[
28
+ id
29
+ title subtitle link
30
+ description
31
+ author webMaster managingEditor contributor
32
+ pubDate lastBuildDate updated dc:date
33
+ generator language docs cloud
34
+ ttl skipHours skipDays
35
+ image logo icon rating
36
+ rights copyright
37
+ textInput feedburner:browserFriendly
38
+ itunes:author itunes:category
39
+ ]
40
+
41
+ @@item_tags = %i[
42
+ id
43
+ title link link+alternate link+self link+edit link+replies
44
+ author contributor
45
+ description summary content content:encoded comments
46
+ pubDate published updated expirationDate modified dc:date
47
+ category guid
48
+ trackback:ping trackback:about
49
+ dc:creator dc:title dc:subject dc:rights dc:publisher
50
+ feedburner:origLink
51
+ media:content#url media:content#type media:content#height media:content#width media:content#duration
52
+ media:title media:thumbnail#url media:thumbnail#height media:thumbnail#width
53
+ media:credit media:credit#role
54
+ media:category media:category#scheme
55
+ ]
56
+
57
# Builds a feed object from raw XML and parses it straight away.
#
# source  - an IO-like object (anything responding to #read) or any value
#           whose #to_s yields the feed XML
# options - parser options; they are copied into a fresh hash
#
# @rbs (untyped, ?Hash[Symbol, untyped]) -> void
def initialize(source, options = {})
  raw = source.respond_to?(:read) ? source.read : source
  @source = raw.to_s
  @items = [] #: Array[Hash[Symbol, untyped]]
  @options = {}.update(options) #: Hash[Symbol, untyped]

  parse
end
66
+
67
# Feed-level values live on the parser object itself, so the "channel"
# (also reachable as #feed) is simply the receiver.
#
# @rbs () -> SimpleRSS
def channel
  itself
end
alias feed channel
72
+
73
# Yields every parsed item in feed order.
#
# With a block, returns the receiver so calls can be chained; without one,
# returns an Enumerator over the same items.
#
# @rbs () { (Hash[Symbol, untyped]) -> void } -> self
#    | () -> Enumerator[Hash[Symbol, untyped], self]
def each(&block)
  if block
    items.each(&block)
    self
  else
    enum_for(:each)
  end
end
83
+
84
# Looks an item up by position; delegates to the items array, so the result
# is nil when nothing is stored at that index.
#
# @rbs (Integer) -> Hash[Symbol, untyped]?
def [](index)
  items.slice(index)
end
90
+
91
# Get the n most recent items, newest first.
#
# An item is dated by its :pubDate, falling back to :updated. Only Time
# values count: when a date could not be parsed the item still carries the
# raw String, and comparing a String with a Time inside sort_by raises
# ArgumentError. Items without a usable Time are treated as the epoch and
# therefore sort last. Items with equal timestamps keep their feed order.
#
# count - maximum number of items to return (default 10)
#
# @rbs (?Integer) -> Array[Hash[Symbol, untyped]]
def latest(count = 10)
  epoch = Time.at(0)

  ranked = items.each_with_index.sort_by do |item, position|
    stamp = [item[:pubDate], item[:updated]].find { |value| value.is_a?(Time) } || epoch
    # Negated timestamp gives newest-first; position breaks ties deterministically
    [-stamp.to_r, position]
  end

  ranked.first(count).map(&:first)
end
97
+
98
# Builds a plain-hash representation of the feed: one entry per feed-level
# tag that has a value, plus :items holding every item hash. Time values are
# converted through serialize_value.
#
# Feed tags written as "channel#attr" / "feed#attr" are looked up under the
# name with "#" replaced by "_", because "@channel#attr" is not a legal
# instance variable name and instance_variable_get would raise NameError.
# Blank tags are skipped for the same reason.
#
# _options - accepted and ignored
#
# @rbs (?Hash[Symbol, untyped]) -> Hash[Symbol, untyped]
def as_json(_options = {})
  hash = {} #: Hash[Symbol, untyped]

  @@feed_tags.each do |tag|
    tag_str = tag.to_s
    next if tag_str.strip.empty?

    tag_cleaned = clean_tag(tag_str.tr("#", "_"))
    value = instance_variable_get("@#{tag_cleaned}")
    hash[tag_cleaned] = serialize_value(value) if value
  end

  hash[:items] = items.map do |item|
    item.transform_values { |v| serialize_value(v) }
  end

  hash
end
114
+
115
# Serializes the hash produced by as_json to a JSON string. Any arguments are
# accepted and ignored. The json library is loaded on first use.
#
# @rbs (*untyped) -> String
def to_json(*)
  require "json"

  payload = as_json
  JSON.generate(payload)
end
120
+
121
+ alias to_hash as_json
122
+
123
# Renders the feed as XML.
#
# format - :rss2 (default) or :atom
#
# Raises ArgumentError for any other format.
#
# @rbs (?format: Symbol) -> String
def to_xml(format: :rss2)
  builder = { rss2: :to_rss2_xml, atom: :to_atom_xml }.fetch(format) do
    raise ArgumentError, "Unknown format: #{format}. Supported: :rss2, :atom"
  end

  send(builder)
end
131
+
132
class << self
  # Tags looked up at feed/channel level.
  #
  # @rbs () -> Array[Symbol]
  def feed_tags
    @@feed_tags
  end

  # Replaces the list of feed-level tags.
  #
  # @rbs (Array[Symbol]) -> Array[Symbol]
  def feed_tags=(ft)
    @@feed_tags = ft
  end

  # Tags looked up inside every item/entry.
  #
  # @rbs () -> Array[Symbol]
  def item_tags
    @@item_tags
  end

  # Replaces the list of item-level tags.
  #
  # @rbs (Array[Symbol]) -> Array[Symbol]
  def item_tags=(it)
    @@item_tags = it
  end

  # The strict attribute is for compatibility with Ruby's standard RSS parser
  #
  # @rbs (untyped, ?Hash[Symbol, untyped]) -> SimpleRSS
  def parse(source, options = {})
    new source, options
  end

  # Fetch and parse a feed from a URL
  # Returns nil if conditional GET returns 304 Not Modified
  #
  # Options read here and by the helpers below:
  #   :timeout          - applied to both open and read timeouts
  #   :etag             - sent as If-None-Match
  #   :last_modified    - sent as If-Modified-Since
  #   :headers          - extra request headers
  #   :follow_redirects - pass false to stop at the first redirect
  #
  # Raises SimpleRSSError for any response that is neither a success nor a
  # 304. The ETag and Last-Modified response headers are stored on the
  # returned feed.
  #
  # @rbs (String, ?Hash[Symbol, untyped]) -> SimpleRSS?
  def fetch(url, options = {})
    require "net/http"
    require "uri"

    uri = URI.parse(url)
    response = perform_fetch(uri, options)

    return nil if response.is_a?(Net::HTTPNotModified)

    raise SimpleRSSError, "HTTP #{response.code}: #{response.message}" unless response.is_a?(Net::HTTPSuccess)

    # A successful response may carry no body at all (nil); work on a copy so
    # a nil or frozen body cannot make force_encoding blow up.
    body = response.body.to_s.dup.force_encoding(Encoding::UTF_8)
    feed = parse(body, options)
    feed.instance_variable_set(:@etag, response["ETag"])
    feed.instance_variable_set(:@last_modified, response["Last-Modified"])
    feed
  end

  private

  # Issues one GET request and, when the answer is a redirect that should be
  # followed, returns the response of the redirect target instead.
  #
  # @rbs (untyped, Hash[Symbol, untyped]) -> untyped
  def perform_fetch(uri, options)
    http = build_http(uri, options)
    request = build_request(uri, options)

    response = http.request(request)
    handle_redirect(response, options, uri) || response
  end

  # Creates the Net::HTTP connection object for the URI (TLS for https) and
  # applies options[:timeout] when given.
  #
  # @rbs (untyped, Hash[Symbol, untyped]) -> untyped
  def build_http(uri, options)
    host = uri.host || raise(SimpleRSSError, "Invalid URL: missing host")
    http = Net::HTTP.new(host, uri.port)
    http.use_ssl = uri.scheme == "https"

    timeout = options[:timeout]
    if timeout
      http.open_timeout = timeout
      http.read_timeout = timeout
    end

    http
  end

  # Creates the GET request with the User-Agent, conditional-GET and custom
  # headers.
  #
  # @rbs (untyped, Hash[Symbol, untyped]) -> untyped
  def build_request(uri, options)
    request = Net::HTTP::Get.new(uri)
    request["User-Agent"] = "SimpleRSS/#{VERSION}"

    # Conditional GET headers
    request["If-None-Match"] = options[:etag] if options[:etag]
    request["If-Modified-Since"] = options[:last_modified] if options[:last_modified]

    # Custom headers
    options[:headers]&.each { |key, value| request[key] = value }

    request
  end

  # Follows a redirect response, at most five hops deep.
  #
  # Returns nil when the response is not a redirect, redirects are disabled,
  # or no Location header is present; otherwise the response of the target.
  #
  # base_uri is the URI that produced the response. The Location header may
  # be relative (e.g. "/feed.xml"); it is resolved against base_uri because a
  # relative URI on its own has no host to connect to.
  #
  # @rbs (untyped, Hash[Symbol, untyped], ?untyped) -> untyped
  def handle_redirect(response, options, base_uri = nil)
    return nil unless response.is_a?(Net::HTTPRedirection)
    return nil if options[:follow_redirects] == false

    location = response["Location"]
    return nil unless location

    redirects = (options[:_redirects] || 0) + 1
    raise SimpleRSSError, "Too many redirects" if redirects > 5

    target = base_uri ? URI.join(base_uri.to_s, location) : URI.parse(location)
    perform_fetch(target, options.merge(_redirects: redirects))
  end
end
238
+
239
+ DATE_TAGS = %i[pubDate lastBuildDate published updated expirationDate modified dc:date].freeze
240
+ STRIP_HTML_TAGS = %i[author contributor skipHours skipDays].freeze
241
+
242
+ private
243
+
244
# Parses @source in two passes: feed-level tags first, then every
# item/entry element.
#
# Each feed-level value is stored in an instance variable named after the
# cleaned tag and a reader for it is defined on the class. Each item becomes
# a hash in @items; the hash also answers unknown method calls with the value
# stored under that name (nil when absent).
#
# Raises SimpleRSSError when no <channel>/<feed> element pair is found.
#
# @rbs () -> void
def parse
  raise SimpleRSSError, "Poorly formatted feed" unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi

  # Feed's title and link: everything that precedes the first item/entry
  feed_content = Regexp.last_match(1) if @source =~ %r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi

  # Capture channel/feed tag attributes
  feed_attrs = Regexp.last_match(2) if @source =~ /<(channel|feed)([\s][^>]*)?>/mi

  @@feed_tags.each do |tag|
    tag_str = tag.to_s
    next if tag_str.strip.empty?

    # Handle channel#attr or feed#attr syntax
    if tag_str.include?("#")
      parse_feed_attr_tag(tag_str, feed_attrs)
      next
    end

    paired = %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
    self_closing = %r{<(rss:|atom:)?#{tag}(.*?)/\s*>}mi

    # Look in the pre-item section first and only then in the whole document,
    # trying the paired form before the self-closing form each time.
    found = (feed_content && (paired.match(feed_content) || self_closing.match(feed_content))) ||
            paired.match(@source) || self_closing.match(@source)
    next unless found && (found[2] || found[3])

    tag_cleaned = clean_tag(tag)
    instance_variable_set("@#{tag_cleaned}", clean_content(tag, found[2], found[3]))
    # Define the reader directly instead of evaluating a generated code string
    self.class.send(:attr_reader, tag_cleaned)
  end

  # RSS items' title, link, and description
  @source.scan(%r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi) do |match|
    item = {} #: Hash[Symbol, untyped]
    @@item_tags.each do |tag|
      next if tag.to_s.strip.empty?

      # match[3] is the element body, match[2] the attributes of the item/entry tag
      parse_item_tag(item, tag, match[3], match[2])
    end
    item.define_singleton_method(:method_missing) { |name, *| self[name] }
    # Keep respond_to? in step with method_missing for the keys actually present
    item.define_singleton_method(:respond_to_missing?) { |name, _include_private = false| key?(name) }
    @items << item
  end
end
297
+
298
# Routes one configured item tag to the matching extractor, based on the
# tag's spelling:
#   "tag+rel"  -> parse_rel_tag
#   "tag#attr" -> parse_attr_tag
#   anything else -> parse_simple_tag
# Does nothing when the item has no content.
#
# @rbs (Hash[Symbol, untyped], Symbol, String?, String?) -> void
def parse_item_tag(item, tag, content, item_attrs = nil)
  return if content.nil?

  tag_str = tag.to_s

  if tag_str.include?("+")
    parse_rel_tag(item, tag_str, content)
  elsif tag_str.include?("#")
    parse_attr_tag(item, tag_str, content, item_attrs)
  else
    parse_simple_tag(item, tag, content)
  end
end
309
+
310
# Extracts a "tag+rel" entry (e.g. "link+alternate"): the element named tag
# whose rel attribute equals rel. The value is stored under the cleaned
# "tag+rel" key.
#
# The attribute text on BOTH sides of rel= is handed to clean_content, so an
# href is found whether it is written before or after rel. The attribute
# segments use [^>] rather than "." so the search stays inside a single
# opening tag instead of running on into a later element.
#
# @rbs (Hash[Symbol, untyped], String, String) -> void
def parse_rel_tag(item, tag_str, content)
  tag, rel = tag_str.split("+")
  return unless tag && rel

  paired = %r{<(rss:|atom:)?#{tag}([^>]*?)rel=['"]#{rel}['"]([^>]*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
  self_closing = %r{<(rss:|atom:)?#{tag}([^>]*?)rel=['"]#{rel}['"]([^>]*?)/\s*>}mi

  found = paired.match(content) || self_closing.match(content)
  return unless found

  # found[4] is the element text; it is nil for the self-closing form
  attrs = "#{found[2]} #{found[3]}"
  item[clean_tag("#{tag}+#{rel}")] = clean_content(tag.to_sym, attrs, found[4])
end
322
+
323
# Handles a "channel#attr" / "feed#attr" feed tag: reads attr from the
# attribute text of the channel/feed element, stores it in
# @channel_attr / @feed_attr and defines a reader for it on the class.
#
# Does nothing when the tag is malformed, names another element, the
# element had no attributes, or the attribute is absent.
#
# @rbs (String, String?) -> void
def parse_feed_attr_tag(tag_str, feed_attrs)
  tag, attrib = tag_str.split("#")
  return unless tag && attrib && feed_attrs

  # Only handle channel or feed tags
  return unless %w[channel feed].include?(tag)

  # \1 forces the closing quote to be the same character as the opening one,
  # so a value like "it's" is not cut off at the apostrophe.
  found = /#{attrib}=(['"])(.*?)\1/mi.match(feed_attrs)
  return unless found

  tag_cleaned = clean_tag("#{tag}_#{attrib}")
  instance_variable_set("@#{tag_cleaned}", clean_content(tag.to_sym, attrib, found[2]))
  # Define the reader directly instead of evaluating a generated code string
  self.class.send(:attr_reader, tag_cleaned)
end
336
+
337
# Extracts a "tag#attr" entry (e.g. "media:content#url") and stores the
# attribute value under the cleaned "tag_attr" key.
#
# When tag is "item" or "entry" and item_attrs is given, the attribute is
# read from the item/entry element's own attribute text; otherwise it is
# read from the first matching child element in content.
#
# Two details of the patterns matter:
#   * \1 forces the closing quote to equal the opening quote, so a value such
#     as "it's" is kept whole.
#   * [^>]*? keeps the attribute search inside one opening tag; with ".*?"
#     the search could run on and pick up the attribute of a later element.
#
# @rbs (Hash[Symbol, untyped], String, String, String?) -> void
def parse_attr_tag(item, tag_str, content, item_attrs = nil)
  tag, attrib = tag_str.split("#")
  return unless tag && attrib

  # Handle attributes on the item/entry tag itself
  if %w[item entry].include?(tag) && item_attrs
    own = /#{attrib}=(['"])(.*?)\1/mi.match(item_attrs)
    return unless own

    item[clean_tag("#{tag}_#{attrib}")] = clean_content(tag.to_sym, attrib, own[2])
    return
  end

  paired = %r{<(?:rss:|atom:)?#{tag}[^>]*?#{attrib}=(['"])(.*?)\1[^>]*?>.*?</(?:rss:|atom:)?#{tag}>}mi
  self_closing = %r{<(?:rss:|atom:)?#{tag}[^>]*?#{attrib}=(['"])(.*?)\1[^>]*?/\s*>}mi

  found = paired.match(content) || self_closing.match(content)
  return unless found

  item[clean_tag("#{tag}_#{attrib}")] = clean_content(tag.to_sym, attrib, found[2])
end
357
+
358
# Extracts a plain tag from an item's content and stores it under the
# cleaned tag name.
#
# For tags listed in the array_tags option every occurrence is collected
# into an array (nothing is stored when there are none). Otherwise the first
# paired element wins, with a self-closing element as fallback.
#
# @rbs (Hash[Symbol, untyped], Symbol, String) -> void
def parse_simple_tag(item, tag, content)
  # Handle array_tags option - collect all values for this tag
  if array_tag?(tag)
    occurrences = content.scan(%r{<(rss:|atom:)?#{tag}(?:[^>]*)>(.*?)</(rss:|atom:)?#{tag}>}mi)
    collected = occurrences.map { |captures| clean_content(tag, nil, captures[1]) }
    item[clean_tag(tag)] = collected unless collected.empty?
    return
  end

  found = %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi.match(content) ||
          %r{<(rss:|atom:)?#{tag}(.*?)/\s*>}mi.match(content)

  # Group 2 is the attribute text, group 3 the element text (paired form only)
  return unless found && (found[2] || found[3])

  item[clean_tag(tag)] = clean_content(tag, found[2], found[3])
end
376
+
377
# True when the tag is listed in the :array_tags option. The option only
# counts when it is an Array; the tag is checked as given and as a Symbol.
#
# @rbs (Symbol) -> bool
def array_tag?(tag)
  configured = @options[:array_tags]

  if configured.is_a?(Array)
    configured.include?(tag) || configured.include?(tag.to_sym)
  else
    false
  end
end
384
+
385
# Normalizes the raw text of a tag according to the kind of tag:
#   * date tags (DATE_TAGS) go through parse_date
#   * STRIP_HTML_TAGS have markup removed, then are unescaped
#   * an empty element with attributes yields its href
#   * everything else is unescaped
#
# @rbs (Symbol, String?, String?) -> (Time | String)
def clean_content(tag, attrs, content)
  text = content.to_s

  if DATE_TAGS.include?(tag)
    parse_date(text)
  elsif STRIP_HTML_TAGS.include?(tag)
    unescape(text.gsub(/<.*?>/, ""))
  elsif text.empty? && attrs
    extract_href(attrs)
  else
    unescape(text)
  end
end
395
+
396
# Converts date text to a Time. When Time.parse raises, the unescaped text
# is returned instead.
#
# @rbs (String) -> (Time | String)
def parse_date(content)
  begin
    Time.parse(content)
  rescue StandardError
    unescape(content)
  end
end
402
+
403
# Pulls the href value out of a tag's attribute text; returns "" when there
# is none. A trailing space is appended before matching so that an unquoted
# value at the very end still has a terminator.
#
# @rbs (String?) -> String
def extract_href(attrs)
  found = /href=['"]?([^'"]*)['" ]/mi.match("#{attrs} ")

  found ? found[1].strip : ""
end
409
+
410
# Turns a tag name into the Symbol used as hash key / reader name by
# replacing every ":" with "_".
#
# @rbs (Symbol | String) -> Symbol
def clean_tag(tag)
  name = tag.to_s
  name.gsub(":", "_").to_sym
end
414
+
415
# Makes a value JSON-friendly: Time becomes an ISO 8601 string, anything
# else is returned unchanged.
#
# @rbs (untyped) -> untyped
def serialize_value(value)
  value.is_a?(Time) ? value.iso8601 : value
end
422
+
423
# Escapes the five XML special characters. nil becomes "".
#
# @rbs (String?) -> String
def escape_xml(text)
  return "" if text.nil?

  # "&" has to be handled first so the entities added afterwards are not
  # escaped a second time.
  replacements = [
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ["'", "&apos;"],
    ['"', "&quot;"]
  ]

  replacements.reduce(text.to_s) { |escaped, (raw, entity)| escaped.gsub(raw, entity) }
end
434
+
435
# Appends <tag>escaped value</tag> to elements; does nothing when the value
# is nil or false.
#
# @rbs (Array[String], String, untyped) -> void
def add_xml_element(elements, tag, value)
  return unless value

  elements << "<#{tag}>#{escape_xml(value)}</#{tag}>"
end
439
+
440
# Appends <tag>timestamp</tag> to elements when value is a Time; any other
# value is ignored. format :rfc2822 selects RFC 2822 output, every other
# format yields ISO 8601.
#
# @rbs (Array[String], String, untyped, Symbol) -> void
def add_xml_time_element(elements, tag, value, format)
  return unless value.is_a?(Time)

  stamp = if format == :rfc2822
            value.rfc2822
          else
            value.iso8601
          end
  elements.push("<#{tag}>#{stamp}</#{tag}>")
end
447
+
448
# Renders the feed as an RSS 2.0 document: XML declaration, channel
# metadata, one <item> per parsed item, joined with newlines.
#
# @rbs () -> String
def to_rss2_xml
  lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<rss version="2.0">', "<channel>"]
  lines += rss2_channel_elements
  lines += items.flat_map { |item| rss2_item_elements(item) }
  lines.push("</channel>", "</rss>")
  lines.join("\n")
end
457
+
458
# Builds the channel-level RSS 2.0 elements from the feed's instance
# variables, in this order: title, link, description, language, pubDate,
# lastBuildDate, generator. Unset values produce no element.
#
# @rbs () -> Array[String]
def rss2_channel_elements
  out = [] #: Array[String]

  %w[title link description language].each do |name|
    add_xml_element(out, name, instance_variable_get("@#{name}"))
  end

  %w[pubDate lastBuildDate].each do |name|
    add_xml_time_element(out, name, instance_variable_get("@#{name}"), :rfc2822)
  end

  add_xml_element(out, "generator", instance_variable_get(:@generator))
  out
end
470
+
471
# Builds the lines of one RSS 2.0 <item> from an item hash. Only keys that
# are present produce an element; :pubDate is written only when it is a Time.
#
# :category may hold a single value or an Array of values (the latter is what
# the array_tags option produces); one <category> element is written per
# value instead of dumping the array's inspect string into a single element.
#
# The description is wrapped in CDATA. A literal "]]>" inside it would end
# the section early, so it is split across two CDATA sections.
#
# @rbs (Hash[Symbol, untyped]) -> Array[String]
def rss2_item_elements(item)
  elements = ["<item>"] #: Array[String]
  elements << "<title>#{escape_xml(item[:title])}</title>" if item[:title]
  elements << "<link>#{escape_xml(item[:link])}</link>" if item[:link]
  if item[:description]
    cdata = item[:description].to_s.gsub("]]>", "]]]]><![CDATA[>")
    elements << "<description><![CDATA[#{cdata}]]></description>"
  end
  elements << "<pubDate>#{item[:pubDate].rfc2822}</pubDate>" if item[:pubDate].is_a?(Time)
  elements << "<guid>#{escape_xml(item[:guid])}</guid>" if item[:guid]
  elements << "<author>#{escape_xml(item[:author])}</author>" if item[:author]
  Array(item[:category]).each do |category|
    elements << "<category>#{escape_xml(category)}</category>"
  end
  elements << "</item>"
  elements
end
484
+
485
# Renders the feed as an Atom document: XML declaration, feed metadata, one
# <entry> per parsed item, joined with newlines.
#
# @rbs () -> String
def to_atom_xml
  lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<feed xmlns="http://www.w3.org/2005/Atom">']
  lines += atom_feed_elements
  lines += items.flat_map { |item| atom_entry_elements(item) }
  lines << "</feed>"
  lines.join("\n")
end
493
+
494
# Builds the feed-level Atom elements from the feed's instance variables:
# title, link, id, updated, subtitle, author, generator. Unset values
# produce no element.
#
# The <id> is the feed's own id when there is one and falls back to the
# link. It is written whenever either of the two is available, so a feed
# that has an id but no link still gets its <id>.
#
# @rbs () -> Array[String]
def atom_feed_elements
  elements = [] #: Array[String]
  title_val = instance_variable_get(:@title)
  link_val = instance_variable_get(:@link)
  feed_id = instance_variable_get(:@id) || link_val
  add_xml_element(elements, "title", title_val)
  elements << "<link href=\"#{escape_xml(link_val)}\" rel=\"alternate\"/>" if link_val
  elements << "<id>#{escape_xml(feed_id)}</id>" if feed_id
  add_xml_time_element(elements, "updated", instance_variable_get(:@updated), :iso8601)
  add_xml_element(elements, "subtitle", instance_variable_get(:@subtitle))
  author_val = instance_variable_get(:@author)
  elements << "<author><name>#{escape_xml(author_val)}</name></author>" if author_val
  add_xml_element(elements, "generator", instance_variable_get(:@generator))
  elements
end
510
+
511
# Builds the lines of one Atom <entry> from an item hash. Only keys that are
# present produce an element; the <id> is the first available of :id, :guid
# and :link, and the summary falls back to :description.
#
# :category may hold a single value or an Array of values (the latter is what
# the array_tags option produces); one <category> element is written per
# value instead of dumping the array's inspect string into a single element.
#
# Summary and content are wrapped in CDATA. A literal "]]>" inside them would
# end the section early, so it is split across two CDATA sections.
#
# @rbs (Hash[Symbol, untyped]) -> Array[String]
def atom_entry_elements(item)
  cdata = ->(text) { "<![CDATA[#{text.to_s.gsub(']]>', ']]]]><![CDATA[>')}]]>" }
  entry_id = item[:id] || item[:guid] || item[:link]
  summary = item[:summary] || item[:description]

  elements = ["<entry>"] #: Array[String]
  elements << "<title>#{escape_xml(item[:title])}</title>" if item[:title]
  elements << "<link href=\"#{escape_xml(item[:link])}\" rel=\"alternate\"/>" if item[:link]
  elements << "<id>#{escape_xml(entry_id)}</id>" if entry_id
  elements << "<updated>#{item[:updated].iso8601}</updated>" if item[:updated].is_a?(Time)
  atom_entry_published(elements, item)
  elements << "<summary>#{cdata.call(summary)}</summary>" if summary
  elements << "<content>#{cdata.call(item[:content])}</content>" if item[:content]
  elements << "<author><name>#{escape_xml(item[:author])}</name></author>" if item[:author]
  Array(item[:category]).each do |category|
    elements << "<category term=\"#{escape_xml(category)}\"/>"
  end
  elements << "</entry>"
  elements
end
526
+
527
# Appends a <published> element using :published when it is a Time,
# otherwise :pubDate when that is a Time; appends nothing when neither is.
#
# @rbs (Array[String], Hash[Symbol, untyped]) -> void
def atom_entry_published(elements, item)
  stamp = [item[:published], item[:pubDate]].find { |value| value.is_a?(Time) }

  elements << "<published>#{stamp.iso8601}</published>" if stamp
end
535
+
536
# Cleans element text: URL-decodes it when it contains a "%" that directly
# follows a character outside the listed set, removes CDATA markers, strips
# surrounding whitespace and returns the result encoded as UTF-8.
#
# @rbs (String) -> String
def unescape(content)
  needs_decoding = content =~ %r{([^-_.!~*'()a-zA-Z\d;/?:@&=+$,\[\]]%)}
  decoded = needs_decoding ? CGI.unescape(content) : content

  without_cdata = decoded.gsub(/(<!\[CDATA\[|\]\]>)/, "").strip
  without_cdata.encode(Encoding::UTF_8)
end
159
546
  end
160
547