ruby-feedparser 0.7 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e198cf8ee7423ff4edf5ba4367ac809ba1fe2a9d6361fcf53d12b984aa138228
4
+ data.tar.gz: bbbd8c024c4e85c991ae2ceae4494e24d8b0865d2fe6a4df2646007e798e96ac
5
+ SHA512:
6
+ metadata.gz: ac90154cfa40180e03d4b7b1d631186c6db1d70d79bdbb7f4edb4c54a66eddab3085e04480ed965b7c2e055770976873224b9d12f20be3d282817d6cd34245be
7
+ data.tar.gz: 4f658dc07c1d692b44f9abd0d400449cb0e1aa3d1cda8c782b052e45b193a8ad462ce7e2d8f951b3d15b301764e651f3781da93cf07f28c1ea8438ab87a1c989
@@ -1,22 +1,29 @@
1
- Ruby-Feedparser 0.7 (27/07/2009)
2
- ================================
1
+ # 0.9.4 (25/03/2016)
2
+
3
+ Bug fixes:
4
+
5
+ * feedparser: relax exception check for Magic errors; by Eric Wong
6
+ * Always sort author list to avoid unnecessary invalidation of caches; by Sébastien Dailly
7
+
8
+ # 0.7 (27/07/2009)
9
+
3
10
  * Handled several creators per feed item
4
11
  * Fix bug with urls into tag attributes
5
12
  * Better item categories support
6
13
  * Reworked text output formatting
7
14
  * Ignore &shy;, as some blog software (dotclear2) misuse it.
8
15
 
9
- Ruby-Feedparser 0.6 (23/07/2008)
10
- ================================
11
- * Moved to_human_readable from class Fixnum to class Integer.
16
+ # 0.6 (23/07/2008)
17
+
18
+ * Moved `to_human_readable` from class Fixnum to class Integer.
12
19
  * Correctly parse http://www.tbray.org/ongoing/ongoing.atom. Thanks
13
20
  to Janico Greifenberg for reporting this.
14
21
  * String#html2text now takes an additional wrapto parameter, allowing
15
22
  to wrap the text to a specified number of chars. Thanks to
16
23
  Maxime Petazzoni for the patch.
17
24
 
18
- Ruby-Feedparser 0.5 (26/10/2007)
19
- ================================
25
+ # 0.5 (26/10/2007)
26
+
20
27
  * Fixed a bug with items with both non-escaped and escaped HTML. Reported,
21
28
  then patch provided by Gregory Hartman <gghartma@cs.cmu.edu>.
22
29
  * In Atom feeds, use the date provided in <updated>, and use it in
@@ -27,33 +34,33 @@ Ruby-Feedparser 0.5 (26/10/2007)
27
34
  * Make checks for HTML tags case-insensitive. Broke Dilbert feeds!!
28
35
  Reported by Michal Čihař. Closes gna bug #10199.
29
36
 
30
- Ruby-Feedparser 0.4 (01/05/2007)
31
- ================================
37
+ # 0.4 (01/05/2007)
38
+
32
39
  * Fixed a problem with html entities in the items' titles.
33
40
  * Date was not fetched for blogspot's atom feeds.
34
41
  Patch from Jason Ling <jason.ling@jeyel.com>.
35
42
  * Tests are now timezone-friendly. (closes GNA bug #8145).
36
43
  * Much nicer text output.
37
44
 
38
- Ruby-Feedparser 0.3 (01/12/2006)
39
- ================================
45
+ # 0.3 (01/12/2006)
46
+
40
47
  * Much nicer HTML output
41
48
  * Fixed a problem with some feeds with broken enclosures (without url)
42
- * Now automatically fixes non-absolute <a href> or <img src>
49
+ * Now automatically fixes non-absolute `<a href>` or `<img src>`
43
50
  * Fixed small parser bugs
44
51
  * Now displays enclosures in the text and html outputs. Ready for
45
52
  podcasting :-)
46
53
  * Now escape title, creator, subject and category internally. This minor
47
54
  fix avoids &amp; stuff in the titles, for example.
48
55
 
49
- Ruby-Feedparser 0.2 (05/06/2006)
50
- ================================
56
+ # 0.2 (05/06/2006)
57
+
51
58
  * Fixed a problem when parsing some ATOM feeds with <link> without type
52
59
  attribute. (Thanks Michal Cihar !)
53
60
  * FeedParser::Feed and FeedParser::FeedItem now have an xml attribute to
54
61
  get the related REXML::Element.
55
62
  * <enclosure/> support in RSS.
56
63
 
57
- Ruby-Feedparser 0.1 (24/11/2005)
58
- ================================
64
+ # 0.1 (24/11/2005)
65
+
59
66
  * first public release.
data/Rakefile CHANGED
@@ -1,14 +1,15 @@
1
1
  require 'rake/testtask'
2
- require 'rake/rdoctask'
3
- require 'rake/packagetask'
2
+ require 'rdoc/task'
3
+ require 'rubygems/package_task'
4
4
  require 'rake'
5
5
  require 'find'
6
+ require_relative 'lib/feedparser/version.rb'
6
7
 
7
8
  # Globals
8
9
  PKG_NAME = 'ruby-feedparser'
9
- PKG_VERSION = '0.7'
10
+ PKG_VERSION = FeedParser::VERSION
10
11
 
11
- PKG_FILES = [ 'ChangeLog', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
12
+ PKG_FILES = [ 'ChangeLog.md', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
12
13
  Find.find('lib/', 'test/', 'tools/') do |f|
13
14
  if FileTest.directory?(f) and f =~ /\.svn/
14
15
  Find.prune
@@ -19,7 +20,7 @@ end
19
20
 
20
21
  PKG_FILES.reject! { |f| f =~ /^test\/(source|.*_output)\// }
21
22
 
22
- task :default => [:package]
23
+ task :default => [:test]
23
24
 
24
25
  Rake::TestTask.new do |t|
25
26
  t.libs << "test"
@@ -61,8 +62,6 @@ end
61
62
 
62
63
  # "Gem" part of the Rakefile
63
64
  begin
64
- require 'rake/gempackagetask'
65
-
66
65
  spec = Gem::Specification.new do |s|
67
66
  s.platform = Gem::Platform::RUBY
68
67
  s.summary = "Ruby library to parse ATOM and RSS feeds"
@@ -73,12 +72,21 @@ begin
73
72
  s.autorequire = 'feedparser'
74
73
  s.files = PKG_FILES
75
74
  s.description = "Ruby library to parse ATOM and RSS feeds"
75
+ s.authors = ['Lucas Nussbaum']
76
+ s.add_runtime_dependency 'magic'
76
77
  end
77
78
 
78
- Rake::GemPackageTask.new(spec) do |pkg|
79
+ Gem::PackageTask.new(spec) do |pkg|
79
80
  pkg.need_zip = true
80
81
  pkg.need_tar = true
81
82
  end
82
83
  rescue LoadError
83
84
  puts "Will not generate gem."
84
85
  end
86
+
87
+ task :release => :repackage do
88
+ sh 'git', 'tag', 'v' + PKG_VERSION
89
+ sh 'git', 'push'
90
+ sh 'git', 'push', '--tags'
91
+ sh 'gem', 'push', "pkg/#{PKG_NAME}-#{PKG_VERSION}.gem"
92
+ end
@@ -1,17 +1,47 @@
1
+ require 'cgi'
1
2
  require 'rexml/document'
2
3
  require 'time'
3
4
  require 'feedparser/textconverters'
4
5
  require 'feedparser/rexml_patch'
5
6
  require 'feedparser/text-output'
7
+ require 'feedparser/version'
6
8
  require 'base64'
9
+ require 'magic'
10
+ require 'uri'
7
11
 
8
12
  module FeedParser
9
13
 
10
- VERSION = "0.7"
11
-
12
14
  class UnknownFeedTypeException < RuntimeError
13
15
  end
14
16
 
17
+ def self.recode(str)
18
+ encoding = nil
19
+ begin
20
+ encoding = Magic.guess_string_mime_encoding(str)
21
+ rescue => e
22
+ raise unless e.class.to_s =~ /\AMagic::(?:Exception|Error)\z/
23
+ # this happens when magic does not find any content at all, e.g. with
24
+ # strings that contain only whitespace. In these cases it *should* be safe
25
+ # to assume UTF-8
26
+ encoding = Encoding::UTF_8
27
+ end
28
+ if encoding == 'unknown-8bit'
29
+ # find first substring with a valid encoding that is not us-ascii
30
+ length = 1 # has to start at 1, magic requires at least 2 bytes
31
+ while length < str.length && ['us-ascii', 'unknown-8bit'].include?(encoding)
32
+ encoding = Magic.guess_string_mime_encoding(str[0..length])
33
+ length = length + 1
34
+ end
35
+ # need to remove iso-8859-1 control characters
36
+ if encoding == 'iso-8859-1'
37
+ str = str.bytes.select { |c| c < 128 || c > 159 }.map(&:chr).join
38
+ end
39
+ end
40
+ str.force_encoding(encoding)
41
+ str = str.chars.select { |c| c.valid_encoding? }.join
42
+ str.encode('UTF-8')
43
+ end
44
+
15
45
  # an RSS/Atom feed
16
46
  class Feed
17
47
  attr_reader :type, :title, :link, :description, :creator, :encoding, :items
@@ -20,13 +50,16 @@ module FeedParser
20
50
  attr_reader :xml
21
51
 
22
52
  # parse str to build a Feed
23
- def initialize(str = nil)
53
+ def initialize(str = nil, uri = nil)
24
54
  parse(str) if str
55
+ parse_origin(uri) if uri
25
56
  end
26
57
 
27
58
  # Determines all the fields using a string containing an
28
59
  # XML document
29
60
  def parse(str)
61
+ str = FeedParser.recode(str)
62
+
30
63
  # Dirty hack: some feeds contain the & char. It must be changed to &amp;
31
64
  str.gsub!(/&(\s+)/, '&amp;\1')
32
65
  doc = REXML::Document.new(str)
@@ -34,6 +67,7 @@ module FeedParser
34
67
  # get feed info
35
68
  @encoding = doc.encoding
36
69
  @title,@link,@description,@creator = nil
70
+ @title = ""
37
71
  @items = []
38
72
  if doc.root.elements['channel'] || doc.root.elements['rss:channel']
39
73
  @type = "rss"
@@ -108,19 +142,28 @@ module FeedParser
108
142
  s += "Type: #{@type}\n"
109
143
  s += "Encoding: #{@encoding}\n"
110
144
  s += "Title: #{@title}\n"
111
- s += "Link: #{@link}\n"
145
+ s += "Link: #{link}\n"
112
146
  s += "Description: #{@description}\n"
113
147
  s += "Creator: #{@creator}\n"
114
148
  s += "\n"
115
149
  @items.each { |i| s += i.to_s(localtime) }
116
150
  s
117
151
  end
152
+
153
+ def parse_origin(uri)
154
+ uri = URI.parse(uri)
155
+ if uri.hostname && uri.scheme
156
+ @origin = "#{uri.scheme}://#{uri.hostname}"
157
+ end
158
+ end
159
+
160
+ attr_reader :origin
118
161
  end
119
162
 
120
163
  # an Item from a feed
121
164
  class FeedItem
122
- attr_accessor :title, :link, :content, :date, :creators, :subject,
123
- :cacheditem
165
+ attr_accessor :title, :content, :date, :creators, :subject,
166
+ :cacheditem, :links
124
167
 
125
168
  # The item's categories/tags. An array of strings.
126
169
  attr_accessor :categories
@@ -137,9 +180,12 @@ module FeedParser
137
180
  @xml = item
138
181
  @feed = feed
139
182
  @title, @link, @content, @date, @subject = nil
183
+ @links = []
140
184
  @creators = []
141
185
  @categories = []
142
186
  @enclosures = []
187
+
188
+ @title = ""
143
189
  parse(item) if item
144
190
  end
145
191
 
@@ -154,13 +200,14 @@ module FeedParser
154
200
  when 1
155
201
  return creators[0]
156
202
  else
157
- return creators[0...-1].join(", ")+" and "+creators[-1]
203
+ sorted_creators = creators.sort
204
+ return sorted_creators[0...-1].join(", ") + " and " + sorted_creators[-1]
158
205
  end
159
206
  end
160
207
 
161
208
  def to_s(localtime = true)
162
209
  s = "--------------------------------\n" +
163
- "Title: #{@title}\nLink: #{@link}\n"
210
+ "Title: #{@title}\nLink: #{link}\n"
164
211
  if localtime or @date.nil?
165
212
  s += "Date: #{@date.to_s}\n"
166
213
  else
@@ -181,6 +228,26 @@ module FeedParser
181
228
  end
182
229
  return s
183
230
  end
231
+
232
+ attr_writer :link
233
+
234
+ def link
235
+ if @link
236
+ begin
237
+ uri = URI.parse(@link)
238
+ rescue URI::InvalidURIError
239
+ return @link
240
+ end
241
+ if uri.hostname && uri.scheme
242
+ @link
243
+ elsif feed && feed.origin
244
+ [feed.origin, @link].compact.join
245
+ else
246
+ @link
247
+ end
248
+ end
249
+ end
250
+
184
251
  end
185
252
 
186
253
  class RSSItem < FeedItem
@@ -199,7 +266,7 @@ module FeedParser
199
266
  (e = item.elements['guid'] || item.elements['rss:guid'] and
200
267
  not (e.attribute('isPermaLink') and
201
268
  e.attribute('isPermaLink').value == 'false'))
202
- @link = e.text.rmWhiteSpace!
269
+ self.link = e.text.rmWhiteSpace!
203
270
  end
204
271
  # Content
205
272
  if (e = item.elements['content:encoded']) ||
@@ -261,8 +328,16 @@ module FeedParser
261
328
  end
262
329
  # Link
263
330
  item.each_element('link') do |e|
331
+
264
332
  if (h = e.attribute('href')) && h.value
265
- @link = h.value
333
+ self.link = h.value
334
+
335
+ if e.attribute('type')
336
+ @links << {:href => h.value, :type => e.attribute('type').value}
337
+ else
338
+ @links << {:href => h.value, :type => ''}
339
+ end
340
+
266
341
  end
267
342
  end
268
343
  # Content
@@ -80,13 +80,13 @@ module FeedParser
80
80
  s += (headline % ["Feed:", r])
81
81
 
82
82
  r = ""
83
- r += "<a href=\"#{@link}\">" if @link
83
+ r += "<a href=\"#{link}\">" if link
84
84
  if @title
85
85
  r += "<b>#{@title.escape_html}</b>\n"
86
- elsif @link
87
- r += "<b>#{@link.escape_html}</b>\n"
86
+ elsif link
87
+ r += "<b>#{link.escape_html}</b>\n"
88
88
  end
89
- r += "</a>\n" if @link
89
+ r += "</a>\n" if link
90
90
  s += (headline % ["Item:", r])
91
91
  s += "</table></td></tr></table>\n"
92
92
  s += "\n"
@@ -11,16 +11,16 @@ module FeedParser
11
11
  @pre = false
12
12
  @href = nil
13
13
  @links = []
14
+ @curlink = []
14
15
  @imgs = []
15
- @img_index = '@'
16
+ @img_index = 'A'
16
17
  super(verbose)
17
18
  end
18
19
 
19
20
  def next_img_index
20
- n = @img_index[0] + 1
21
- @img_index = " "
22
- @img_index[0] = n
23
- return @img_index
21
+ idx = @img_index
22
+ @img_index = @img_index.next
23
+ idx
24
24
  end
25
25
 
26
26
  def handle_data(data)
@@ -29,7 +29,8 @@ module FeedParser
29
29
  data.gsub!(/\n/, ' ')
30
30
  data.gsub!(/( )+/, ' ')
31
31
  end
32
- @savedata << data
32
+ data = FeedParser.recode(data)
33
+ @savedata << data.encode(Encoding::UTF_8)
33
34
  end
34
35
 
35
36
  def unknown_starttag(tag, attrs)
@@ -70,7 +71,14 @@ module FeedParser
70
71
  end
71
72
  end
72
73
  if @href
73
- @links << @href.gsub(/^("|'|)(.*)("|')$/,'\2')
74
+ @href.gsub!(/^("|'|)(.*)("|')$/,'\2')
75
+ @curlink = @links.find_index(@href)
76
+ if @curlink.nil?
77
+ @links << @href
78
+ @curlink = @links.length
79
+ else
80
+ @curlink += 1
81
+ end
74
82
  end
75
83
  when 'img'
76
84
  # find src in args
@@ -81,8 +89,14 @@ module FeedParser
81
89
  end
82
90
  end
83
91
  if src
84
- idx = next_img_index
85
- @imgs << [ idx, src.gsub(/^("|'|)(.*)("|')$/,'\2') ]
92
+ src.gsub!(/^("|'|)(.*)("|')$/,'\2')
93
+ i = @imgs.index { |e| e[1] == src }
94
+ if i.nil?
95
+ idx = next_img_index
96
+ @imgs << [ idx, src ]
97
+ else
98
+ idx = @imgs[i][0]
99
+ end
86
100
  @savedata << "[#{idx}]"
87
101
  end
88
102
  else
@@ -125,7 +139,7 @@ module FeedParser
125
139
  @pre = false
126
140
  when 'a'
127
141
  if @href
128
- @savedata << "[#{@links.length}]"
142
+ @savedata << "[#{@curlink}]"
129
143
  @href = nil
130
144
  end
131
145
  end
@@ -6,14 +6,17 @@ require 'feedparser/textconverters'
6
6
  # With those changes, it uses unpack/pack with some error handling
7
7
  module REXML
8
8
  module Encoding
9
+ alias rexml_decode decode
9
10
  def decode(str)
10
11
  return str.toUTF8(@encoding)
11
12
  end
12
13
 
14
+ alias rexml_encode encode
13
15
  def encode(str)
14
16
  return str
15
17
  end
16
18
 
19
+ alias rexml_encoding= encoding=
17
20
  def encoding=(enc)
18
21
  return if defined? @encoding and enc == @encoding
19
22
  @encoding = enc || 'utf-8'
@@ -293,12 +293,11 @@ module FeedParser
293
293
  end
294
294
 
295
295
  def handle_charref(name)
296
- n = name.to_i
297
- if !(0 <= n && n <= 255)
296
+ if name =~ /[0-9]+/
298
297
  unknown_charref(name)
299
- return
298
+ else
299
+ handle_data(name)
300
300
  end
301
- handle_data(n.chr)
302
301
  end
303
302
 
304
303
  def handle_entityref(name)
@@ -1,4 +1,3 @@
1
- require 'feedparser'
2
1
  require 'feedparser/html2text-parser'
3
2
  require 'feedparser/filesizes'
4
3
 
@@ -61,7 +60,7 @@ module FeedParser
61
60
  if header
62
61
  s += "Item: "
63
62
  s += @title if @title
64
- s += "\n<#{@link}>" if @link
63
+ s += "\n<#{link}>" if link
65
64
  if @date
66
65
  if localtime
67
66
  s += "\nDate: #{@date.to_s}"
@@ -71,7 +70,7 @@ module FeedParser
71
70
  end
72
71
  s += "\n"
73
72
  else
74
- s += "<#{@link}>\n\n" if @link
73
+ s += "<#{link}>\n\n" if link
75
74
  end
76
75
  s += "#{@content.html2text(wrapto).chomp}\n" if @content
77
76
  if @enclosures and @enclosures.length > 0
@@ -89,7 +88,7 @@ module FeedParser
89
88
  if not header
90
89
  s += "\nItem: "
91
90
  s += @title if @title
92
- s += "\n<#{@link}>" if @link
91
+ s += "\n<#{link}>" if link
93
92
  if @date
94
93
  if localtime
95
94
  s += "\nDate: #{@date.to_s}"
@@ -59,7 +59,7 @@ end
59
59
  text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
60
60
  text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
61
61
  # uris
62
- text.gsub!(/([^'"])(#{URI::regexp(['http','ftp','https'])})/,
62
+ text.gsub!(/([^'"])(#{URI::DEFAULT_PARSER.make_regexp(['http','ftp','https'])})/,
63
63
  '\1<a href="\2">\2</a>')
64
64
  end
65
65
  # Handle broken hrefs in <a> and <img>
@@ -0,0 +1,3 @@
1
+ module FeedParser
2
+ VERSION = "0.9.7"
3
+ end
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby -w
1
+ # encoding: UTF-8
2
2
 
3
3
  $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
4
 
@@ -114,4 +114,55 @@ class FeedParserTest < Test::Unit::TestCase
114
114
  # the third one should be removed because an enclosure should have an url, or it's useless.
115
115
  assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
116
116
  end
117
+
118
+ def test_recode_utf8
119
+ assert_equal 'UTF-8', FeedParser.recode("áéíóú").encoding.name
120
+ end
121
+
122
+ def test_recode_blank
123
+ assert_equal 'UTF-8', FeedParser.recode('').encoding.name
124
+ end
125
+
126
+ def test_recode_iso88519
127
+ assert_equal 'UTF-8', FeedParser.recode("áéíóú".encode('iso-8859-1')).encoding.name
128
+ end
129
+
130
+ def test_recode_utf8_mixed_with_ASCIIBIT
131
+ recoded = FeedParser.recode("áé\x8Díóú")
132
+ assert_equal'UTF-8', recoded.encoding.name
133
+ assert_equal 'áéíóú', recoded
134
+ end
135
+
136
+ def test_recode_unicode_char
137
+ assert_equal "1280×1024", FeedParser.recode("1280×1024")
138
+ end
139
+
140
+ def test_almost_valid_iso88591
141
+ input = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
142
+ assert_equal "Codificação quase válida", FeedParser.recode(input)
143
+ end
144
+
145
+ def test_feed_origin
146
+ feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
147
+ assert_equal "http://foo.com", feed.origin
148
+ end
149
+
150
+ def test_item_origin
151
+ feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
152
+ item = FeedParser::FeedItem.new(nil, feed)
153
+ item.link = '/foo/bar'
154
+ assert_equal 'http://foo.com/foo/bar', item.link
155
+ end
156
+
157
+ def test_item_origin_no_link
158
+ item = FeedParser::FeedItem.new(nil, nil)
159
+ assert_nil item.link
160
+ end
161
+
162
+ def test_item_no_feed
163
+ item = FeedParser::FeedItem.new(nil, nil)
164
+ item.link = '/foo/bar'
165
+ assert_equal '/foo/bar', item.link
166
+ end
167
+
117
168
  end
@@ -0,0 +1,47 @@
1
+ require 'feedparser/feedparser'
2
+ require 'test/unit'
3
+
4
+
5
+ class FeedItemTest < Test::Unit::TestCase
6
+ def setup
7
+ @item = FeedParser::FeedItem.new(nil, nil)
8
+ end
9
+
10
+ ########################################################################
11
+
12
+ def test_link_no_link
13
+ assert @item.link.nil?
14
+ end
15
+
16
+ def test_link_basic
17
+ @item.instance_variable_set('@link', 'https://www.example.com/')
18
+ assert_equal "https://www.example.com/", @item.link
19
+ end
20
+
21
+ def test_link_path_only
22
+ @item.instance_variable_set('@link', '/foo/bar/')
23
+ assert_equal "/foo/bar/", @item.link
24
+ end
25
+
26
+ def test_link_path_only_with_feed_origin
27
+ @item.instance_variable_set('@link', '/foo/bar/')
28
+ feed = FeedParser::Feed.new
29
+ feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
30
+ @item.instance_variable_set('@feed', feed)
31
+ assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
32
+ end
33
+
34
+ def test_link_full_link_with_feed_origin
35
+ @item.instance_variable_set('@link', 'https://www.exampleorigin.com/foo/bar/')
36
+ feed = FeedParser::Feed.new
37
+ feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
38
+ @item.instance_variable_set('@feed', feed)
39
+ assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
40
+ end
41
+
42
+ def test_link_with_non_ascii
43
+ @item.instance_variable_set('@link', 'https://www.example.people/☭/')
44
+ assert_equal "https://www.example.people/☭/", @item.link
45
+ end
46
+
47
+ end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test/unit'
4
+
5
+ require 'feedparser/feedparser'
6
+
7
+ class Html2TextParserTest < Test::Unit::TestCase
8
+
9
+ def test_next_img_index
10
+ parser = FeedParser::HTML2TextParser.new
11
+ assert_equal 'A', parser.next_img_index
12
+ assert_equal 'B', parser.next_img_index
13
+ end
14
+
15
+ def test_numerical_entity
16
+ parser = FeedParser::HTML2TextParser.new
17
+ parser.feed('1280&#215;1024')
18
+ parser.close
19
+ assert_equal "1280×1024", parser.savedata
20
+ end
21
+
22
+ def test_numerical_entity_large_known
23
+ parser = FeedParser::HTML2TextParser.new
24
+ parser.feed('&#8594;')
25
+ parser.close
26
+ assert_equal "→", parser.savedata
27
+ end
28
+
29
+ def test_numerical_entity_large
30
+ parser = FeedParser::HTML2TextParser.new
31
+ parser.feed('&#10000;')
32
+ parser.close
33
+ assert_equal "✐", parser.savedata
34
+ end
35
+
36
+ def test_non_numerical_entity
37
+ parser = FeedParser::HTML2TextParser.new
38
+ parser.feed('HTML&amp;CO')
39
+ parser.close
40
+ assert_equal "HTML&CO", parser.savedata
41
+ end
42
+
43
+ end
@@ -19,12 +19,10 @@ class HTMLOutputTest < Test::Unit::TestCase
19
19
  else
20
20
  raise 'source directory not found.'
21
21
  end
22
- def test_parser
23
- allok = true
24
- Dir.foreach(SRCDIR) do |f|
25
- next if f !~ /.xml$/
26
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
27
- puts "Checking #{f}"
22
+ Dir.foreach(SRCDIR) do |f|
23
+ next if f !~ /.xml$/
24
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
25
+ define_method(testname) do
28
26
  str = File::read(SRCDIR + '/' + f)
29
27
  chan = FeedParser::Feed::new(str)
30
28
  chanstr = chan.to_html(false)
@@ -34,19 +32,21 @@ class HTMLOutputTest < Test::Unit::TestCase
34
32
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
35
33
  fd.print(chanstr)
36
34
  end
37
- puts "Test failed for #{f}."
38
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
39
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
40
- allok = false
35
+ assert(
36
+ false,
37
+ [
38
+ "Test failed for #{f}.",
39
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
40
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
41
+ ].join("\n")
42
+ )
41
43
  end
42
44
  else
43
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
44
45
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
45
46
  f.print(chanstr)
46
47
  end
47
- allok = false
48
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
48
49
  end
49
50
  end
50
- assert(allok)
51
51
  end
52
52
  end
data/test/tc_parser.rb CHANGED
@@ -15,12 +15,10 @@ class ParserTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_s(false)
@@ -30,19 +28,21 @@ class ParserTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
@@ -0,0 +1,22 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test/unit'
4
+ require 'mocha/setup'
5
+
6
+ require 'feedparser/sgml-parser'
7
+
8
+ class SGMLParserTest < Test::Unit::TestCase
9
+
10
+ def test_numerical_charref
11
+ parser = FeedParser::SGMLParser.new
12
+ parser.expects(:unknown_charref).with('215')
13
+ parser.handle_charref('215')
14
+ end
15
+
16
+ def test_non_numerical_charref
17
+ parser = FeedParser::SGMLParser.new
18
+ parser.expects(:handle_data).with('amp')
19
+ parser.handle_charref('amp')
20
+ end
21
+
22
+ end
@@ -15,12 +15,10 @@ class TextOutputTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_text(false) # localtime set to false
@@ -30,19 +28,21 @@ class TextOutputTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
@@ -15,12 +15,10 @@ class TextWrappedOutputTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_text(false, 72) # localtime set to false
@@ -30,19 +28,21 @@ class TextWrappedOutputTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
metadata CHANGED
@@ -1,76 +1,82 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ruby-feedparser
3
- version: !ruby/object:Gem::Version
4
- version: "0.7"
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.7
5
5
  platform: ruby
6
- authors: []
7
-
6
+ authors:
7
+ - Lucas Nussbaum
8
8
  autorequire: feedparser
9
9
  bindir: bin
10
10
  cert_chain: []
11
-
12
- date: 2009-07-27 00:00:00 +02:00
13
- default_executable:
14
- dependencies: []
15
-
11
+ date: 2021-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: magic
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
16
27
  description: Ruby library to parse ATOM and RSS feeds
17
28
  email:
18
29
  executables: []
19
-
20
30
  extensions: []
21
-
22
31
  extra_rdoc_files: []
23
-
24
- files:
25
- - ChangeLog
26
- - README
32
+ files:
27
33
  - COPYING
34
+ - ChangeLog.md
28
35
  - LICENSE
29
- - setup.rb
36
+ - README
30
37
  - Rakefile
31
- - lib/feedparser/text-output.rb
38
+ - lib/feedparser.rb
39
+ - lib/feedparser/feedparser.rb
32
40
  - lib/feedparser/filesizes.rb
33
41
  - lib/feedparser/html-output.rb
34
- - lib/feedparser/rexml_patch.rb
35
42
  - lib/feedparser/html2text-parser.rb
36
- - lib/feedparser/textconverters.rb
37
- - lib/feedparser/feedparser.rb
43
+ - lib/feedparser/rexml_patch.rb
38
44
  - lib/feedparser/sgml-parser.rb
39
- - lib/feedparser.rb
45
+ - lib/feedparser/text-output.rb
46
+ - lib/feedparser/textconverters.rb
47
+ - lib/feedparser/version.rb
48
+ - setup.rb
40
49
  - test/tc_feed_parse.rb
41
- - test/tc_textoutput.rb
50
+ - test/tc_feeditem.rb
51
+ - test/tc_html2text_parser.rb
42
52
  - test/tc_htmloutput.rb
53
+ - test/tc_parser.rb
54
+ - test/tc_sgml_parser.rb
55
+ - test/tc_textoutput.rb
43
56
  - test/tc_textwrappedoutput.rb
44
57
  - test/ts_feedparser.rb
45
- - test/tc_parser.rb
46
58
  - tools/doctoweb.bash
47
- has_rdoc: true
48
59
  homepage:
49
60
  licenses: []
50
-
61
+ metadata: {}
51
62
  post_install_message:
52
63
  rdoc_options: []
53
-
54
- require_paths:
64
+ require_paths:
55
65
  - lib
56
- required_ruby_version: !ruby/object:Gem::Requirement
57
- requirements:
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
58
68
  - - ">="
59
- - !ruby/object:Gem::Version
60
- version: "0"
61
- version:
62
- required_rubygems_version: !ruby/object:Gem::Requirement
63
- requirements:
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
64
73
  - - ">="
65
- - !ruby/object:Gem::Version
66
- version: "0"
67
- version:
68
- requirements:
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements:
69
77
  - none
70
- rubyforge_project:
71
- rubygems_version: 1.3.4
78
+ rubygems_version: 3.2.5
72
79
  signing_key:
73
- specification_version: 3
80
+ specification_version: 4
74
81
  summary: Ruby library to parse ATOM and RSS feeds
75
82
  test_files: []
76
-