ruby-feedparser 0.7 → 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e198cf8ee7423ff4edf5ba4367ac809ba1fe2a9d6361fcf53d12b984aa138228
4
+ data.tar.gz: bbbd8c024c4e85c991ae2ceae4494e24d8b0865d2fe6a4df2646007e798e96ac
5
+ SHA512:
6
+ metadata.gz: ac90154cfa40180e03d4b7b1d631186c6db1d70d79bdbb7f4edb4c54a66eddab3085e04480ed965b7c2e055770976873224b9d12f20be3d282817d6cd34245be
7
+ data.tar.gz: 4f658dc07c1d692b44f9abd0d400449cb0e1aa3d1cda8c782b052e45b193a8ad462ce7e2d8f951b3d15b301764e651f3781da93cf07f28c1ea8438ab87a1c989
@@ -1,22 +1,29 @@
1
- Ruby-Feedparser 0.7 (27/07/2009)
2
- ================================
1
+ # 0.9.4 (25/03/2016)
2
+
3
+ Bug fixes:
4
+
5
+ * feedparser: relax exception check for Magic errors; by Eric Wong
6
+ * Always sort author list to avoid unnecessary invalidation of caches; by Sébastien Dailly
7
+
8
+ # 0.7 (27/07/2009)
9
+
3
10
  * Handled several creators per feed item
4
11
  * Fix bug with urls into tag attributes
5
12
  * Better item categories support
6
13
  * Reworked text output formatting
7
14
  * Ignore &shy;, as some blog software (dotclear2) misuses it.
8
15
 
9
- Ruby-Feedparser 0.6 (23/07/2008)
10
- ================================
11
- * Moved to_human_readable from class Fixnum to class Integer.
16
+ # 0.6 (23/07/2008)
17
+
18
+ * Moved `to_human_readable` from class Fixnum to class Integer.
12
19
  * Correctly parse http://www.tbray.org/ongoing/ongoing.atom. Thanks
13
20
  to Janico Greifenberg for reporting this.
14
21
  * String#html2text now takes an additional wrapto parameter, allowing
15
22
  to wrap the text to a specified number of chars. Thanks to
16
23
  Maxime Petazzoni for the patch.
17
24
 
18
- Ruby-Feedparser 0.5 (26/10/2007)
19
- ================================
25
+ # 0.5 (26/10/2007)
26
+
20
27
  * Fixed a bug with items with both non-escaped and escaped HTML. Reported,
21
28
  then patch provided by Gregory Hartman <gghartma@cs.cmu.edu>.
22
29
  * In Atom feeds, use the date provided in <updated>, and use it in
@@ -27,33 +34,33 @@ Ruby-Feedparser 0.5 (26/10/2007)
27
34
  * Make checks for HTML tags case-insensitive. Broke Dilbert feeds!!
28
35
  Reported by Michal Čihař. Closes gna bug #10199.
29
36
 
30
- Ruby-Feedparser 0.4 (01/05/2007)
31
- ================================
37
+ # 0.4 (01/05/2007)
38
+
32
39
  * Fixed a problem with html entities in the items' titles.
33
40
  * Date was not fetched for blogspot's atom feeds.
34
41
  Patch from Jason Ling <jason.ling@jeyel.com>.
35
42
  * Tests are now timezone-friendly. (closes GNA bug #8145).
36
43
  * Much nicer text output.
37
44
 
38
- Ruby-Feedparser 0.3 (01/12/2006)
39
- ================================
45
+ # 0.3 (01/12/2006)
46
+
40
47
  * Much nicer HTML output
41
48
  * Fixed a problem with some feeds with broken enclosures (without url)
42
- * Now automatically fixes non-absolute <a href> or <img src>
49
+ * Now automatically fixes non-absolute `<a href>` or `<img src>`
43
50
  * Fixed small parser bugs
44
51
  * Now displays enclosures in the text and html outputs. Ready for
45
52
  podcasting :-)
46
53
  * Now escape title, creator, subject and category internally. This minor
47
54
  fix avoids &amp; stuff in the titles, for example.
48
55
 
49
- Ruby-Feedparser 0.2 (05/06/2006)
50
- ================================
56
+ # 0.2 (05/06/2006)
57
+
51
58
  * Fixed a problem when parsing some ATOM feeds with <link> without type
52
59
  attribute. (Thanks Michal Cihar !)
53
60
  * FeedParser::Feed and FeedParser::FeedItem now have an xml attribute to
54
61
  get the related REXML::Element.
55
62
  * <enclosure/> support in RSS.
56
63
 
57
- Ruby-Feedparser 0.1 (24/11/2005)
58
- ================================
64
+ # 0.1 (24/11/2005)
65
+
59
66
  * first public release.
data/Rakefile CHANGED
@@ -1,14 +1,15 @@
1
1
  require 'rake/testtask'
2
- require 'rake/rdoctask'
3
- require 'rake/packagetask'
2
+ require 'rdoc/task'
3
+ require 'rubygems/package_task'
4
4
  require 'rake'
5
5
  require 'find'
6
+ require_relative 'lib/feedparser/version.rb'
6
7
 
7
8
  # Globals
8
9
  PKG_NAME = 'ruby-feedparser'
9
- PKG_VERSION = '0.7'
10
+ PKG_VERSION = FeedParser::VERSION
10
11
 
11
- PKG_FILES = [ 'ChangeLog', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
12
+ PKG_FILES = [ 'ChangeLog.md', 'README', 'COPYING', 'LICENSE', 'setup.rb', 'Rakefile']
12
13
  Find.find('lib/', 'test/', 'tools/') do |f|
13
14
  if FileTest.directory?(f) and f =~ /\.svn/
14
15
  Find.prune
@@ -19,7 +20,7 @@ end
19
20
 
20
21
  PKG_FILES.reject! { |f| f =~ /^test\/(source|.*_output)\// }
21
22
 
22
- task :default => [:package]
23
+ task :default => [:test]
23
24
 
24
25
  Rake::TestTask.new do |t|
25
26
  t.libs << "test"
@@ -61,8 +62,6 @@ end
61
62
 
62
63
  # "Gem" part of the Rakefile
63
64
  begin
64
- require 'rake/gempackagetask'
65
-
66
65
  spec = Gem::Specification.new do |s|
67
66
  s.platform = Gem::Platform::RUBY
68
67
  s.summary = "Ruby library to parse ATOM and RSS feeds"
@@ -73,12 +72,21 @@ begin
73
72
  s.autorequire = 'feedparser'
74
73
  s.files = PKG_FILES
75
74
  s.description = "Ruby library to parse ATOM and RSS feeds"
75
+ s.authors = ['Lucas Nussbaum']
76
+ s.add_runtime_dependency 'magic'
76
77
  end
77
78
 
78
- Rake::GemPackageTask.new(spec) do |pkg|
79
+ Gem::PackageTask.new(spec) do |pkg|
79
80
  pkg.need_zip = true
80
81
  pkg.need_tar = true
81
82
  end
82
83
  rescue LoadError
83
84
  puts "Will not generate gem."
84
85
  end
86
+
87
+ task :release => :repackage do
88
+ sh 'git', 'tag', 'v' + PKG_VERSION
89
+ sh 'git', 'push'
90
+ sh 'git', 'push', '--tags'
91
+ sh 'gem', 'push', "pkg/#{PKG_NAME}-#{PKG_VERSION}.gem"
92
+ end
@@ -1,17 +1,47 @@
1
+ require 'cgi'
1
2
  require 'rexml/document'
2
3
  require 'time'
3
4
  require 'feedparser/textconverters'
4
5
  require 'feedparser/rexml_patch'
5
6
  require 'feedparser/text-output'
7
+ require 'feedparser/version'
6
8
  require 'base64'
9
+ require 'magic'
10
+ require 'uri'
7
11
 
8
12
  module FeedParser
9
13
 
10
- VERSION = "0.7"
11
-
12
14
  class UnknownFeedTypeException < RuntimeError
13
15
  end
14
16
 
17
+ def self.recode(str)
18
+ encoding = nil
19
+ begin
20
+ encoding = Magic.guess_string_mime_encoding(str)
21
+ rescue => e
22
+ raise unless e.class.to_s =~ /\AMagic::(?:Exception|Error)\z/
23
+ # this happens when magic does not find any content at all, e.g. with
24
+ # strings that contain only whitespace. In this case it *should* be safe
25
+ # to assume UTF-8
26
+ encoding = Encoding::UTF_8
27
+ end
28
+ if encoding == 'unknown-8bit'
29
+ # find first substring with a valid encoding that is not us-ascii
30
+ length = 1 # has to start at 1, magic requires at least 2 bytes
31
+ while length < str.length && ['us-ascii', 'unknown-8bit'].include?(encoding)
32
+ encoding = Magic.guess_string_mime_encoding(str[0..length])
33
+ length = length + 1
34
+ end
35
+ # need to remove iso-8859-1 control characters
36
+ if encoding == 'iso-8859-1'
37
+ str = str.bytes.select { |c| c < 128 || c > 159 }.map(&:chr).join
38
+ end
39
+ end
40
+ str.force_encoding(encoding)
41
+ str = str.chars.select { |c| c.valid_encoding? }.join
42
+ str.encode('UTF-8')
43
+ end
44
+
15
45
  # an RSS/Atom feed
16
46
  class Feed
17
47
  attr_reader :type, :title, :link, :description, :creator, :encoding, :items
@@ -20,13 +50,16 @@ module FeedParser
20
50
  attr_reader :xml
21
51
 
22
52
  # parse str to build a Feed
23
- def initialize(str = nil)
53
+ def initialize(str = nil, uri = nil)
24
54
  parse(str) if str
55
+ parse_origin(uri) if uri
25
56
  end
26
57
 
27
58
  # Determines all the fields using a string containing an
28
59
  # XML document
29
60
  def parse(str)
61
+ str = FeedParser.recode(str)
62
+
30
63
  # Dirty hack: some feeds contain the & char. It must be changed to &amp;
31
64
  str.gsub!(/&(\s+)/, '&amp;\1')
32
65
  doc = REXML::Document.new(str)
@@ -34,6 +67,7 @@ module FeedParser
34
67
  # get feed info
35
68
  @encoding = doc.encoding
36
69
  @title,@link,@description,@creator = nil
70
+ @title = ""
37
71
  @items = []
38
72
  if doc.root.elements['channel'] || doc.root.elements['rss:channel']
39
73
  @type = "rss"
@@ -108,19 +142,28 @@ module FeedParser
108
142
  s += "Type: #{@type}\n"
109
143
  s += "Encoding: #{@encoding}\n"
110
144
  s += "Title: #{@title}\n"
111
- s += "Link: #{@link}\n"
145
+ s += "Link: #{link}\n"
112
146
  s += "Description: #{@description}\n"
113
147
  s += "Creator: #{@creator}\n"
114
148
  s += "\n"
115
149
  @items.each { |i| s += i.to_s(localtime) }
116
150
  s
117
151
  end
152
+
153
+ def parse_origin(uri)
154
+ uri = URI.parse(uri)
155
+ if uri.hostname && uri.scheme
156
+ @origin = "#{uri.scheme}://#{uri.hostname}"
157
+ end
158
+ end
159
+
160
+ attr_reader :origin
118
161
  end
119
162
 
120
163
  # an Item from a feed
121
164
  class FeedItem
122
- attr_accessor :title, :link, :content, :date, :creators, :subject,
123
- :cacheditem
165
+ attr_accessor :title, :content, :date, :creators, :subject,
166
+ :cacheditem, :links
124
167
 
125
168
  # The item's categories/tags. An array of strings.
126
169
  attr_accessor :categories
@@ -137,9 +180,12 @@ module FeedParser
137
180
  @xml = item
138
181
  @feed = feed
139
182
  @title, @link, @content, @date, @subject = nil
183
+ @links = []
140
184
  @creators = []
141
185
  @categories = []
142
186
  @enclosures = []
187
+
188
+ @title = ""
143
189
  parse(item) if item
144
190
  end
145
191
 
@@ -154,13 +200,14 @@ module FeedParser
154
200
  when 1
155
201
  return creators[0]
156
202
  else
157
- return creators[0...-1].join(", ")+" and "+creators[-1]
203
+ sorted_creators = creators.sort
204
+ return sorted_creators[0...-1].join(", ") + " and " + sorted_creators[-1]
158
205
  end
159
206
  end
160
207
 
161
208
  def to_s(localtime = true)
162
209
  s = "--------------------------------\n" +
163
- "Title: #{@title}\nLink: #{@link}\n"
210
+ "Title: #{@title}\nLink: #{link}\n"
164
211
  if localtime or @date.nil?
165
212
  s += "Date: #{@date.to_s}\n"
166
213
  else
@@ -181,6 +228,26 @@ module FeedParser
181
228
  end
182
229
  return s
183
230
  end
231
+
232
+ attr_writer :link
233
+
234
+ def link
235
+ if @link
236
+ begin
237
+ uri = URI.parse(@link)
238
+ rescue URI::InvalidURIError
239
+ return @link
240
+ end
241
+ if uri.hostname && uri.scheme
242
+ @link
243
+ elsif feed && feed.origin
244
+ [feed.origin, @link].compact.join
245
+ else
246
+ @link
247
+ end
248
+ end
249
+ end
250
+
184
251
  end
185
252
 
186
253
  class RSSItem < FeedItem
@@ -199,7 +266,7 @@ module FeedParser
199
266
  (e = item.elements['guid'] || item.elements['rss:guid'] and
200
267
  not (e.attribute('isPermaLink') and
201
268
  e.attribute('isPermaLink').value == 'false'))
202
- @link = e.text.rmWhiteSpace!
269
+ self.link = e.text.rmWhiteSpace!
203
270
  end
204
271
  # Content
205
272
  if (e = item.elements['content:encoded']) ||
@@ -261,8 +328,16 @@ module FeedParser
261
328
  end
262
329
  # Link
263
330
  item.each_element('link') do |e|
331
+
264
332
  if (h = e.attribute('href')) && h.value
265
- @link = h.value
333
+ self.link = h.value
334
+
335
+ if e.attribute('type')
336
+ @links << {:href => h.value, :type => e.attribute('type').value}
337
+ else
338
+ @links << {:href => h.value, :type => ''}
339
+ end
340
+
266
341
  end
267
342
  end
268
343
  # Content
@@ -80,13 +80,13 @@ module FeedParser
80
80
  s += (headline % ["Feed:", r])
81
81
 
82
82
  r = ""
83
- r += "<a href=\"#{@link}\">" if @link
83
+ r += "<a href=\"#{link}\">" if link
84
84
  if @title
85
85
  r += "<b>#{@title.escape_html}</b>\n"
86
- elsif @link
87
- r += "<b>#{@link.escape_html}</b>\n"
86
+ elsif link
87
+ r += "<b>#{link.escape_html}</b>\n"
88
88
  end
89
- r += "</a>\n" if @link
89
+ r += "</a>\n" if link
90
90
  s += (headline % ["Item:", r])
91
91
  s += "</table></td></tr></table>\n"
92
92
  s += "\n"
@@ -11,16 +11,16 @@ module FeedParser
11
11
  @pre = false
12
12
  @href = nil
13
13
  @links = []
14
+ @curlink = []
14
15
  @imgs = []
15
- @img_index = '@'
16
+ @img_index = 'A'
16
17
  super(verbose)
17
18
  end
18
19
 
19
20
  def next_img_index
20
- n = @img_index[0] + 1
21
- @img_index = " "
22
- @img_index[0] = n
23
- return @img_index
21
+ idx = @img_index
22
+ @img_index = @img_index.next
23
+ idx
24
24
  end
25
25
 
26
26
  def handle_data(data)
@@ -29,7 +29,8 @@ module FeedParser
29
29
  data.gsub!(/\n/, ' ')
30
30
  data.gsub!(/( )+/, ' ')
31
31
  end
32
- @savedata << data
32
+ data = FeedParser.recode(data)
33
+ @savedata << data.encode(Encoding::UTF_8)
33
34
  end
34
35
 
35
36
  def unknown_starttag(tag, attrs)
@@ -70,7 +71,14 @@ module FeedParser
70
71
  end
71
72
  end
72
73
  if @href
73
- @links << @href.gsub(/^("|'|)(.*)("|')$/,'\2')
74
+ @href.gsub!(/^("|'|)(.*)("|')$/,'\2')
75
+ @curlink = @links.find_index(@href)
76
+ if @curlink.nil?
77
+ @links << @href
78
+ @curlink = @links.length
79
+ else
80
+ @curlink += 1
81
+ end
74
82
  end
75
83
  when 'img'
76
84
  # find src in args
@@ -81,8 +89,14 @@ module FeedParser
81
89
  end
82
90
  end
83
91
  if src
84
- idx = next_img_index
85
- @imgs << [ idx, src.gsub(/^("|'|)(.*)("|')$/,'\2') ]
92
+ src.gsub!(/^("|'|)(.*)("|')$/,'\2')
93
+ i = @imgs.index { |e| e[1] == src }
94
+ if i.nil?
95
+ idx = next_img_index
96
+ @imgs << [ idx, src ]
97
+ else
98
+ idx = @imgs[i][0]
99
+ end
86
100
  @savedata << "[#{idx}]"
87
101
  end
88
102
  else
@@ -125,7 +139,7 @@ module FeedParser
125
139
  @pre = false
126
140
  when 'a'
127
141
  if @href
128
- @savedata << "[#{@links.length}]"
142
+ @savedata << "[#{@curlink}]"
129
143
  @href = nil
130
144
  end
131
145
  end
@@ -6,14 +6,17 @@ require 'feedparser/textconverters'
6
6
  # With those changes, it uses unpack/pack with some error handling
7
7
  module REXML
8
8
  module Encoding
9
+ alias rexml_decode decode
9
10
  def decode(str)
10
11
  return str.toUTF8(@encoding)
11
12
  end
12
13
 
14
+ alias rexml_encode encode
13
15
  def encode(str)
14
16
  return str
15
17
  end
16
18
 
19
+ alias rexml_encoding= encoding=
17
20
  def encoding=(enc)
18
21
  return if defined? @encoding and enc == @encoding
19
22
  @encoding = enc || 'utf-8'
@@ -293,12 +293,11 @@ module FeedParser
293
293
  end
294
294
 
295
295
  def handle_charref(name)
296
- n = name.to_i
297
- if !(0 <= n && n <= 255)
296
+ if name =~ /[0-9]+/
298
297
  unknown_charref(name)
299
- return
298
+ else
299
+ handle_data(name)
300
300
  end
301
- handle_data(n.chr)
302
301
  end
303
302
 
304
303
  def handle_entityref(name)
@@ -1,4 +1,3 @@
1
- require 'feedparser'
2
1
  require 'feedparser/html2text-parser'
3
2
  require 'feedparser/filesizes'
4
3
 
@@ -61,7 +60,7 @@ module FeedParser
61
60
  if header
62
61
  s += "Item: "
63
62
  s += @title if @title
64
- s += "\n<#{@link}>" if @link
63
+ s += "\n<#{link}>" if link
65
64
  if @date
66
65
  if localtime
67
66
  s += "\nDate: #{@date.to_s}"
@@ -71,7 +70,7 @@ module FeedParser
71
70
  end
72
71
  s += "\n"
73
72
  else
74
- s += "<#{@link}>\n\n" if @link
73
+ s += "<#{link}>\n\n" if link
75
74
  end
76
75
  s += "#{@content.html2text(wrapto).chomp}\n" if @content
77
76
  if @enclosures and @enclosures.length > 0
@@ -89,7 +88,7 @@ module FeedParser
89
88
  if not header
90
89
  s += "\nItem: "
91
90
  s += @title if @title
92
- s += "\n<#{@link}>" if @link
91
+ s += "\n<#{link}>" if link
93
92
  if @date
94
93
  if localtime
95
94
  s += "\nDate: #{@date.to_s}"
@@ -59,7 +59,7 @@ end
59
59
  text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
60
60
  text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
61
61
  # uris
62
- text.gsub!(/([^'"])(#{URI::regexp(['http','ftp','https'])})/,
62
+ text.gsub!(/([^'"])(#{URI::DEFAULT_PARSER.make_regexp(['http','ftp','https'])})/,
63
63
  '\1<a href="\2">\2</a>')
64
64
  end
65
65
  # Handle broken hrefs in <a> and <img>
@@ -0,0 +1,3 @@
1
+ module FeedParser
2
+ VERSION = "0.9.7"
3
+ end
@@ -1,4 +1,4 @@
1
- #!/usr/bin/ruby -w
1
+ # encoding: UTF-8
2
2
 
3
3
  $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
4
 
@@ -114,4 +114,55 @@ class FeedParserTest < Test::Unit::TestCase
114
114
  # the third one should be removed because an enclosure should have an url, or it's useless.
115
115
  assert_equal([["url1", "1", "type1"], ["url2", nil, "type2"], ["url1", "1", nil]], ch.items[0].enclosures)
116
116
  end
117
+
118
+ def test_recode_utf8
119
+ assert_equal 'UTF-8', FeedParser.recode("áéíóú").encoding.name
120
+ end
121
+
122
+ def test_recode_blank
123
+ assert_equal 'UTF-8', FeedParser.recode('').encoding.name
124
+ end
125
+
126
+ def test_recode_iso88519
127
+ assert_equal 'UTF-8', FeedParser.recode("áéíóú".encode('iso-8859-1')).encoding.name
128
+ end
129
+
130
+ def test_recode_utf8_mixed_with_ASCIIBIT
131
+ recoded = FeedParser.recode("áé\x8Díóú")
132
+ assert_equal'UTF-8', recoded.encoding.name
133
+ assert_equal 'áéíóú', recoded
134
+ end
135
+
136
+ def test_recode_unicode_char
137
+ assert_equal "1280×1024", FeedParser.recode("1280×1024")
138
+ end
139
+
140
+ def test_almost_valid_iso88591
141
+ input = "Codifica\xE7\xE3o \x96 quase v\xE1lida"
142
+ assert_equal "Codificação quase válida", FeedParser.recode(input)
143
+ end
144
+
145
+ def test_feed_origin
146
+ feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
147
+ assert_equal "http://foo.com", feed.origin
148
+ end
149
+
150
+ def test_item_origin
151
+ feed = FeedParser::Feed.new(nil, 'http://foo.com/feed')
152
+ item = FeedParser::FeedItem.new(nil, feed)
153
+ item.link = '/foo/bar'
154
+ assert_equal 'http://foo.com/foo/bar', item.link
155
+ end
156
+
157
+ def test_item_origin_no_link
158
+ item = FeedParser::FeedItem.new(nil, nil)
159
+ assert_nil item.link
160
+ end
161
+
162
+ def test_item_no_feed
163
+ item = FeedParser::FeedItem.new(nil, nil)
164
+ item.link = '/foo/bar'
165
+ assert_equal '/foo/bar', item.link
166
+ end
167
+
117
168
  end
@@ -0,0 +1,47 @@
1
+ require 'feedparser/feedparser'
2
+ require 'test/unit'
3
+
4
+
5
+ class FeedItemTest < Test::Unit::TestCase
6
+ def setup
7
+ @item = FeedParser::FeedItem.new(nil, nil)
8
+ end
9
+
10
+ ########################################################################
11
+
12
+ def test_link_no_link
13
+ assert @item.link.nil?
14
+ end
15
+
16
+ def test_link_basic
17
+ @item.instance_variable_set('@link', 'https://www.example.com/')
18
+ assert_equal "https://www.example.com/", @item.link
19
+ end
20
+
21
+ def test_link_path_only
22
+ @item.instance_variable_set('@link', '/foo/bar/')
23
+ assert_equal "/foo/bar/", @item.link
24
+ end
25
+
26
+ def test_link_path_only_with_feed_origin
27
+ @item.instance_variable_set('@link', '/foo/bar/')
28
+ feed = FeedParser::Feed.new
29
+ feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
30
+ @item.instance_variable_set('@feed', feed)
31
+ assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
32
+ end
33
+
34
+ def test_link_full_link_with_feed_origin
35
+ @item.instance_variable_set('@link', 'https://www.exampleorigin.com/foo/bar/')
36
+ feed = FeedParser::Feed.new
37
+ feed.instance_variable_set('@origin', 'https://www.exampleorigin.com')
38
+ @item.instance_variable_set('@feed', feed)
39
+ assert_equal "https://www.exampleorigin.com/foo/bar/", @item.link
40
+ end
41
+
42
+ def test_link_with_non_ascii
43
+ @item.instance_variable_set('@link', 'https://www.example.people/☭/')
44
+ assert_equal "https://www.example.people/☭/", @item.link
45
+ end
46
+
47
+ end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test/unit'
4
+
5
+ require 'feedparser/feedparser'
6
+
7
+ class Html2TextParserTest < Test::Unit::TestCase
8
+
9
+ def test_next_img_index
10
+ parser = FeedParser::HTML2TextParser.new
11
+ assert_equal 'A', parser.next_img_index
12
+ assert_equal 'B', parser.next_img_index
13
+ end
14
+
15
+ def test_numerical_entity
16
+ parser = FeedParser::HTML2TextParser.new
17
+ parser.feed('1280&#215;1024')
18
+ parser.close
19
+ assert_equal "1280×1024", parser.savedata
20
+ end
21
+
22
+ def test_numerical_entity_large_known
23
+ parser = FeedParser::HTML2TextParser.new
24
+ parser.feed('&#8594;')
25
+ parser.close
26
+ assert_equal "→", parser.savedata
27
+ end
28
+
29
+ def test_numerical_entity_large
30
+ parser = FeedParser::HTML2TextParser.new
31
+ parser.feed('&#10000;')
32
+ parser.close
33
+ assert_equal "✐", parser.savedata
34
+ end
35
+
36
+ def test_non_numerical_entity
37
+ parser = FeedParser::HTML2TextParser.new
38
+ parser.feed('HTML&amp;CO')
39
+ parser.close
40
+ assert_equal "HTML&CO", parser.savedata
41
+ end
42
+
43
+ end
@@ -19,12 +19,10 @@ class HTMLOutputTest < Test::Unit::TestCase
19
19
  else
20
20
  raise 'source directory not found.'
21
21
  end
22
- def test_parser
23
- allok = true
24
- Dir.foreach(SRCDIR) do |f|
25
- next if f !~ /.xml$/
26
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
27
- puts "Checking #{f}"
22
+ Dir.foreach(SRCDIR) do |f|
23
+ next if f !~ /.xml$/
24
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
25
+ define_method(testname) do
28
26
  str = File::read(SRCDIR + '/' + f)
29
27
  chan = FeedParser::Feed::new(str)
30
28
  chanstr = chan.to_html(false)
@@ -34,19 +32,21 @@ class HTMLOutputTest < Test::Unit::TestCase
34
32
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
35
33
  fd.print(chanstr)
36
34
  end
37
- puts "Test failed for #{f}."
38
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
39
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
40
- allok = false
35
+ assert(
36
+ false,
37
+ [
38
+ "Test failed for #{f}.",
39
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
40
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
41
+ ].join("\n")
42
+ )
41
43
  end
42
44
  else
43
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
44
45
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
45
46
  f.print(chanstr)
46
47
  end
47
- allok = false
48
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
48
49
  end
49
50
  end
50
- assert(allok)
51
51
  end
52
52
  end
data/test/tc_parser.rb CHANGED
@@ -15,12 +15,10 @@ class ParserTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_s(false)
@@ -30,19 +28,21 @@ class ParserTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
@@ -0,0 +1,22 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'test/unit'
4
+ require 'mocha/setup'
5
+
6
+ require 'feedparser/sgml-parser'
7
+
8
+ class SGMLParserTest < Test::Unit::TestCase
9
+
10
+ def test_numerical_charref
11
+ parser = FeedParser::SGMLParser.new
12
+ parser.expects(:unknown_charref).with('215')
13
+ parser.handle_charref('215')
14
+ end
15
+
16
+ def test_non_numerical_charref
17
+ parser = FeedParser::SGMLParser.new
18
+ parser.expects(:handle_data).with('amp')
19
+ parser.handle_charref('amp')
20
+ end
21
+
22
+ end
@@ -15,12 +15,10 @@ class TextOutputTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_text(false) # localtime set to false
@@ -30,19 +28,21 @@ class TextOutputTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
@@ -15,12 +15,10 @@ class TextWrappedOutputTest < Test::Unit::TestCase
15
15
  else
16
16
  raise 'source directory not found.'
17
17
  end
18
- def test_parser
19
- allok = true
20
- Dir.foreach(SRCDIR) do |f|
21
- next if f !~ /.xml$/
22
- next if ENV['SOURCE'] != nil and ENV['SOURCE'] != f
23
- puts "Checking #{f}"
18
+ Dir.foreach(SRCDIR) do |f|
19
+ next if f !~ /.xml$/
20
+ testname = 'test_' + File.basename(f).gsub(/\W/, '_')
21
+ define_method(testname) do
24
22
  str = File::read(SRCDIR + '/' + f)
25
23
  chan = FeedParser::Feed::new(str)
26
24
  chanstr = chan.to_text(false, 72) # localtime set to false
@@ -30,19 +28,21 @@ class TextWrappedOutputTest < Test::Unit::TestCase
30
28
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output.new'), "w") do |fd|
31
29
  fd.print(chanstr)
32
30
  end
33
- puts "Test failed for #{f}."
34
- puts " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}"
35
- puts " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}"
36
- allok = false
31
+ assert(
32
+ false,
33
+ [
34
+ "Test failed for #{f}.",
35
+ " Check: diff -u #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{,.new}",
36
+ " Commit: mv -f #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}{.new,}",
37
+ ].join("\n")
38
+ )
37
39
  end
38
40
  else
39
- puts "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!"
40
41
  File::open(DSTDIR + '/' + f.gsub(/.xml$/, '.output'), "w") do |f|
41
42
  f.print(chanstr)
42
43
  end
43
- allok = false
44
+ assert(false, "Missing #{DSTDIR + '/' + f.gsub(/.xml$/, '.output')}. Writing it, but check manually!")
44
45
  end
45
46
  end
46
- assert(allok)
47
47
  end
48
48
  end
metadata CHANGED
@@ -1,76 +1,82 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ruby-feedparser
3
- version: !ruby/object:Gem::Version
4
- version: "0.7"
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.7
5
5
  platform: ruby
6
- authors: []
7
-
6
+ authors:
7
+ - Lucas Nussbaum
8
8
  autorequire: feedparser
9
9
  bindir: bin
10
10
  cert_chain: []
11
-
12
- date: 2009-07-27 00:00:00 +02:00
13
- default_executable:
14
- dependencies: []
15
-
11
+ date: 2021-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: magic
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
16
27
  description: Ruby library to parse ATOM and RSS feeds
17
28
  email:
18
29
  executables: []
19
-
20
30
  extensions: []
21
-
22
31
  extra_rdoc_files: []
23
-
24
- files:
25
- - ChangeLog
26
- - README
32
+ files:
27
33
  - COPYING
34
+ - ChangeLog.md
28
35
  - LICENSE
29
- - setup.rb
36
+ - README
30
37
  - Rakefile
31
- - lib/feedparser/text-output.rb
38
+ - lib/feedparser.rb
39
+ - lib/feedparser/feedparser.rb
32
40
  - lib/feedparser/filesizes.rb
33
41
  - lib/feedparser/html-output.rb
34
- - lib/feedparser/rexml_patch.rb
35
42
  - lib/feedparser/html2text-parser.rb
36
- - lib/feedparser/textconverters.rb
37
- - lib/feedparser/feedparser.rb
43
+ - lib/feedparser/rexml_patch.rb
38
44
  - lib/feedparser/sgml-parser.rb
39
- - lib/feedparser.rb
45
+ - lib/feedparser/text-output.rb
46
+ - lib/feedparser/textconverters.rb
47
+ - lib/feedparser/version.rb
48
+ - setup.rb
40
49
  - test/tc_feed_parse.rb
41
- - test/tc_textoutput.rb
50
+ - test/tc_feeditem.rb
51
+ - test/tc_html2text_parser.rb
42
52
  - test/tc_htmloutput.rb
53
+ - test/tc_parser.rb
54
+ - test/tc_sgml_parser.rb
55
+ - test/tc_textoutput.rb
43
56
  - test/tc_textwrappedoutput.rb
44
57
  - test/ts_feedparser.rb
45
- - test/tc_parser.rb
46
58
  - tools/doctoweb.bash
47
- has_rdoc: true
48
59
  homepage:
49
60
  licenses: []
50
-
61
+ metadata: {}
51
62
  post_install_message:
52
63
  rdoc_options: []
53
-
54
- require_paths:
64
+ require_paths:
55
65
  - lib
56
- required_ruby_version: !ruby/object:Gem::Requirement
57
- requirements:
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
58
68
  - - ">="
59
- - !ruby/object:Gem::Version
60
- version: "0"
61
- version:
62
- required_rubygems_version: !ruby/object:Gem::Requirement
63
- requirements:
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
64
73
  - - ">="
65
- - !ruby/object:Gem::Version
66
- version: "0"
67
- version:
68
- requirements:
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements:
69
77
  - none
70
- rubyforge_project:
71
- rubygems_version: 1.3.4
78
+ rubygems_version: 3.2.5
72
79
  signing_key:
73
- specification_version: 3
80
+ specification_version: 4
74
81
  summary: Ruby library to parse ATOM and RSS feeds
75
82
  test_files: []
76
-