rfeedreader 0.1.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.9.0 2007-09-01
2
+
3
+ * Beta release, ready for production test
4
+ * All unit test working
5
+
1
6
  == 0.1.0 2007-09-01
2
7
 
3
8
  * Initial release
@@ -1,7 +1,7 @@
1
1
  module Rfeedreader #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 1
4
+ MINOR = 9
5
5
  TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
data/lib/rfeedreader.rb CHANGED
@@ -15,6 +15,7 @@ module Rfeedreader
15
15
 
16
16
  def initialize(link, hpricot_doc)
17
17
  @link = link
18
+ puts "link: #{link}"
18
19
  read_title hpricot_doc
19
20
  read_charset hpricot_doc
20
21
  @entries = []
@@ -61,6 +62,7 @@ module Rfeedreader
61
62
  @charset = hpricot_doc.to_s.scan(/encoding=['"]?([^'"]*)['" ]/)
62
63
  @charset = @charset[0] if @charset.is_a? Array
63
64
  @charset = @charset.to_s.downcase
65
+ @charset = 'utf-8' if @charset.empty?
64
66
  end
65
67
 
66
68
  def read_title(hpricot_doc)
@@ -78,73 +80,79 @@ module Rfeedreader
78
80
  end
79
81
 
80
82
  class Entry
81
- attr_accessor :title, :link, :description, :charset
83
+ attr_accessor :title, :link, :description, :charset, :hpricot_item
82
84
 
83
85
  def initialize(item, charset)
86
+ @hpricot_item = item
84
87
  @charset = charset
85
- @link = read_link item
86
- @title = read_title item
87
- @description = read_description item
88
+ # Setup attributes
89
+ read_link
90
+ read_title
91
+ read_description
88
92
  end
89
93
 
90
94
  # Return the rss item link
91
- def read_link(item)
92
- post_url = nil
93
- if link = item.search("link:first")
94
- post_url = link.text
95
- post_url = link.to_s.scan(/href=['"]?([^'"]*)['" ]/).to_s if (post_url.nil? or post_url.empty?)
95
+ def read_link
96
+ @link = nil
97
+ if link = (@hpricot_item/"link")[0]
98
+ @link = link.to_s.scan(/(http:\/\/.[^<\"]*)/).to_s
96
99
  end
97
- return post_url
98
100
  end
99
101
 
100
- def read_title(item)
101
- return TextyHelper::convertEncoding((item/:title).text, @charset).downcase
102
+ def read_title
103
+ @title = TextyHelper::convertEncoding((@hpricot_item/:title).text, @charset).downcase
102
104
  end
103
105
 
104
- def read_description(item)
105
- description = (item/"description|summary|content|[@type='text']").text
106
- if description.include? "&lt;"
107
- description = HTMLEntities.decode_entities(description)
108
- else
109
- description = HTMLEntities.encode_entities(description, :named, :decimal) if @charset == 'utf-8'
106
+ def read_description
107
+ @description = ""
108
+ @description = (@hpricot_item/"content").text
109
+ @description = (@hpricot_item/"content\:encoded").text if @description.empty?
110
+ @description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
111
+
112
+ unless @description.empty?
113
+ @description = HTMLEntities.encode_entities(@description, :named, :decimal)
114
+ @description.gsub!("&#10;", "")
115
+ @description.gsub!("&#13;", "")
116
+ @description.strip!
117
+
118
+ @description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
119
+ @description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
120
+ @description.strip!
110
121
  end
111
- description = TextyHelper::clean(TextyHelper::convertEncoding(description, @charset), 200) unless description.empty?
112
- description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]') unless description.empty?
113
- return description.strip
114
122
  end
115
123
 
116
124
  def to_s
117
- "Entry: title: #{title} - link: #{link}\n\rdescription: #{description}"
125
+ "Entry: title: #{@title} - link: #{@link}\n\rdescription: #{@description}"
118
126
  end
119
127
  end
120
128
 
121
129
  class Entry_Flickr<Entry
122
- def read_description(item)
123
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
124
- image = item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
130
+ def read_description
131
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
132
+ image = @hpricot_item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
125
133
  image.gsub!(/_.\.jpg/,"_t.jpg")
126
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
134
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
127
135
  end
128
136
  end
129
137
 
130
138
  class Entry_Fotolog<Entry
131
- def read_description(item)
132
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
133
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
139
+ def read_description
140
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
141
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
134
142
  end
135
143
  end
136
144
 
137
145
  class Entry_Google_Video<Entry
138
- def read_description(item)
139
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&amp;/, '&')
140
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
146
+ def read_description
147
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&amp;/, '&')
148
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
141
149
  end
142
150
  end
143
151
 
144
152
  class Entry_Jumpcut<Entry
145
- def read_description(item)
146
- image = item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
147
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
153
+ def read_description
154
+ image = @hpricot_item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
155
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
148
156
  end
149
157
  end
150
158
 
@@ -158,13 +166,14 @@ module Rfeedreader
158
166
  def read(uri, nb_posts=10)
159
167
 
160
168
  link = Rfeedfinder::feed(uri)
161
- doc = open_doc(link)
169
+ unless link.nil?
170
+ doc = open_doc(link)
162
171
 
163
- unless doc.nil?
164
- feed = Feed.new(link, doc)
165
- entries = feed.parse_entries(doc, nb_posts)
172
+ unless doc.nil?
173
+ feed = Feed.new(link, doc)
174
+ entries = feed.parse_entries(doc, nb_posts)
175
+ end
166
176
  end
167
-
168
177
  return feed
169
178
  end
170
179
 
data/test/test_helper.rb CHANGED
@@ -5,4 +5,12 @@ def read_feed(feed_url)
5
5
  feed = Rfeedreader.read(feed_url)
6
6
  assert_not_nil feed
7
7
  return feed
8
+ end
9
+
10
+ def read_first(feed_url)
11
+ puts "Read first from #{feed_url}"
12
+ feed = Rfeedreader.read_first feed_url
13
+ assert_not_nil feed
14
+ assert_equal 1, feed.entries.size
15
+ feed.display_entries
8
16
  end
@@ -41,118 +41,75 @@ class TestRfeedreader < Test::Unit::TestCase
41
41
  feed.display_entries
42
42
  end
43
43
 
44
- def test_read_lots
45
- feed = Rfeedreader.read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
46
- assert_equal 1, feed.entries.size
47
- feed = Rfeedreader.read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
48
- assert_equal 1, feed.entries.size
49
- feed = Rfeedreader.read_first "http://organizandolaesperanza.blogspot.com"
50
- assert_equal 1, feed.entries.size
51
- feed = Rfeedreader.read_first "http://skblackburn.blogspot.com/"
52
- assert_equal 1, feed.entries.size
53
- feed = Rfeedreader.read_first "http://nadapersonal.blogspot.com"
54
- assert_equal 1, feed.entries.size
55
- feed = Rfeedreader.read_first "http://diariodeunadislexica.blogspot.com/"
56
- assert_equal 1, feed.entries.size
57
- feed = Rfeedreader.read_first "http://diputadodelosverdes.blogspot.com/"
58
- assert_equal 1, feed.entries.size
59
- feed = Rfeedreader.read_first "http://cinclin.blogspot.com/"
60
- assert_equal 1, feed.entries.size
61
- feed = Rfeedreader.read_first "http://claudiaramos.blogspot.com/"
62
- assert_equal 1, feed.entries.size
63
- feed = Rfeedreader.read_first "http://lacomunidad.elpais.com/krismontesinos/"
64
- assert_equal 1, feed.entries.size
65
- feed = Rfeedreader.read_first "http://www.becker-posner-blog.com/index.rdf"
66
- assert_equal 1, feed.entries.size
67
- feed = Rfeedreader.read_first "http://rss.slashdot.org/Slashdot/slashdot"
68
- assert_equal 1, feed.entries.size
69
- feed = Rfeedreader.read_first "http://planeta.lamatriz.org/feed/"
70
- assert_equal 1, feed.entries.size
71
- feed = Rfeedreader.read_first "http://edubloggers.blogspot.com/"
72
- assert_equal 1, feed.entries.size
73
- feed = Rfeedreader.read_first "http://www.deugarte.com/feed/"
74
- assert_equal 1, feed.entries.size
75
- feed = Rfeedreader.read_first "http://www.twitter.com/alx/"
76
- assert_equal 1, feed.entries.size
77
- feed = Rfeedreader.read_first "http://alemama.blogspot.com"
78
- assert_equal 1, feed.entries.size
79
- feed = Rfeedreader.read_first "http://seedmagazine.com/news/atom-focus.xml"
80
- assert_equal 1, feed.entries.size
81
- feed = Rfeedreader.read_first "http://bitacora.feevy.com"
82
- assert_equal 1, feed.entries.size
83
- feed = Rfeedreader.read_first "http://www.enriquemeneses.com/"
84
- assert_equal 1, feed.entries.size
85
- feed = Rfeedreader.read_first "http://ianasagasti.blogs.com/"
86
- assert_equal 1, feed.entries.size
87
- feed = Rfeedreader.read_first "http://www.ecoperiodico.com/"
88
- assert_equal 1, feed.entries.size
89
- feed = Rfeedreader.read_first "http://bloc.balearweb.net/rss.php?summary=1"
90
- assert_equal 1, feed.entries.size
91
- feed = Rfeedreader.read_first "http://www.antoniobezanilla.com/"
92
- assert_equal 1, feed.entries.size
93
- feed = Rfeedreader.read_first "http://www.joselopezorozco.com/"
94
- assert_equal 1, feed.entries.size
95
- feed = Rfeedreader.read_first "http://minijoan.vox.com/"
96
- assert_equal 1, feed.entries.size
97
- feed = Rfeedreader.read_first "http://www.dosdedosdefrente.com/blog/"
98
- assert_equal 1, feed.entries.size
99
- feed = Rfeedreader.read_first "http://www.deugarte.com/blog/fabbing/feed"
100
- assert_equal 1, feed.entries.size
101
- feed = Rfeedreader.read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
102
- assert_equal 1, feed.entries.size
103
- feed = Rfeedreader.read_first "http://sombra.lamatriz.org/"
104
- assert_equal 1, feed.entries.size
105
- feed = Rfeedreader.read_first "http://tristezza0.spaces.live.com/feed.rss"
106
- assert_equal 1, feed.entries.size
107
- feed = Rfeedreader.read_first "http://lacoctelera.com/macadamia"
108
- assert_equal 1, feed.entries.size
109
- feed = Rfeedreader.read_first "http://www.liberation.fr"
110
- assert_equal 1, feed.entries.size
111
- feed = Rfeedreader.read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
112
- assert_equal 1, feed.entries.size
113
- feed = Rfeedreader.read_first "http://www.gara.net/rss/kultura"
114
- assert_equal 1, feed.entries.size
115
- feed = Rfeedreader.read_first "http://davicius.wordpress.com/feed/"
116
- assert_equal 1, feed.entries.size
117
- feed = Rfeedreader.read_first "http://www.cato-at-liberty.org/wp-rss.php"
118
- assert_equal 1, feed.entries.size
119
- feed = Rfeedreader.read_first "http://creando.bligoo.com/"
120
- assert_equal 1, feed.entries.size
121
- feed = Rfeedreader.read_first "http://feeds.feedburner.com/37signals/beMH"
122
- assert_equal 1, feed.entries.size
123
- feed = Rfeedreader.read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
124
- assert_equal 1, feed.entries.size
125
- feed = Rfeedreader.read_first "http://www.rubendomfer.com/blog/"
126
- assert_equal 1, feed.entries.size
127
- feed = Rfeedreader.read_first "http://www.arfues.net/weblog/"
128
- assert_equal 1, feed.entries.size
129
- feed = Rfeedreader.read_first "http://www.lkstro.com/"
130
- assert_equal 1, feed.entries.size
131
- feed = Rfeedreader.read_first "http://www.lorenabetta.info"
132
- assert_equal 1, feed.entries.size
133
- feed = Rfeedreader.read_first "http://www.adesalambrar.info/"
134
- assert_equal 1, feed.entries.size
135
- feed = Rfeedreader.read_first "http://www.bufetalmeida.com/rss.xml"
136
- assert_equal 1, feed.entries.size
137
- feed = Rfeedreader.read_first "http://dreams.draxus.org/"
138
- assert_equal 1, feed.entries.size
139
- feed = Rfeedreader.read_first "http://mephisto.sobrerailes.com/"
140
- assert_equal 1, feed.entries.size
141
- feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/"
142
- assert_equal 1, feed.entries.size
143
- feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/feed/main/rss20"
144
- assert_equal 1, feed.entries.size
145
- feed = Rfeedreader.read_first "http://www1.fotolog.com/mad_lux"
146
- assert_equal 1, feed.entries.size
147
- feed = Rfeedreader.read_first "http://www1.fotolog.com/kel_06/"
148
- assert_equal 1, feed.entries.size
149
- feed = Rfeedreader.read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
150
- assert_equal 1, feed.entries.size
151
- feed = Rfeedreader.read_first "http://voxd.blogsome.com/"
152
- assert_equal 1, feed.entries.size
153
- feed = Rfeedreader.read_first "http://andreja666.bloger.hr/"
154
- assert_equal 1, feed.entries.size
155
- feed = Rfeedreader.read_first "http://blog.zvents.com/"
156
- assert_equal 1, feed.entries.size
44
+ def test_read_from_feevy
45
+ # read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
46
+ # read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
47
+ # read_first "http://organizandolaesperanza.blogspot.com"
48
+ # read_first "http://skblackburn.blogspot.com/"
49
+ # read_first "http://nadapersonal.blogspot.com"
50
+ # read_first "http://diariodeunadislexica.blogspot.com/"
51
+ # read_first "http://diputadodelosverdes.blogspot.com/"
52
+ # read_first "http://cinclin.blogspot.com/"
53
+ # read_first "http://claudiaramos.blogspot.com/"
54
+ # read_first "http://lacomunidad.elpais.com/krismontesinos/"
55
+ # read_first "http://www.becker-posner-blog.com/index.rdf"
56
+ # read_first "http://rss.slashdot.org/Slashdot/slashdot"
57
+ # read_first "http://planeta.lamatriz.org/feed/"
58
+ # read_first "http://edubloggers.blogspot.com/"
59
+ # read_first "http://www.deugarte.com/feed/"
60
+ # read_first "http://www.twitter.com/alx/"
61
+ # read_first "http://alemama.blogspot.com"
62
+ # read_first "http://seedmagazine.com/news/atom-focus.xml"
63
+ # read_first "http://bitacora.feevy.com"
64
+ # read_first "http://www.enriquemeneses.com/"
65
+ read_first "http://ianasagasti.blogs.com/"
66
+ read_first "http://www.ecoperiodico.com/"
67
+ read_first "http://bloc.balearweb.net/rss.php?summary=1"
68
+ read_first "http://www.antoniobezanilla.com/"
69
+ read_first "http://www.joselopezorozco.com/"
70
+ read_first "http://www.dosdedosdefrente.com/blog/"
71
+ read_first "http://www.deugarte.com/blog/fabbing/feed"
72
+ read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
73
+ read_first "http://sombra.lamatriz.org/"
74
+ read_first "http://tristezza0.spaces.live.com/feed.rss"
75
+ read_first "http://www.liberation.fr"
76
+ read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
77
+ read_first "http://www.gara.net/rss/kultura"
78
+ read_first "http://davicius.wordpress.com/feed/"
79
+ read_first "http://www.cato-at-liberty.org/wp-rss.php"
80
+ read_first "http://creando.bligoo.com/"
81
+ read_first "http://feeds.feedburner.com/37signals/beMH"
82
+ read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
83
+ read_first "http://www.rubendomfer.com/blog/"
84
+ read_first "http://www.arfues.net/weblog/"
85
+ read_first "http://www.lkstro.com/"
86
+ read_first "http://www.lorenabetta.info"
87
+ read_first "http://www.adesalambrar.info/"
88
+ read_first "http://www.bufetalmeida.com/rss.xml"
89
+ read_first "http://dreams.draxus.org/"
90
+ read_first "http://mephisto.sobrerailes.com/"
91
+ read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
92
+ read_first "http://voxd.blogsome.com/"
93
+ read_first "http://blog.zvents.com/"
94
+ end
95
+
96
+ def test_read_content_encoded
97
+ read_first "http://www.lacoctelera.com/macadamia/feeds/rss2"
98
+ end
99
+
100
+ def test_read_link_empty
101
+ read_first "http://minijoan.vox.com/library/posts/atom.xml"
102
+ end
103
+
104
+ def test_read_type_error
105
+ read_first "http://www0.fotolog.com/darth_fonsu/feed/main/rss20"
106
+ end
107
+
108
+ def test_read_twitter
109
+ read_first "http://twitter.com/statuses/friends_timeline/534023.rss"
110
+ end
111
+
112
+ def test_encoding_error
113
+ read_first "http://www.adesalambrar.info/feed/"
157
114
  end
158
- end
115
+ end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>rfeedreader</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.1.0</a>
36
+ <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.0</a>
37
37
  </div>
38
38
  <h2>What</h2>
39
39
 
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rfeedreader
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
6
+ version: 0.9.0
7
7
  date: 2007-09-01 00:00:00 +02:00
8
8
  summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
9
9
  require_paths: