rfeedreader 0.1.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.9.0 2007-09-01
2
+
3
+ * Beta release, ready for production testing
4
+ * All unit tests passing
5
+
1
6
  == 0.1.0 2007-09-01
2
7
 
3
8
  * Initial release
@@ -1,7 +1,7 @@
1
1
  module Rfeedreader #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 1
4
+ MINOR = 9
5
5
  TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
data/lib/rfeedreader.rb CHANGED
@@ -15,6 +15,7 @@ module Rfeedreader
15
15
 
16
16
  def initialize(link, hpricot_doc)
17
17
  @link = link
18
+ puts "link: #{link}"
18
19
  read_title hpricot_doc
19
20
  read_charset hpricot_doc
20
21
  @entries = []
@@ -61,6 +62,7 @@ module Rfeedreader
61
62
  @charset = hpricot_doc.to_s.scan(/encoding=['"]?([^'"]*)['" ]/)
62
63
  @charset = @charset[0] if @charset.is_a? Array
63
64
  @charset = @charset.to_s.downcase
65
+ @charset = 'utf-8' if @charset.empty?
64
66
  end
65
67
 
66
68
  def read_title(hpricot_doc)
@@ -78,73 +80,79 @@ module Rfeedreader
78
80
  end
79
81
 
80
82
  class Entry
81
- attr_accessor :title, :link, :description, :charset
83
+ attr_accessor :title, :link, :description, :charset, :hpricot_item
82
84
 
83
85
  def initialize(item, charset)
86
+ @hpricot_item = item
84
87
  @charset = charset
85
- @link = read_link item
86
- @title = read_title item
87
- @description = read_description item
88
+ # Setup attributes
89
+ read_link
90
+ read_title
91
+ read_description
88
92
  end
89
93
 
90
94
  # Return the rss item link
91
- def read_link(item)
92
- post_url = nil
93
- if link = item.search("link:first")
94
- post_url = link.text
95
- post_url = link.to_s.scan(/href=['"]?([^'"]*)['" ]/).to_s if (post_url.nil? or post_url.empty?)
95
+ def read_link
96
+ @link = nil
97
+ if link = (@hpricot_item/"link")[0]
98
+ @link = link.to_s.scan(/(http:\/\/.[^<\"]*)/).to_s
96
99
  end
97
- return post_url
98
100
  end
99
101
 
100
- def read_title(item)
101
- return TextyHelper::convertEncoding((item/:title).text, @charset).downcase
102
+ def read_title
103
+ @title = TextyHelper::convertEncoding((@hpricot_item/:title).text, @charset).downcase
102
104
  end
103
105
 
104
- def read_description(item)
105
- description = (item/"description|summary|content|[@type='text']").text
106
- if description.include? "&lt;"
107
- description = HTMLEntities.decode_entities(description)
108
- else
109
- description = HTMLEntities.encode_entities(description, :named, :decimal) if @charset == 'utf-8'
106
+ def read_description
107
+ @description = ""
108
+ @description = (@hpricot_item/"content").text
109
+ @description = (@hpricot_item/"content\:encoded").text if @description.empty?
110
+ @description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
111
+
112
+ unless @description.empty?
113
+ @description = HTMLEntities.encode_entities(@description, :named, :decimal)
114
+ @description.gsub!("&#10;", "")
115
+ @description.gsub!("&#13;", "")
116
+ @description.strip!
117
+
118
+ @description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
119
+ @description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
120
+ @description.strip!
110
121
  end
111
- description = TextyHelper::clean(TextyHelper::convertEncoding(description, @charset), 200) unless description.empty?
112
- description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]') unless description.empty?
113
- return description.strip
114
122
  end
115
123
 
116
124
  def to_s
117
- "Entry: title: #{title} - link: #{link}\n\rdescription: #{description}"
125
+ "Entry: title: #{@title} - link: #{@link}\n\rdescription: #{@description}"
118
126
  end
119
127
  end
120
128
 
121
129
  class Entry_Flickr<Entry
122
- def read_description(item)
123
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
124
- image = item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
130
+ def read_description
131
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
132
+ image = @hpricot_item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
125
133
  image.gsub!(/_.\.jpg/,"_t.jpg")
126
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
134
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
127
135
  end
128
136
  end
129
137
 
130
138
  class Entry_Fotolog<Entry
131
- def read_description(item)
132
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
133
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
139
+ def read_description
140
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
141
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
134
142
  end
135
143
  end
136
144
 
137
145
  class Entry_Google_Video<Entry
138
- def read_description(item)
139
- image = item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&amp;/, '&')
140
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
146
+ def read_description
147
+ image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&amp;/, '&')
148
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
141
149
  end
142
150
  end
143
151
 
144
152
  class Entry_Jumpcut<Entry
145
- def read_description(item)
146
- image = item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
147
- return "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
153
+ def read_description
154
+ image = @hpricot_item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
155
+ @description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
148
156
  end
149
157
  end
150
158
 
@@ -158,13 +166,14 @@ module Rfeedreader
158
166
  def read(uri, nb_posts=10)
159
167
 
160
168
  link = Rfeedfinder::feed(uri)
161
- doc = open_doc(link)
169
+ unless link.nil?
170
+ doc = open_doc(link)
162
171
 
163
- unless doc.nil?
164
- feed = Feed.new(link, doc)
165
- entries = feed.parse_entries(doc, nb_posts)
172
+ unless doc.nil?
173
+ feed = Feed.new(link, doc)
174
+ entries = feed.parse_entries(doc, nb_posts)
175
+ end
166
176
  end
167
-
168
177
  return feed
169
178
  end
170
179
 
data/test/test_helper.rb CHANGED
@@ -5,4 +5,12 @@ def read_feed(feed_url)
5
5
  feed = Rfeedreader.read(feed_url)
6
6
  assert_not_nil feed
7
7
  return feed
8
+ end
9
+
10
+ def read_first(feed_url)
11
+ puts "Read first from #{feed_url}"
12
+ feed = Rfeedreader.read_first feed_url
13
+ assert_not_nil feed
14
+ assert_equal 1, feed.entries.size
15
+ feed.display_entries
8
16
  end
@@ -41,118 +41,75 @@ class TestRfeedreader < Test::Unit::TestCase
41
41
  feed.display_entries
42
42
  end
43
43
 
44
- def test_read_lots
45
- feed = Rfeedreader.read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
46
- assert_equal 1, feed.entries.size
47
- feed = Rfeedreader.read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
48
- assert_equal 1, feed.entries.size
49
- feed = Rfeedreader.read_first "http://organizandolaesperanza.blogspot.com"
50
- assert_equal 1, feed.entries.size
51
- feed = Rfeedreader.read_first "http://skblackburn.blogspot.com/"
52
- assert_equal 1, feed.entries.size
53
- feed = Rfeedreader.read_first "http://nadapersonal.blogspot.com"
54
- assert_equal 1, feed.entries.size
55
- feed = Rfeedreader.read_first "http://diariodeunadislexica.blogspot.com/"
56
- assert_equal 1, feed.entries.size
57
- feed = Rfeedreader.read_first "http://diputadodelosverdes.blogspot.com/"
58
- assert_equal 1, feed.entries.size
59
- feed = Rfeedreader.read_first "http://cinclin.blogspot.com/"
60
- assert_equal 1, feed.entries.size
61
- feed = Rfeedreader.read_first "http://claudiaramos.blogspot.com/"
62
- assert_equal 1, feed.entries.size
63
- feed = Rfeedreader.read_first "http://lacomunidad.elpais.com/krismontesinos/"
64
- assert_equal 1, feed.entries.size
65
- feed = Rfeedreader.read_first "http://www.becker-posner-blog.com/index.rdf"
66
- assert_equal 1, feed.entries.size
67
- feed = Rfeedreader.read_first "http://rss.slashdot.org/Slashdot/slashdot"
68
- assert_equal 1, feed.entries.size
69
- feed = Rfeedreader.read_first "http://planeta.lamatriz.org/feed/"
70
- assert_equal 1, feed.entries.size
71
- feed = Rfeedreader.read_first "http://edubloggers.blogspot.com/"
72
- assert_equal 1, feed.entries.size
73
- feed = Rfeedreader.read_first "http://www.deugarte.com/feed/"
74
- assert_equal 1, feed.entries.size
75
- feed = Rfeedreader.read_first "http://www.twitter.com/alx/"
76
- assert_equal 1, feed.entries.size
77
- feed = Rfeedreader.read_first "http://alemama.blogspot.com"
78
- assert_equal 1, feed.entries.size
79
- feed = Rfeedreader.read_first "http://seedmagazine.com/news/atom-focus.xml"
80
- assert_equal 1, feed.entries.size
81
- feed = Rfeedreader.read_first "http://bitacora.feevy.com"
82
- assert_equal 1, feed.entries.size
83
- feed = Rfeedreader.read_first "http://www.enriquemeneses.com/"
84
- assert_equal 1, feed.entries.size
85
- feed = Rfeedreader.read_first "http://ianasagasti.blogs.com/"
86
- assert_equal 1, feed.entries.size
87
- feed = Rfeedreader.read_first "http://www.ecoperiodico.com/"
88
- assert_equal 1, feed.entries.size
89
- feed = Rfeedreader.read_first "http://bloc.balearweb.net/rss.php?summary=1"
90
- assert_equal 1, feed.entries.size
91
- feed = Rfeedreader.read_first "http://www.antoniobezanilla.com/"
92
- assert_equal 1, feed.entries.size
93
- feed = Rfeedreader.read_first "http://www.joselopezorozco.com/"
94
- assert_equal 1, feed.entries.size
95
- feed = Rfeedreader.read_first "http://minijoan.vox.com/"
96
- assert_equal 1, feed.entries.size
97
- feed = Rfeedreader.read_first "http://www.dosdedosdefrente.com/blog/"
98
- assert_equal 1, feed.entries.size
99
- feed = Rfeedreader.read_first "http://www.deugarte.com/blog/fabbing/feed"
100
- assert_equal 1, feed.entries.size
101
- feed = Rfeedreader.read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
102
- assert_equal 1, feed.entries.size
103
- feed = Rfeedreader.read_first "http://sombra.lamatriz.org/"
104
- assert_equal 1, feed.entries.size
105
- feed = Rfeedreader.read_first "http://tristezza0.spaces.live.com/feed.rss"
106
- assert_equal 1, feed.entries.size
107
- feed = Rfeedreader.read_first "http://lacoctelera.com/macadamia"
108
- assert_equal 1, feed.entries.size
109
- feed = Rfeedreader.read_first "http://www.liberation.fr"
110
- assert_equal 1, feed.entries.size
111
- feed = Rfeedreader.read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
112
- assert_equal 1, feed.entries.size
113
- feed = Rfeedreader.read_first "http://www.gara.net/rss/kultura"
114
- assert_equal 1, feed.entries.size
115
- feed = Rfeedreader.read_first "http://davicius.wordpress.com/feed/"
116
- assert_equal 1, feed.entries.size
117
- feed = Rfeedreader.read_first "http://www.cato-at-liberty.org/wp-rss.php"
118
- assert_equal 1, feed.entries.size
119
- feed = Rfeedreader.read_first "http://creando.bligoo.com/"
120
- assert_equal 1, feed.entries.size
121
- feed = Rfeedreader.read_first "http://feeds.feedburner.com/37signals/beMH"
122
- assert_equal 1, feed.entries.size
123
- feed = Rfeedreader.read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
124
- assert_equal 1, feed.entries.size
125
- feed = Rfeedreader.read_first "http://www.rubendomfer.com/blog/"
126
- assert_equal 1, feed.entries.size
127
- feed = Rfeedreader.read_first "http://www.arfues.net/weblog/"
128
- assert_equal 1, feed.entries.size
129
- feed = Rfeedreader.read_first "http://www.lkstro.com/"
130
- assert_equal 1, feed.entries.size
131
- feed = Rfeedreader.read_first "http://www.lorenabetta.info"
132
- assert_equal 1, feed.entries.size
133
- feed = Rfeedreader.read_first "http://www.adesalambrar.info/"
134
- assert_equal 1, feed.entries.size
135
- feed = Rfeedreader.read_first "http://www.bufetalmeida.com/rss.xml"
136
- assert_equal 1, feed.entries.size
137
- feed = Rfeedreader.read_first "http://dreams.draxus.org/"
138
- assert_equal 1, feed.entries.size
139
- feed = Rfeedreader.read_first "http://mephisto.sobrerailes.com/"
140
- assert_equal 1, feed.entries.size
141
- feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/"
142
- assert_equal 1, feed.entries.size
143
- feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/feed/main/rss20"
144
- assert_equal 1, feed.entries.size
145
- feed = Rfeedreader.read_first "http://www1.fotolog.com/mad_lux"
146
- assert_equal 1, feed.entries.size
147
- feed = Rfeedreader.read_first "http://www1.fotolog.com/kel_06/"
148
- assert_equal 1, feed.entries.size
149
- feed = Rfeedreader.read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
150
- assert_equal 1, feed.entries.size
151
- feed = Rfeedreader.read_first "http://voxd.blogsome.com/"
152
- assert_equal 1, feed.entries.size
153
- feed = Rfeedreader.read_first "http://andreja666.bloger.hr/"
154
- assert_equal 1, feed.entries.size
155
- feed = Rfeedreader.read_first "http://blog.zvents.com/"
156
- assert_equal 1, feed.entries.size
44
+ def test_read_from_feevy
45
+ # read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
46
+ # read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
47
+ # read_first "http://organizandolaesperanza.blogspot.com"
48
+ # read_first "http://skblackburn.blogspot.com/"
49
+ # read_first "http://nadapersonal.blogspot.com"
50
+ # read_first "http://diariodeunadislexica.blogspot.com/"
51
+ # read_first "http://diputadodelosverdes.blogspot.com/"
52
+ # read_first "http://cinclin.blogspot.com/"
53
+ # read_first "http://claudiaramos.blogspot.com/"
54
+ # read_first "http://lacomunidad.elpais.com/krismontesinos/"
55
+ # read_first "http://www.becker-posner-blog.com/index.rdf"
56
+ # read_first "http://rss.slashdot.org/Slashdot/slashdot"
57
+ # read_first "http://planeta.lamatriz.org/feed/"
58
+ # read_first "http://edubloggers.blogspot.com/"
59
+ # read_first "http://www.deugarte.com/feed/"
60
+ # read_first "http://www.twitter.com/alx/"
61
+ # read_first "http://alemama.blogspot.com"
62
+ # read_first "http://seedmagazine.com/news/atom-focus.xml"
63
+ # read_first "http://bitacora.feevy.com"
64
+ # read_first "http://www.enriquemeneses.com/"
65
+ read_first "http://ianasagasti.blogs.com/"
66
+ read_first "http://www.ecoperiodico.com/"
67
+ read_first "http://bloc.balearweb.net/rss.php?summary=1"
68
+ read_first "http://www.antoniobezanilla.com/"
69
+ read_first "http://www.joselopezorozco.com/"
70
+ read_first "http://www.dosdedosdefrente.com/blog/"
71
+ read_first "http://www.deugarte.com/blog/fabbing/feed"
72
+ read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
73
+ read_first "http://sombra.lamatriz.org/"
74
+ read_first "http://tristezza0.spaces.live.com/feed.rss"
75
+ read_first "http://www.liberation.fr"
76
+ read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
77
+ read_first "http://www.gara.net/rss/kultura"
78
+ read_first "http://davicius.wordpress.com/feed/"
79
+ read_first "http://www.cato-at-liberty.org/wp-rss.php"
80
+ read_first "http://creando.bligoo.com/"
81
+ read_first "http://feeds.feedburner.com/37signals/beMH"
82
+ read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
83
+ read_first "http://www.rubendomfer.com/blog/"
84
+ read_first "http://www.arfues.net/weblog/"
85
+ read_first "http://www.lkstro.com/"
86
+ read_first "http://www.lorenabetta.info"
87
+ read_first "http://www.adesalambrar.info/"
88
+ read_first "http://www.bufetalmeida.com/rss.xml"
89
+ read_first "http://dreams.draxus.org/"
90
+ read_first "http://mephisto.sobrerailes.com/"
91
+ read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
92
+ read_first "http://voxd.blogsome.com/"
93
+ read_first "http://blog.zvents.com/"
94
+ end
95
+
96
+ def test_read_content_encoded
97
+ read_first "http://www.lacoctelera.com/macadamia/feeds/rss2"
98
+ end
99
+
100
+ def test_read_link_empty
101
+ read_first "http://minijoan.vox.com/library/posts/atom.xml"
102
+ end
103
+
104
+ def test_read_type_error
105
+ read_first "http://www0.fotolog.com/darth_fonsu/feed/main/rss20"
106
+ end
107
+
108
+ def test_read_twitter
109
+ read_first "http://twitter.com/statuses/friends_timeline/534023.rss"
110
+ end
111
+
112
+ def test_encoding_error
113
+ read_first "http://www.adesalambrar.info/feed/"
157
114
  end
158
- end
115
+ end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>rfeedreader</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.1.0</a>
36
+ <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.0</a>
37
37
  </div>
38
38
  <h2>What</h2>
39
39
 
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rfeedreader
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
6
+ version: 0.9.0
7
7
  date: 2007-09-01 00:00:00 +02:00
8
8
  summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumpcut, Google video, ...
9
9
  require_paths: