rfeedreader 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +1 -1
- data/Rakefile +3 -3
- data/lib/rfeedreader/version.rb +1 -1
- data/lib/rfeedreader.rb +104 -8
- data/website/index.html +1 -1
- metadata +19 -1
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -11,8 +11,6 @@ require 'hoe'
|
|
11
11
|
|
12
12
|
include FileUtils
|
13
13
|
require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'version')
|
14
|
-
require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'texty_helper')
|
15
|
-
require File.join(File.dirname(__FILE__), 'lib', 'rfeedreader', 'html_entities')
|
16
14
|
|
17
15
|
AUTHOR = 'Alexandre Girard' # can also be an array of Authors
|
18
16
|
EMAIL = "alx.girard@gmail.com"
|
@@ -73,7 +71,9 @@ hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
|
73
71
|
|
74
72
|
# == Optional
|
75
73
|
p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
|
76
|
-
p.extra_deps = [['rfeedfinder',
|
74
|
+
p.extra_deps = [['rfeedfinder', '>=0.9.0'],
|
75
|
+
['htmlentities', '>=4.0.0'],
|
76
|
+
['hpricot', '>=0.6']] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
|
77
77
|
#p.spec_extras = {} # A hash of extra values to set in the gemspec.
|
78
78
|
end
|
79
79
|
|
data/lib/rfeedreader/version.rb
CHANGED
data/lib/rfeedreader.rb
CHANGED
@@ -1,15 +1,106 @@
|
|
1
|
-
require 'net/http'
|
2
1
|
require 'rubygems'
|
3
|
-
require 'open-uri'
|
4
2
|
require 'hpricot'
|
5
|
-
require '
|
3
|
+
require 'htmlentities'
|
4
|
+
require 'iconv'
|
5
|
+
require 'net/http'
|
6
|
+
require 'open-uri'
|
6
7
|
require 'rfeedfinder'
|
7
|
-
require '
|
8
|
-
require 'rfeedreader/html_entities'
|
8
|
+
require 'timeout'
|
9
9
|
|
10
10
|
module Rfeedreader
|
11
11
|
module_function
|
12
12
|
|
13
|
+
class TextyHelper
|
14
|
+
def TextyHelper.clean(html, length = 45)
|
15
|
+
return html if html.empty?
|
16
|
+
if html.index("<")
|
17
|
+
html.gsub!(/(<[^>]*>)|\n|\t/s) {" "}
|
18
|
+
|
19
|
+
# strip any comments, and if they have a newline at the end (i.e. a line with
|
20
|
+
# only a comment) strip that too
|
21
|
+
truncate(html.gsub(/<!--(.*?)-->[\n]?/m, ""), length)
|
22
|
+
else
|
23
|
+
truncate(html, length) # already plain text
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def TextyHelper.truncate(text, length = 45, truncate_string = "...")
|
28
|
+
if text.nil? then
|
29
|
+
return
|
30
|
+
end
|
31
|
+
l = length - truncate_string.length
|
32
|
+
if text.length > length
|
33
|
+
text = text[0...l]
|
34
|
+
# Avoid html entity truncation
|
35
|
+
if text =~ /(&#\d+[^;])$/
|
36
|
+
text.delete!($1)
|
37
|
+
end
|
38
|
+
text = text + truncate_string
|
39
|
+
end
|
40
|
+
text
|
41
|
+
end
|
42
|
+
|
43
|
+
def TextyHelper.convertEncoding(text, encoding='utf-8')
|
44
|
+
# Pre-process encoding
|
45
|
+
unless text.nil?
|
46
|
+
if encoding == 'utf-8'
|
47
|
+
# Some strange characters to handle
|
48
|
+
text.gsub!("\342\200\042", "–") # en-dash
|
49
|
+
text.gsub!("\342\200\041", "—") # em-dash
|
50
|
+
text.gsub!("\342\200\174", "…") # elipse
|
51
|
+
text.gsub!("\342\200\176", "‘") # single quote
|
52
|
+
text.gsub!("\342\200\177", "’") # single quote
|
53
|
+
text.gsub!("\342\200\230", "’") # single quote
|
54
|
+
text.gsub!("\342\200\231", "’") # single quote
|
55
|
+
text.gsub!("\342\200\234", "“") # Double quote, right
|
56
|
+
text.gsub!("\342\200\235", "”") # Double quote, left
|
57
|
+
text.gsub!("\342\200\242", ".")
|
58
|
+
text.gsub!("\342\202\254", "€"); # Euro symbol
|
59
|
+
text.gsub!(/\S\200\S/, " ") # every strange character send to the moon
|
60
|
+
text.gsub!("\176", "\'") # single quote
|
61
|
+
text.gsub!("\177", "\'") # single quote
|
62
|
+
text.gsub!("\205", "-") # ISO-Latin1 horizontal elipses (0x85)
|
63
|
+
text.gsub!("\221", "\'") # ISO-Latin1 left single-quote
|
64
|
+
text.gsub!("\222", "\'") # ISO-Latin1 right single-quote
|
65
|
+
text.gsub!("\223", "\"") # ISO-Latin1 left double-quote
|
66
|
+
text.gsub!("\224", "\"") # ISO-Latin1 right double-quote
|
67
|
+
text.gsub!("\225", "\*") # ISO-Latin1 bullet
|
68
|
+
text.gsub!("\226", "-") # ISO-Latin1 en-dash (0x96)
|
69
|
+
text.gsub!("\227", "-") # ISO-Latin1 em-dash (0x97)
|
70
|
+
text.gsub!("\230", "\'") # single quote
|
71
|
+
text.gsub!("\231", "\'") # single quote
|
72
|
+
text.gsub!("\233", ">") # ISO-Latin1 single right angle quote
|
73
|
+
text.gsub!("\234", "\"") # Double quote
|
74
|
+
text.gsub!("\235", "\"") # Double quote
|
75
|
+
text.gsub!("\240", " ") # ISO-Latin1 nonbreaking space
|
76
|
+
text.gsub!("\246", "\|") # ISO-Latin1 broken vertical bar
|
77
|
+
text.gsub!("\255", "") # ISO-Latin1 soft hyphen (0xAD)
|
78
|
+
text.gsub!("\264", "\'") # ISO-Latin1 spacing acute
|
79
|
+
text.gsub!("\267", "\*") # ISO-Latin1 middle dot (0xB7)
|
80
|
+
ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
|
81
|
+
text = ic.iconv(text + ' ')[0..-2]
|
82
|
+
elsif encoding == 'iso-8859-15'
|
83
|
+
text.gsub!("’", "'") # Long horizontal bar
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
begin
|
88
|
+
text = Iconv.new('iso-8859-1', encoding).iconv(text)
|
89
|
+
# Post-process encoding
|
90
|
+
unless text.nil? or text.empty? or text.kind_of? ArgumentError
|
91
|
+
text.gsub!(/[\240-\377]/) { |c| "&#%d;" % c[0] }
|
92
|
+
if encoding == 'iso-8859-15'
|
93
|
+
text.gsub!("’", "'")
|
94
|
+
end
|
95
|
+
end
|
96
|
+
rescue => err
|
97
|
+
puts "Error while encoding: #{err} #{err.class}"
|
98
|
+
end
|
99
|
+
|
100
|
+
return text
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
13
104
|
class Feed
|
14
105
|
attr_accessor :title, :link, :charset, :entries
|
15
106
|
|
@@ -100,7 +191,7 @@ module Rfeedreader
|
|
100
191
|
end
|
101
192
|
|
102
193
|
def read_title
|
103
|
-
@title = TextyHelper
|
194
|
+
@title = TextyHelper.convertEncoding((@hpricot_item/:title).text, @charset).downcase
|
104
195
|
end
|
105
196
|
|
106
197
|
def read_description
|
@@ -110,14 +201,19 @@ module Rfeedreader
|
|
110
201
|
@description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
|
111
202
|
|
112
203
|
unless @description.empty?
|
204
|
+
|
205
|
+
@description = TextyHelper.clean(@description, 200)
|
206
|
+
|
113
207
|
@description = HTMLEntities.encode_entities(@description, :named, :decimal)
|
208
|
+
@description = TextyHelper.convertEncoding(@description, @charset)
|
209
|
+
|
114
210
|
@description.gsub!(" ", "")
|
115
211
|
@description.gsub!(" ", "")
|
116
212
|
@description.strip!
|
117
|
-
|
118
|
-
@description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
|
213
|
+
|
119
214
|
@description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
|
120
215
|
@description.strip!
|
216
|
+
|
121
217
|
end
|
122
218
|
end
|
123
219
|
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rfeedreader</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.1</a>
|
36
|
+
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.2</a>
|
37
37
|
</div>
|
38
38
|
<h2>What</h2>
|
39
39
|
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfeedreader
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.9.1
|
6
|
+
version: 0.9.2
|
7
7
|
date: 2007-09-01 00:00:00 +02:00
|
8
8
|
summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
|
9
9
|
require_paths:
|
@@ -73,3 +73,21 @@ dependencies:
|
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: 0.9.0
|
75
75
|
version:
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: htmlentities
|
78
|
+
version_requirement:
|
79
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: 4.0.0
|
84
|
+
version:
|
85
|
+
- !ruby/object:Gem::Dependency
|
86
|
+
name: hpricot
|
87
|
+
version_requirement:
|
88
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0.6"
|
93
|
+
version:
|