busk-ruby-readability 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/readability.rb +28 -5
- metadata +5 -17
data/lib/readability.rb
CHANGED
@@ -43,7 +43,7 @@ module Readability
|
|
43
43
|
:trimRe => /^\s+|\s+$/,
|
44
44
|
:normalizeRe => /\s{2,}/,
|
45
45
|
:killBreaksRe => /(<br\s*\/?>(\s|&nbsp;?)*){1,}/,
|
46
|
-
:videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
|
46
|
+
:videoRe => /http:\/\/(www\.)?(youtube|vimeo|ted|player\.vimeo)\.com/i
|
47
47
|
}
|
48
48
|
|
49
49
|
def content(remove_unlikely_candidates = true)
|
@@ -51,6 +51,7 @@ module Readability
|
|
51
51
|
|
52
52
|
article = youtube if is_youtube? && remove_unlikely_candidates
|
53
53
|
article = vimeo if is_vimeo? && remove_unlikely_candidates
|
54
|
+
article = ted if is_ted? && remove_unlikely_candidates
|
54
55
|
|
55
56
|
if article && remove_unlikely_candidates
|
56
57
|
return article.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/&nbsp;/, " ")
|
@@ -79,6 +80,10 @@ module Readability
|
|
79
80
|
def is_vimeo?
|
80
81
|
(@input.base_uri.to_s =~ /^http:\/\/(www.)?vimeo.com/)
|
81
82
|
end
|
83
|
+
|
84
|
+
def is_ted?
|
85
|
+
(@input.base_uri.to_s =~ /^http:\/\/(www.)?ted.com\/talks/)
|
86
|
+
end
|
82
87
|
|
83
88
|
def is_special_case?
|
84
89
|
(@input.base_uri.to_s =~ REGEXES[:videoRe])
|
@@ -87,11 +92,11 @@ module Readability
|
|
87
92
|
def youtube
|
88
93
|
if @input.base_uri.request_uri =~ /\?v=([_\-a-z0-9]+)&?/i
|
89
94
|
Nokogiri::HTML.fragment <<-HTML
|
90
|
-
<object width="
|
95
|
+
<object width="739" height="416">
|
91
96
|
<param name="movie" value="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US"></param>
|
92
97
|
<param name="allowFullScreen" value="true"></param>
|
93
98
|
<param name="allowscriptaccess" value="always"></param>
|
94
|
-
<embed src="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="
|
99
|
+
<embed src="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="739" height="416"></embed>
|
95
100
|
</object>
|
96
101
|
HTML
|
97
102
|
else
|
@@ -116,6 +121,18 @@ module Readability
|
|
116
121
|
nil
|
117
122
|
end
|
118
123
|
end
|
124
|
+
|
125
|
+
def ted
|
126
|
+
if (player = @html.css(".copy_paste")).present?
|
127
|
+
unless player.first.attr("value").blank?
|
128
|
+
Nokogiri::HTML.fragment(player.first.attr("value").to_s)
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
else
|
133
|
+
nil
|
134
|
+
end
|
135
|
+
end
|
119
136
|
|
120
137
|
def get_article(candidates, best_candidate)
|
121
138
|
# Now that we have the top candidate, look through its siblings for content that might also be related.
|
@@ -279,9 +296,15 @@ module Readability
|
|
279
296
|
header.remove if class_weight(header) < 0 || get_link_density(header) > 0.33
|
280
297
|
end
|
281
298
|
|
282
|
-
node.css("form
|
299
|
+
node.css("form").each do |elem|
|
283
300
|
elem.remove
|
284
301
|
end
|
302
|
+
|
303
|
+
node.css("iframe").each do |iframe|
|
304
|
+
unless iframe.attr("src").to_s =~ REGEXES[:videoRe]
|
305
|
+
iframe.remove
|
306
|
+
end
|
307
|
+
end
|
285
308
|
|
286
309
|
# remove empty <p> tags
|
287
310
|
# node.css("p").each do |elem|
|
@@ -360,4 +383,4 @@ module Readability
|
|
360
383
|
end
|
361
384
|
|
362
385
|
end
|
363
|
-
end
|
386
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: busk-ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease: false
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 4
|
10
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
11
5
|
platform: ruby
|
12
6
|
authors: []
|
13
7
|
|
@@ -15,7 +9,7 @@ autorequire:
|
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
11
|
|
18
|
-
date: 2010-09-
|
12
|
+
date: 2010-09-22 00:00:00 -03:00
|
19
13
|
default_executable:
|
20
14
|
dependencies: []
|
21
15
|
|
@@ -39,27 +33,21 @@ rdoc_options: []
|
|
39
33
|
require_paths:
|
40
34
|
- lib
|
41
35
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
36
|
requirements:
|
44
37
|
- - ">="
|
45
38
|
- !ruby/object:Gem::Version
|
46
|
-
hash: 3
|
47
|
-
segments:
|
48
|
-
- 0
|
49
39
|
version: "0"
|
40
|
+
version:
|
50
41
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
42
|
requirements:
|
53
43
|
- - ">="
|
54
44
|
- !ruby/object:Gem::Version
|
55
|
-
hash: 3
|
56
|
-
segments:
|
57
|
-
- 0
|
58
45
|
version: "0"
|
46
|
+
version:
|
59
47
|
requirements: []
|
60
48
|
|
61
49
|
rubyforge_project:
|
62
|
-
rubygems_version: 1.3.
|
50
|
+
rubygems_version: 1.3.5
|
63
51
|
signing_key:
|
64
52
|
specification_version: 3
|
65
53
|
summary: A rewrite of original ruby-readability
|