busk-ruby-readability 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/readability.rb +28 -5
- metadata +5 -17
data/lib/readability.rb
CHANGED
@@ -43,7 +43,7 @@ module Readability
|
|
43
43
|
:trimRe => /^\s+|\s+$/,
|
44
44
|
:normalizeRe => /\s{2,}/,
|
45
45
|
:killBreaksRe => /(<br\s*\/?>(\s| ?)*){1,}/,
|
46
|
-
:videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
|
46
|
+
:videoRe => /http:\/\/(www\.)?(youtube|vimeo|ted|player\.vimeo)\.com/i
|
47
47
|
}
|
48
48
|
|
49
49
|
def content(remove_unlikely_candidates = true)
|
@@ -51,6 +51,7 @@ module Readability
|
|
51
51
|
|
52
52
|
article = youtube if is_youtube? && remove_unlikely_candidates
|
53
53
|
article = vimeo if is_vimeo? && remove_unlikely_candidates
|
54
|
+
article = ted if is_ted? && remove_unlikely_candidates
|
54
55
|
|
55
56
|
if article && remove_unlikely_candidates
|
56
57
|
return article.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/ /, " ")
|
@@ -79,6 +80,10 @@ module Readability
|
|
79
80
|
def is_vimeo?
|
80
81
|
(@input.base_uri.to_s =~ /^http:\/\/(www.)?vimeo.com/)
|
81
82
|
end
|
83
|
+
|
84
|
+
def is_ted?
|
85
|
+
(@input.base_uri.to_s =~ /^http:\/\/(www.)?ted.com\/talks/)
|
86
|
+
end
|
82
87
|
|
83
88
|
def is_special_case?
|
84
89
|
(@input.base_uri.to_s =~ REGEXES[:videoRe])
|
@@ -87,11 +92,11 @@ module Readability
|
|
87
92
|
def youtube
|
88
93
|
if @input.base_uri.request_uri =~ /\?v=([_\-a-z0-9]+)&?/i
|
89
94
|
Nokogiri::HTML.fragment <<-HTML
|
90
|
-
<object width="
|
95
|
+
<object width="739" height="416">
|
91
96
|
<param name="movie" value="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US"></param>
|
92
97
|
<param name="allowFullScreen" value="true"></param>
|
93
98
|
<param name="allowscriptaccess" value="always"></param>
|
94
|
-
<embed src="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="
|
99
|
+
<embed src="http://www.youtube.com/v/#{$1}?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="739" height="416"></embed>
|
95
100
|
</object>
|
96
101
|
HTML
|
97
102
|
else
|
@@ -116,6 +121,18 @@ module Readability
|
|
116
121
|
nil
|
117
122
|
end
|
118
123
|
end
|
124
|
+
|
125
|
+
def ted
|
126
|
+
if (player = @html.css(".copy_paste")).present?
|
127
|
+
unless player.first.attr("value").blank?
|
128
|
+
Nokogiri::HTML.fragment(player.first.attr("value").to_s)
|
129
|
+
else
|
130
|
+
nil
|
131
|
+
end
|
132
|
+
else
|
133
|
+
nil
|
134
|
+
end
|
135
|
+
end
|
119
136
|
|
120
137
|
def get_article(candidates, best_candidate)
|
121
138
|
# Now that we have the top candidate, look through its siblings for content that might also be related.
|
@@ -279,9 +296,15 @@ module Readability
|
|
279
296
|
header.remove if class_weight(header) < 0 || get_link_density(header) > 0.33
|
280
297
|
end
|
281
298
|
|
282
|
-
node.css("form
|
299
|
+
node.css("form").each do |elem|
|
283
300
|
elem.remove
|
284
301
|
end
|
302
|
+
|
303
|
+
node.css("iframe").each do |iframe|
|
304
|
+
unless iframe.attr("src").to_s =~ REGEXES[:videoRe]
|
305
|
+
iframe.remove
|
306
|
+
end
|
307
|
+
end
|
285
308
|
|
286
309
|
# remove empty <p> tags
|
287
310
|
# node.css("p").each do |elem|
|
@@ -360,4 +383,4 @@ module Readability
|
|
360
383
|
end
|
361
384
|
|
362
385
|
end
|
363
|
-
end
|
386
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: busk-ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease: false
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 4
|
10
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
11
5
|
platform: ruby
|
12
6
|
authors: []
|
13
7
|
|
@@ -15,7 +9,7 @@ autorequire:
|
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
11
|
|
18
|
-
date: 2010-09-
|
12
|
+
date: 2010-09-22 00:00:00 -03:00
|
19
13
|
default_executable:
|
20
14
|
dependencies: []
|
21
15
|
|
@@ -39,27 +33,21 @@ rdoc_options: []
|
|
39
33
|
require_paths:
|
40
34
|
- lib
|
41
35
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
36
|
requirements:
|
44
37
|
- - ">="
|
45
38
|
- !ruby/object:Gem::Version
|
46
|
-
hash: 3
|
47
|
-
segments:
|
48
|
-
- 0
|
49
39
|
version: "0"
|
40
|
+
version:
|
50
41
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
-
none: false
|
52
42
|
requirements:
|
53
43
|
- - ">="
|
54
44
|
- !ruby/object:Gem::Version
|
55
|
-
hash: 3
|
56
|
-
segments:
|
57
|
-
- 0
|
58
45
|
version: "0"
|
46
|
+
version:
|
59
47
|
requirements: []
|
60
48
|
|
61
49
|
rubyforge_project:
|
62
|
-
rubygems_version: 1.3.
|
50
|
+
rubygems_version: 1.3.5
|
63
51
|
signing_key:
|
64
52
|
specification_version: 3
|
65
53
|
summary: A rewrite of original ruby-readability
|