busk-ruby-readability 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/readability.rb +28 -5
  2. metadata +5 -17
data/lib/readability.rb CHANGED
@@ -43,7 +43,7 @@ module Readability
43
43
  :trimRe => /^\s+|\s+$/,
44
44
  :normalizeRe => /\s{2,}/,
45
45
  :killBreaksRe => /(<br\s*\/?>(\s|&nbsp;?)*){1,}/,
46
- :videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
46
+ :videoRe => /http:\/\/(www\.)?(youtube|vimeo|ted|player\.vimeo)\.com/i
47
47
  }
48
48
 
49
49
  def content(remove_unlikely_candidates = true)
@@ -51,6 +51,7 @@ module Readability
51
51
 
52
52
  article = youtube if is_youtube? && remove_unlikely_candidates
53
53
  article = vimeo if is_vimeo? && remove_unlikely_candidates
54
+ article = ted if is_ted? && remove_unlikely_candidates
54
55
 
55
56
  if article && remove_unlikely_candidates
56
57
  return article.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/&nbsp;/, " ")
@@ -79,6 +80,10 @@ module Readability
79
80
  def is_vimeo?
80
81
  (@input.base_uri.to_s =~ /^http:\/\/(www.)?vimeo.com/)
81
82
  end
83
+
84
+ def is_ted?
85
+ (@input.base_uri.to_s =~ /^http:\/\/(www.)?ted.com\/talks/)
86
+ end
82
87
 
83
88
  def is_special_case?
84
89
  (@input.base_uri.to_s =~ REGEXES[:videoRe])
@@ -87,11 +92,11 @@ module Readability
87
92
  def youtube
88
93
  if @input.base_uri.request_uri =~ /\?v=([_\-a-z0-9]+)&?/i
89
94
  Nokogiri::HTML.fragment <<-HTML
90
- <object width="480" height="385">
95
+ <object width="739" height="416">
91
96
  <param name="movie" value="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US"></param>
92
97
  <param name="allowFullScreen" value="true"></param>
93
98
  <param name="allowscriptaccess" value="always"></param>
94
- <embed src="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="480" height="385"></embed>
99
+ <embed src="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="739" height="416"></embed>
95
100
  </object>
96
101
  HTML
97
102
  else
@@ -116,6 +121,18 @@ module Readability
116
121
  nil
117
122
  end
118
123
  end
124
+
125
+ def ted
126
+ if (player = @html.css(".copy_paste")).present?
127
+ unless player.first.attr("value").blank?
128
+ Nokogiri::HTML.fragment(player.first.attr("value").to_s)
129
+ else
130
+ nil
131
+ end
132
+ else
133
+ nil
134
+ end
135
+ end
119
136
 
120
137
  def get_article(candidates, best_candidate)
121
138
  # Now that we have the top candidate, look through its siblings for content that might also be related.
@@ -279,9 +296,15 @@ module Readability
279
296
  header.remove if class_weight(header) < 0 || get_link_density(header) > 0.33
280
297
  end
281
298
 
282
- node.css("form, iframe").each do |elem|
299
+ node.css("form").each do |elem|
283
300
  elem.remove
284
301
  end
302
+
303
+ node.css("iframe").each do |iframe|
304
+ unless iframe.attr("src").to_s =~ REGEXES[:videoRe]
305
+ iframe.remove
306
+ end
307
+ end
285
308
 
286
309
  # remove empty <p> tags
287
310
  # node.css("p").each do |elem|
@@ -360,4 +383,4 @@ module Readability
360
383
  end
361
384
 
362
385
  end
363
- end
386
+ end
metadata CHANGED
@@ -1,13 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: busk-ruby-readability
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
5
- prerelease: false
6
- segments:
7
- - 1
8
- - 0
9
- - 4
10
- version: 1.0.4
4
+ version: 1.0.5
11
5
  platform: ruby
12
6
  authors: []
13
7
 
@@ -15,7 +9,7 @@ autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
11
 
18
- date: 2010-09-21 00:00:00 -03:00
12
+ date: 2010-09-22 00:00:00 -03:00
19
13
  default_executable:
20
14
  dependencies: []
21
15
 
@@ -39,27 +33,21 @@ rdoc_options: []
39
33
  require_paths:
40
34
  - lib
41
35
  required_ruby_version: !ruby/object:Gem::Requirement
42
- none: false
43
36
  requirements:
44
37
  - - ">="
45
38
  - !ruby/object:Gem::Version
46
- hash: 3
47
- segments:
48
- - 0
49
39
  version: "0"
40
+ version:
50
41
  required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
42
  requirements:
53
43
  - - ">="
54
44
  - !ruby/object:Gem::Version
55
- hash: 3
56
- segments:
57
- - 0
58
45
  version: "0"
46
+ version:
59
47
  requirements: []
60
48
 
61
49
  rubyforge_project:
62
- rubygems_version: 1.3.7
50
+ rubygems_version: 1.3.5
63
51
  signing_key:
64
52
  specification_version: 3
65
53
  summary: A rewrite of original ruby-readability