busk-ruby-readability 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/readability.rb +28 -5
  2. metadata +5 -17
data/lib/readability.rb CHANGED
@@ -43,7 +43,7 @@ module Readability
43
43
  :trimRe => /^\s+|\s+$/,
44
44
  :normalizeRe => /\s{2,}/,
45
45
  :killBreaksRe => /(<br\s*\/?>(\s|&nbsp;?)*){1,}/,
46
- :videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
46
+ :videoRe => /http:\/\/(www\.)?(youtube|vimeo|ted|player\.vimeo)\.com/i
47
47
  }
48
48
 
49
49
  def content(remove_unlikely_candidates = true)
@@ -51,6 +51,7 @@ module Readability
51
51
 
52
52
  article = youtube if is_youtube? && remove_unlikely_candidates
53
53
  article = vimeo if is_vimeo? && remove_unlikely_candidates
54
+ article = ted if is_ted? && remove_unlikely_candidates
54
55
 
55
56
  if article && remove_unlikely_candidates
56
57
  return article.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/&nbsp;/, " ")
@@ -79,6 +80,10 @@ module Readability
79
80
  def is_vimeo?
80
81
  (@input.base_uri.to_s =~ /^http:\/\/(www.)?vimeo.com/)
81
82
  end
83
+
84
+ def is_ted?
85
+ (@input.base_uri.to_s =~ /^http:\/\/(www.)?ted.com\/talks/)
86
+ end
82
87
 
83
88
  def is_special_case?
84
89
  (@input.base_uri.to_s =~ REGEXES[:videoRe])
@@ -87,11 +92,11 @@ module Readability
87
92
  def youtube
88
93
  if @input.base_uri.request_uri =~ /\?v=([_\-a-z0-9]+)&?/i
89
94
  Nokogiri::HTML.fragment <<-HTML
90
- <object width="480" height="385">
95
+ <object width="739" height="416">
91
96
  <param name="movie" value="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US"></param>
92
97
  <param name="allowFullScreen" value="true"></param>
93
98
  <param name="allowscriptaccess" value="always"></param>
94
- <embed src="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="480" height="385"></embed>
99
+ <embed src="http://www.youtube.com/v/#{$1}?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="739" height="416"></embed>
95
100
  </object>
96
101
  HTML
97
102
  else
@@ -116,6 +121,18 @@ module Readability
116
121
  nil
117
122
  end
118
123
  end
124
+
125
+ def ted
126
+ if (player = @html.css(".copy_paste")).present?
127
+ unless player.first.attr("value").blank?
128
+ Nokogiri::HTML.fragment(player.first.attr("value").to_s)
129
+ else
130
+ nil
131
+ end
132
+ else
133
+ nil
134
+ end
135
+ end
119
136
 
120
137
  def get_article(candidates, best_candidate)
121
138
  # Now that we have the top candidate, look through its siblings for content that might also be related.
@@ -279,9 +296,15 @@ module Readability
279
296
  header.remove if class_weight(header) < 0 || get_link_density(header) > 0.33
280
297
  end
281
298
 
282
- node.css("form, iframe").each do |elem|
299
+ node.css("form").each do |elem|
283
300
  elem.remove
284
301
  end
302
+
303
+ node.css("iframe").each do |iframe|
304
+ unless iframe.attr("src").to_s =~ REGEXES[:videoRe]
305
+ iframe.remove
306
+ end
307
+ end
285
308
 
286
309
  # remove empty <p> tags
287
310
  # node.css("p").each do |elem|
@@ -360,4 +383,4 @@ module Readability
360
383
  end
361
384
 
362
385
  end
363
- end
386
+ end
metadata CHANGED
@@ -1,13 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: busk-ruby-readability
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
5
- prerelease: false
6
- segments:
7
- - 1
8
- - 0
9
- - 4
10
- version: 1.0.4
4
+ version: 1.0.5
11
5
  platform: ruby
12
6
  authors: []
13
7
 
@@ -15,7 +9,7 @@ autorequire:
15
9
  bindir: bin
16
10
  cert_chain: []
17
11
 
18
- date: 2010-09-21 00:00:00 -03:00
12
+ date: 2010-09-22 00:00:00 -03:00
19
13
  default_executable:
20
14
  dependencies: []
21
15
 
@@ -39,27 +33,21 @@ rdoc_options: []
39
33
  require_paths:
40
34
  - lib
41
35
  required_ruby_version: !ruby/object:Gem::Requirement
42
- none: false
43
36
  requirements:
44
37
  - - ">="
45
38
  - !ruby/object:Gem::Version
46
- hash: 3
47
- segments:
48
- - 0
49
39
  version: "0"
40
+ version:
50
41
  required_rubygems_version: !ruby/object:Gem::Requirement
51
- none: false
52
42
  requirements:
53
43
  - - ">="
54
44
  - !ruby/object:Gem::Version
55
- hash: 3
56
- segments:
57
- - 0
58
45
  version: "0"
46
+ version:
59
47
  requirements: []
60
48
 
61
49
  rubyforge_project:
62
- rubygems_version: 1.3.7
50
+ rubygems_version: 1.3.5
63
51
  signing_key:
64
52
  specification_version: 3
65
53
  summary: A rewrite of original ruby-readability