dq-readability 1.0.4 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWRhYWQ4YjNmMzVkOTAxYzgwNTI0NzExZWFkYzYyOTJmYzZlZWQyNA==
4
+ YWVkZDMxNmZmZWIxMDk5MjI5N2Q1MTZkNDBlZGYyZDFmOTE2NzNhMw==
5
5
  data.tar.gz: !binary |-
6
- ZDliZTE4MzE2NTVhNTliNDlkMTdkMDg1ZDNmMzc4MDljNjU3OTZhNw==
6
+ YmQ0NTkzYzE1ZmRmNjQ3NzRkMDE2MmFjNDg3Y2VkMjY5ZDJiZDc4YQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YWZlNTMwZjNmN2U3MzUxZjU4MDYyMzQ1ODAxNGY4MmUyMTVlMzI0YzJmODA2
10
- NDZmZjVhOTNlZTAxOWI0NTRiN2JmODZlNmQyZTEwYzYwYjIzNDY5NGUxYjQ0
11
- YTg1ZjYyYWYwMzQ2YzYyNjEwNTRlNTcxNWI5YjFjYmUxZWM5NDY=
9
+ M2M5ZjRmODcyMzU0MWM3MGVlZGZlYmE4MjVmNmI5YmQ0ODRjZmY0OTM1MGRl
10
+ M2FiODJlZjU4YTUwYjgxNDI3Yjk4YjQ1ZjE3MTFmOTAzZmRhODdmOGNhZTRi
11
+ NmMyZDAxYzkxMTI1NWRlODg5ZjJjNWI1OTg3ZTBmNmU1YjU0ODc=
12
12
  data.tar.gz: !binary |-
13
- ZDE3YTE0YmQzMjFjOWYwMTUwNDlmMTFjMGNkN2U4N2VmMzIyODA1MGEyMjFh
14
- ODE3OTYyYjEwOTllZDIwZWE0NTIyY2YzZDcxOTg3NTZlYTk5NDU5Njk2YmJl
15
- ZTRjY2JmZGE2MzYzNzhmNmI5NDU2ZGE4OWNlODlhNWNkMTU3ZDg=
13
+ OTc0YTZjOTc3Nzg1OTI4ZmE3Yzg1YmRjOGVkYzgwZjU4MmNjYWUyY2I3NTkz
14
+ Zjc3ZTFiM2U3N2M2OTNiZmRkMGZlMDM2N2ZiODQxYjI4OGZiOTg5YjU1ZGM5
15
+ OTM3NTkyYjA4MDdiMGEwZmUzOTFlZDdlODcyMTY3ZTVjNDdkMzQ=
data/README.md CHANGED
@@ -1,3 +1,10 @@
1
+ Version
2
+ -------
3
+ 1.0.4 released. Check out https://rubygems.org/gems/dq-readability
4
+
5
+ * competing structure for fighting invalid characters
6
+ * Wikipedia image case resolved
7
+
1
8
  Install
2
9
  -------
3
10
  Command line:
@@ -12,8 +19,4 @@ Example
12
19
  require 'rubygems'
13
20
  require 'dq-readability'
14
21
  source = "http://www.personal.kent.edu/~rmuhamma/Algorithms/MyAlgorithms/Sorting/radixSort.htm"
15
- puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody span],:attributes=>%w[href src align width color height]).content
16
-
17
-
18
-
19
-
22
+ puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody tt span dl dd t code figure fieldset legend dir noscript],:attributes=>%w[href src align width color height]).content
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "dq-readability"
6
- s.version = '1.0.4'
6
+ s.version = '1.0.5'
7
7
  s.authors = ["Prateek Papriwal"]
8
8
  s.email = ["papriwalprateek@gmail.com"]
9
9
  s.homepage = "http://github.com/DaQwest/dq-readability"
@@ -17,7 +17,8 @@ module DQReadability
17
17
  :remove_empty_nodes => true,
18
18
  :min_image_width => 130,
19
19
  :min_image_height => 80,
20
- :ignore_image_format => []
20
+ :ignore_image_format => [],
21
+ :bypass => false
21
22
  }.freeze
22
23
 
23
24
  REGEXES = {
@@ -51,6 +52,7 @@ module DQReadability
51
52
  @weight_classes = @options[:weight_classes]
52
53
  @clean_conditionally = @options[:clean_conditionally]
53
54
  @best_candidate_has_image = true
55
+ @bypass = @options[:bypass]
54
56
  make_html
55
57
  end
56
58
 
@@ -144,10 +146,16 @@ module DQReadability
144
146
  end
145
147
  end
146
148
  rescue
147
- elem['href'] = ""
148
149
  end
149
150
  end
150
151
 
152
+ # Remove <span> elements whose text is "[edit]" (compared case-insensitively)
153
+
154
+ @html.css('span').each do |elem|
155
+ if elem.text.downcase == "[edit]"
156
+ elem.remove
157
+ end
158
+ end
151
159
 
152
160
  end
153
161
 
@@ -296,6 +304,7 @@ module DQReadability
296
304
  end
297
305
 
298
306
  def content(remove_unlikely_candidates = :default)
307
+ if @bypass == false
299
308
  @remove_unlikely_candidates = false if remove_unlikely_candidates == false
300
309
 
301
310
  prepare_candidates
@@ -319,6 +328,13 @@ module DQReadability
319
328
  else
320
329
  cleaned_article
321
330
  end
331
+ else
332
+ make_html
333
+ s = Nokogiri::XML::Node::SaveOptions
334
+ save_opts = s::NO_DECLARATION | s::NO_EMPTY_TAGS | s::AS_XHTML
335
+ html = @html.serialize(:save_with => save_opts)
336
+ return html
337
+ end
322
338
  end
323
339
 
324
340
  def get_article(candidates, best_candidate)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dq-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Prateek Papriwal
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-12 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
135
  version: '0'
136
136
  requirements: []
137
137
  rubyforge_project: dq-readability
138
- rubygems_version: 2.1.11
138
+ rubygems_version: 2.2.2
139
139
  signing_key:
140
140
  specification_version: 4
141
141
  summary: Port of arc90's readability project to ruby