dq-readability 1.0.4 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YWRhYWQ4YjNmMzVkOTAxYzgwNTI0NzExZWFkYzYyOTJmYzZlZWQyNA==
4
+ YWVkZDMxNmZmZWIxMDk5MjI5N2Q1MTZkNDBlZGYyZDFmOTE2NzNhMw==
5
5
  data.tar.gz: !binary |-
6
- ZDliZTE4MzE2NTVhNTliNDlkMTdkMDg1ZDNmMzc4MDljNjU3OTZhNw==
6
+ YmQ0NTkzYzE1ZmRmNjQ3NzRkMDE2MmFjNDg3Y2VkMjY5ZDJiZDc4YQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YWZlNTMwZjNmN2U3MzUxZjU4MDYyMzQ1ODAxNGY4MmUyMTVlMzI0YzJmODA2
10
- NDZmZjVhOTNlZTAxOWI0NTRiN2JmODZlNmQyZTEwYzYwYjIzNDY5NGUxYjQ0
11
- YTg1ZjYyYWYwMzQ2YzYyNjEwNTRlNTcxNWI5YjFjYmUxZWM5NDY=
9
+ M2M5ZjRmODcyMzU0MWM3MGVlZGZlYmE4MjVmNmI5YmQ0ODRjZmY0OTM1MGRl
10
+ M2FiODJlZjU4YTUwYjgxNDI3Yjk4YjQ1ZjE3MTFmOTAzZmRhODdmOGNhZTRi
11
+ NmMyZDAxYzkxMTI1NWRlODg5ZjJjNWI1OTg3ZTBmNmU1YjU0ODc=
12
12
  data.tar.gz: !binary |-
13
- ZDE3YTE0YmQzMjFjOWYwMTUwNDlmMTFjMGNkN2U4N2VmMzIyODA1MGEyMjFh
14
- ODE3OTYyYjEwOTllZDIwZWE0NTIyY2YzZDcxOTg3NTZlYTk5NDU5Njk2YmJl
15
- ZTRjY2JmZGE2MzYzNzhmNmI5NDU2ZGE4OWNlODlhNWNkMTU3ZDg=
13
+ OTc0YTZjOTc3Nzg1OTI4ZmE3Yzg1YmRjOGVkYzgwZjU4MmNjYWUyY2I3NTkz
14
+ Zjc3ZTFiM2U3N2M2OTNiZmRkMGZlMDM2N2ZiODQxYjI4OGZiOTg5YjU1ZGM5
15
+ OTM3NTkyYjA4MDdiMGEwZmUzOTFlZDdlODcyMTY3ZTVjNDdkMzQ=
data/README.md CHANGED
@@ -1,3 +1,10 @@
1
+ Version
2
+ -------
3
+ 1.0.4 released. Check out https://rubygems.org/gems/dq-readability
4
+
5
+ * competing structure for fighting invalid characters
6
+ * Wikipedia image case resolved
7
+
1
8
  Install
2
9
  -------
3
10
  Command line:
@@ -12,8 +19,4 @@ Example
12
19
  require 'rubygems'
13
20
  require 'dq-readability'
14
21
  source = "http://www.personal.kent.edu/~rmuhamma/Algorithms/MyAlgorithms/Sorting/radixSort.htm"
15
- puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody span],:attributes=>%w[href src align width color height]).content
16
-
17
-
18
-
19
-
22
+ puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody tt span dl dd t code figure fieldset legend dir noscript],:attributes=>%w[href src align width color height]).content
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "dq-readability"
6
- s.version = '1.0.4'
6
+ s.version = '1.0.5'
7
7
  s.authors = ["Prateek Papriwal"]
8
8
  s.email = ["papriwalprateek@gmail.com"]
9
9
  s.homepage = "http://github.com/DaQwest/dq-readability"
@@ -17,7 +17,8 @@ module DQReadability
17
17
  :remove_empty_nodes => true,
18
18
  :min_image_width => 130,
19
19
  :min_image_height => 80,
20
- :ignore_image_format => []
20
+ :ignore_image_format => [],
21
+ :bypass => false
21
22
  }.freeze
22
23
 
23
24
  REGEXES = {
@@ -51,6 +52,7 @@ module DQReadability
51
52
  @weight_classes = @options[:weight_classes]
52
53
  @clean_conditionally = @options[:clean_conditionally]
53
54
  @best_candidate_has_image = true
55
+ @bypass = @options[:bypass]
54
56
  make_html
55
57
  end
56
58
 
@@ -144,10 +146,16 @@ module DQReadability
144
146
  end
145
147
  end
146
148
  rescue
147
- elem['href'] = ""
148
149
  end
149
150
  end
150
151
 
152
+ # removing edit spans
153
+
154
+ @html.css('span').each do |elem|
155
+ if elem.text.downcase == "[edit]"
156
+ elem.remove
157
+ end
158
+ end
151
159
 
152
160
  end
153
161
 
@@ -296,6 +304,7 @@ module DQReadability
296
304
  end
297
305
 
298
306
  def content(remove_unlikely_candidates = :default)
307
+ if @bypass == false
299
308
  @remove_unlikely_candidates = false if remove_unlikely_candidates == false
300
309
 
301
310
  prepare_candidates
@@ -319,6 +328,13 @@ module DQReadability
319
328
  else
320
329
  cleaned_article
321
330
  end
331
+ else
332
+ make_html
333
+ s = Nokogiri::XML::Node::SaveOptions
334
+ save_opts = s::NO_DECLARATION | s::NO_EMPTY_TAGS | s::AS_XHTML
335
+ html = @html.serialize(:save_with => save_opts)
336
+ return html
337
+ end
322
338
  end
323
339
 
324
340
  def get_article(candidates, best_candidate)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dq-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Prateek Papriwal
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-12 00:00:00.000000000 Z
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
135
  version: '0'
136
136
  requirements: []
137
137
  rubyforge_project: dq-readability
138
- rubygems_version: 2.1.11
138
+ rubygems_version: 2.2.2
139
139
  signing_key:
140
140
  specification_version: 4
141
141
  summary: Port of arc90's readability project to ruby