dq-readability 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +8 -5
- data/dq-readability.gemspec +1 -1
- data/lib/dq-readability.rb +18 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YWVkZDMxNmZmZWIxMDk5MjI5N2Q1MTZkNDBlZGYyZDFmOTE2NzNhMw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YmQ0NTkzYzE1ZmRmNjQ3NzRkMDE2MmFjNDg3Y2VkMjY5ZDJiZDc4YQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2M5ZjRmODcyMzU0MWM3MGVlZGZlYmE4MjVmNmI5YmQ0ODRjZmY0OTM1MGRl
|
10
|
+
M2FiODJlZjU4YTUwYjgxNDI3Yjk4YjQ1ZjE3MTFmOTAzZmRhODdmOGNhZTRi
|
11
|
+
NmMyZDAxYzkxMTI1NWRlODg5ZjJjNWI1OTg3ZTBmNmU1YjU0ODc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OTc0YTZjOTc3Nzg1OTI4ZmE3Yzg1YmRjOGVkYzgwZjU4MmNjYWUyY2I3NTkz
|
14
|
+
Zjc3ZTFiM2U3N2M2OTNiZmRkMGZlMDM2N2ZiODQxYjI4OGZiOTg5YjU1ZGM5
|
15
|
+
OTM3NTkyYjA4MDdiMGEwZmUzOTFlZDdlODcyMTY3ZTVjNDdkMzQ=
|
data/README.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
Version
|
2
|
+
-------
|
3
|
+
1.0.4 released. Check out https://rubygems.org/gems/dq-readability
|
4
|
+
|
5
|
+
* competing structure for fighting invalid characters
|
6
|
+
* Wikipedia image case resolved
|
7
|
+
|
1
8
|
Install
|
2
9
|
-------
|
3
10
|
Command line:
|
@@ -12,8 +19,4 @@ Example
|
|
12
19
|
require 'rubygems'
|
13
20
|
require 'dq-readability'
|
14
21
|
source = "http://www.personal.kent.edu/~rmuhamma/Algorithms/MyAlgorithms/Sorting/radixSort.htm"
|
15
|
-
puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody span],:attributes=>%w[href src align width color height]).content
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
puts DQReadability::Document.new(source,:tags=>%w[div pre p h1 h2 h3 h4 td table tr b a img br li ul ol center br hr blockquote em strong sub sup font tbody tt span dl dd t code figure fieldset legend dir noscript],:attributes=>%w[href src align width color height]).content
|
data/dq-readability.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "dq-readability"
|
6
|
-
s.version = '1.0.4'
|
6
|
+
s.version = '1.0.5'
|
7
7
|
s.authors = ["Prateek Papriwal"]
|
8
8
|
s.email = ["papriwalprateek@gmail.com"]
|
9
9
|
s.homepage = "http://github.com/DaQwest/dq-readability"
|
data/lib/dq-readability.rb
CHANGED
@@ -17,7 +17,8 @@ module DQReadability
|
|
17
17
|
:remove_empty_nodes => true,
|
18
18
|
:min_image_width => 130,
|
19
19
|
:min_image_height => 80,
|
20
|
-
:ignore_image_format => []
|
20
|
+
:ignore_image_format => [],
|
21
|
+
:bypass => false
|
21
22
|
}.freeze
|
22
23
|
|
23
24
|
REGEXES = {
|
@@ -51,6 +52,7 @@ module DQReadability
|
|
51
52
|
@weight_classes = @options[:weight_classes]
|
52
53
|
@clean_conditionally = @options[:clean_conditionally]
|
53
54
|
@best_candidate_has_image = true
|
55
|
+
@bypass = @options[:bypass]
|
54
56
|
make_html
|
55
57
|
end
|
56
58
|
|
@@ -144,10 +146,16 @@ module DQReadability
|
|
144
146
|
end
|
145
147
|
end
|
146
148
|
rescue
|
147
|
-
elem['href'] = ""
|
148
149
|
end
|
149
150
|
end
|
150
151
|
|
152
|
+
# removing edit spans
|
153
|
+
|
154
|
+
@html.css('span').each do |elem|
|
155
|
+
if elem.text.downcase == "[edit]"
|
156
|
+
elem.remove
|
157
|
+
end
|
158
|
+
end
|
151
159
|
|
152
160
|
end
|
153
161
|
|
@@ -296,6 +304,7 @@ module DQReadability
|
|
296
304
|
end
|
297
305
|
|
298
306
|
def content(remove_unlikely_candidates = :default)
|
307
|
+
if @bypass == false
|
299
308
|
@remove_unlikely_candidates = false if remove_unlikely_candidates == false
|
300
309
|
|
301
310
|
prepare_candidates
|
@@ -319,6 +328,13 @@ module DQReadability
|
|
319
328
|
else
|
320
329
|
cleaned_article
|
321
330
|
end
|
331
|
+
else
|
332
|
+
make_html
|
333
|
+
s = Nokogiri::XML::Node::SaveOptions
|
334
|
+
save_opts = s::NO_DECLARATION | s::NO_EMPTY_TAGS | s::AS_XHTML
|
335
|
+
html = @html.serialize(:save_with => save_opts)
|
336
|
+
return html
|
337
|
+
end
|
322
338
|
end
|
323
339
|
|
324
340
|
def get_article(candidates, best_candidate)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dq-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Prateek Papriwal
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -135,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
135
|
version: '0'
|
136
136
|
requirements: []
|
137
137
|
rubyforge_project: dq-readability
|
138
|
-
rubygems_version: 2.
|
138
|
+
rubygems_version: 2.2.2
|
139
139
|
signing_key:
|
140
140
|
specification_version: 4
|
141
141
|
summary: Port of arc90's readability project to ruby
|