deba 0.7.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/deba.rb +6 -5
- data/lib/deba/extractor.rb +6 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b7f7bd1f9e68e59d2040b5641cd9a6cd7a04a35
|
4
|
+
data.tar.gz: a7b81e7fa90ea6ff05d00ed1607e85ec9726c2b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0644b62452652dd3edef34ca928560079eae026517abea537b0444b403cbf1d921ae56425f89f608573ed6abc099a7f125ca14174fce8b9090b798ab48d6e53d
|
7
|
+
data.tar.gz: 97f960ca1fb9b1cfc4c840876744c06374d4e046968b0e117eeda657630f776c9437a11c6f3f2e18c8dc88122a46dbddf16373f80a10a1dc782d636b9bafbcd2
|
data/lib/deba.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
|
3
3
|
module Deba
|
4
|
-
VERSION = "0.7.0"
|
4
|
+
VERSION = "0.8.0"
|
5
5
|
end
|
6
6
|
|
7
7
|
require "deba/utils"
|
@@ -15,11 +15,12 @@ require "deba/text_runner"
|
|
15
15
|
require "deba/extractor"
|
16
16
|
|
17
17
|
module Deba
|
18
|
-
def self.extract(html)
|
19
|
-
document(html).to_s
|
18
|
+
def self.extract(html, options = {})
|
19
|
+
document(html, options).to_s
|
20
20
|
end
|
21
21
|
|
22
|
-
def self.document(html)
|
23
|
-
|
22
|
+
def self.document(html, options = {})
|
23
|
+
doc = html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri.HTML(html)
|
24
|
+
Deba::Extractor.new(doc, options).extract
|
24
25
|
end
|
25
26
|
end
|
data/lib/deba/extractor.rb
CHANGED
@@ -5,8 +5,9 @@ class Deba::Extractor
|
|
5
5
|
|
6
6
|
attr_reader :blocks
|
7
7
|
|
8
|
-
def initialize(doc)
|
8
|
+
def initialize(doc, options = {})
|
9
9
|
@node = doc.root
|
10
|
+
@options = options
|
10
11
|
end
|
11
12
|
|
12
13
|
def extract
|
@@ -20,6 +21,10 @@ class Deba::Extractor
|
|
20
21
|
end
|
21
22
|
|
22
23
|
def process(node)
|
24
|
+
if @options.key?(:exclude)
|
25
|
+
return if Array(@options[:exclude]).any? { |selector| node.matches?(selector) }
|
26
|
+
end
|
27
|
+
|
23
28
|
node_name = node.name.downcase
|
24
29
|
|
25
30
|
return if node_name == 'head'
|