algolia_html_extractor 2.6.0 → 2.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/algolia_html_extractor.rb +6 -1
- data/lib/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf26475758c9b8aaccd120f0a9fbfec4d6446a24
|
4
|
+
data.tar.gz: 6aa13ec0687cd1949a0b1b2a7ed08b1686d50cc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f10c93aa21388d46178cac6019505b0f5e374df9a4374c27bb8b401c5c1e759c9b68b732d0cd8d599d893dec5d6ab7a8eda2df4db0bb05b12e8b0811d27a47a7
|
7
|
+
data.tar.gz: db353bf1f3ee22dc75d645a68c07de2633bddb1bb20d53dd15f118e924a8aaee3bb5956aac593642741d76660ccf80e8668c8b83ca73e1db31286f55c3fd1947
|
@@ -8,7 +8,8 @@ module AlgoliaHTMLExtractor
|
|
8
8
|
def self.default_options(options)
|
9
9
|
default_options = {
|
10
10
|
css_selector: 'p',
|
11
|
-
heading_selector: 'h1,h2,h3,h4,h5,h6'
|
11
|
+
heading_selector: 'h1,h2,h3,h4,h5,h6',
|
12
|
+
tags_to_exclude: ''
|
12
13
|
}
|
13
14
|
default_options.merge(options)
|
14
15
|
end
|
@@ -22,6 +23,7 @@ module AlgoliaHTMLExtractor
|
|
22
23
|
options = default_options(options)
|
23
24
|
heading_selector = options[:heading_selector]
|
24
25
|
css_selector = options[:css_selector]
|
26
|
+
tags_to_exclude = options[:tags_to_exclude]
|
25
27
|
|
26
28
|
items = []
|
27
29
|
current_hierarchy = {
|
@@ -56,6 +58,9 @@ module AlgoliaHTMLExtractor
|
|
56
58
|
# Stop if node is not to be extracted
|
57
59
|
next unless node.matches?(css_selector)
|
58
60
|
|
61
|
+
# Removing excluded child from the node
|
62
|
+
node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
|
63
|
+
|
59
64
|
# Stop if node is empty
|
60
65
|
content = extract_text(node)
|
61
66
|
next if content.empty?
|
data/lib/version.rb
CHANGED