algolia_html_extractor 2.6.0 → 2.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 550c012af975860c7a34893843737d48952bff0a
4
- data.tar.gz: 46dd5bd764f68770a2f19a33db9561670cb77fb5
3
+ metadata.gz: cf26475758c9b8aaccd120f0a9fbfec4d6446a24
4
+ data.tar.gz: 6aa13ec0687cd1949a0b1b2a7ed08b1686d50cc1
5
5
  SHA512:
6
- metadata.gz: ec5094c285d211d63154e532faaa5de6e55662facc0dd747ca5a2c113313fb12c980d1eb19b0baf9ef9843cf63a6bbfd20bb2119c2b3c1d31e6744f3a0a13de1
7
- data.tar.gz: 2620c162a481ef1bcd922644faa3a7d01fc3d206b98743c197358242690086b149c3ce85889a777bfaa8a4726835798f0f702f8dc3e2957a9b0f25f8d8511f88
6
+ metadata.gz: f10c93aa21388d46178cac6019505b0f5e374df9a4374c27bb8b401c5c1e759c9b68b732d0cd8d599d893dec5d6ab7a8eda2df4db0bb05b12e8b0811d27a47a7
7
+ data.tar.gz: db353bf1f3ee22dc75d645a68c07de2633bddb1bb20d53dd15f118e924a8aaee3bb5956aac593642741d76660ccf80e8668c8b83ca73e1db31286f55c3fd1947
@@ -8,7 +8,8 @@ module AlgoliaHTMLExtractor
8
8
  def self.default_options(options)
9
9
  default_options = {
10
10
  css_selector: 'p',
11
- heading_selector: 'h1,h2,h3,h4,h5,h6'
11
+ heading_selector: 'h1,h2,h3,h4,h5,h6',
12
+ tags_to_exclude: ''
12
13
  }
13
14
  default_options.merge(options)
14
15
  end
@@ -22,6 +23,7 @@ module AlgoliaHTMLExtractor
22
23
  options = default_options(options)
23
24
  heading_selector = options[:heading_selector]
24
25
  css_selector = options[:css_selector]
26
+ tags_to_exclude = options[:tags_to_exclude]
25
27
 
26
28
  items = []
27
29
  current_hierarchy = {
@@ -56,6 +58,9 @@ module AlgoliaHTMLExtractor
56
58
  # Stop if node is not to be extracted
57
59
  next unless node.matches?(css_selector)
58
60
 
61
+ # Removing excluded child from the node
62
+ node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
63
+
59
64
  # Stop if node is empty
60
65
  content = extract_text(node)
61
66
  next if content.empty?
@@ -1,6 +1,6 @@
1
1
  # Expose gem version
2
2
  # rubocop:disable Style/SingleLineMethods
3
3
  class AlgoliaHTMLExtractorVersion
4
- def self.to_s; '2.6.0' end
4
+ def self.to_s; '2.6.1' end
5
5
  end
6
6
  # rubocop:enable Style/SingleLineMethods
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algolia_html_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.0
4
+ version: 2.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry