algolia_html_extractor 2.5.2 → 2.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0fa007eca9fdbcf5c0774e04f78f05cdd370622f
4
- data.tar.gz: 2d926696b683b4481bbc531f7dadcf129511e2d3
2
+ SHA256:
3
+ metadata.gz: 1496286d1762c4231f9f38e42549b0128c53d1ff5cf86ffc5b0da04c3c9934bb
4
+ data.tar.gz: 6af72f32fa6e8e86064677688a2268a02a1ad2a8bdd9fc44146f2e72f527eb3f
5
5
  SHA512:
6
- metadata.gz: 40372848ec0a92e1b826ac651b69a075840b32921c46a060c675592c0038342c4e8548c0d42ab0e6353f0d0fae1f7cf2b844bae0aa414a3f5f4f1f5cba51840a
7
- data.tar.gz: f847e9ac884dba1331c64b8f3e611de3a9a5d221606fc885c05822deeb37b719c493d5c8e3e2f8d4a8dc032d98bf860856b03bbe34f084216a9b960b3579a2ee
6
+ metadata.gz: 70bdd12e5e15d62cf9c5a5c992180b2491c090cdcca4e671ff8039c7ae91c91817fd38dcf6a4b54e09f59082dfa2157cd4ca44513cf83c64aad8e1f8c771a258
7
+ data.tar.gz: b5111d1f7fbc948a1186daf0c2c3760cb2a6ea0a7b1b64ae677f96f4ff849a8e99a30f31efb75807e1d509f131c236f6c750a4f94384180df8b70c0736b3c2c1
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'digest/md5'
3
5
 
@@ -8,7 +10,8 @@ module AlgoliaHTMLExtractor
8
10
  def self.default_options(options)
9
11
  default_options = {
10
12
  css_selector: 'p',
11
- heading_selector: 'h1,h2,h3,h4,h5,h6'
13
+ heading_selector: 'h1,h2,h3,h4,h5,h6',
14
+ tags_to_exclude: ''
12
15
  }
13
16
  default_options.merge(options)
14
17
  end
@@ -22,6 +25,7 @@ module AlgoliaHTMLExtractor
22
25
  options = default_options(options)
23
26
  heading_selector = options[:heading_selector]
24
27
  css_selector = options[:css_selector]
28
+ tags_to_exclude = options[:tags_to_exclude]
25
29
 
26
30
  items = []
27
31
  current_hierarchy = {
@@ -56,6 +60,9 @@ module AlgoliaHTMLExtractor
56
60
  # Stop if node is not to be extracted
57
61
  next unless node.matches?(css_selector)
58
62
 
63
+ # Removing excluded child from the node
64
+ node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
65
+
59
66
  # Stop if node is empty
60
67
  content = extract_text(node)
61
68
  next if content.empty?
@@ -148,6 +155,7 @@ module AlgoliaHTMLExtractor
148
155
  def self.heading_weight(heading_level)
149
156
  weight = 100
150
157
  return weight if heading_level.nil?
158
+
151
159
  weight - ((heading_level + 1) * 10)
152
160
  end
153
161
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Expose gem version
2
4
  # rubocop:disable Style/SingleLineMethods
3
5
  class AlgoliaHTMLExtractorVersion
4
- def self.to_s; '2.5.2' end
6
+ def self.to_s; '2.6.4' end
5
7
  end
6
8
  # rubocop:enable Style/SingleLineMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algolia_html_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.2
4
+ version: 2.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-20 00:00:00.000000000 Z
11
+ date: 2021-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 1.8.2
33
+ version: '1.10'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 1.8.2
40
+ version: '1.10'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -180,7 +180,7 @@ homepage: https://github.com/algolia/html-extractor
180
180
  licenses:
181
181
  - MIT
182
182
  metadata: {}
183
- post_install_message:
183
+ post_install_message:
184
184
  rdoc_options: []
185
185
  require_paths:
186
186
  - lib
@@ -195,9 +195,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
195
195
  - !ruby/object:Gem::Version
196
196
  version: '0'
197
197
  requirements: []
198
- rubyforge_project:
199
- rubygems_version: 2.6.13
200
- signing_key:
198
+ rubygems_version: 3.1.2
199
+ signing_key:
201
200
  specification_version: 4
202
201
  summary: Convert HTML content into Algolia records
203
202
  test_files: []