algolia_html_extractor 2.5.2 → 2.6.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0fa007eca9fdbcf5c0774e04f78f05cdd370622f
4
- data.tar.gz: 2d926696b683b4481bbc531f7dadcf129511e2d3
2
+ SHA256:
3
+ metadata.gz: 1496286d1762c4231f9f38e42549b0128c53d1ff5cf86ffc5b0da04c3c9934bb
4
+ data.tar.gz: 6af72f32fa6e8e86064677688a2268a02a1ad2a8bdd9fc44146f2e72f527eb3f
5
5
  SHA512:
6
- metadata.gz: 40372848ec0a92e1b826ac651b69a075840b32921c46a060c675592c0038342c4e8548c0d42ab0e6353f0d0fae1f7cf2b844bae0aa414a3f5f4f1f5cba51840a
7
- data.tar.gz: f847e9ac884dba1331c64b8f3e611de3a9a5d221606fc885c05822deeb37b719c493d5c8e3e2f8d4a8dc032d98bf860856b03bbe34f084216a9b960b3579a2ee
6
+ metadata.gz: 70bdd12e5e15d62cf9c5a5c992180b2491c090cdcca4e671ff8039c7ae91c91817fd38dcf6a4b54e09f59082dfa2157cd4ca44513cf83c64aad8e1f8c771a258
7
+ data.tar.gz: b5111d1f7fbc948a1186daf0c2c3760cb2a6ea0a7b1b64ae677f96f4ff849a8e99a30f31efb75807e1d509f131c236f6c750a4f94384180df8b70c0736b3c2c1
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'digest/md5'
3
5
 
@@ -8,7 +10,8 @@ module AlgoliaHTMLExtractor
8
10
  def self.default_options(options)
9
11
  default_options = {
10
12
  css_selector: 'p',
11
- heading_selector: 'h1,h2,h3,h4,h5,h6'
13
+ heading_selector: 'h1,h2,h3,h4,h5,h6',
14
+ tags_to_exclude: ''
12
15
  }
13
16
  default_options.merge(options)
14
17
  end
@@ -22,6 +25,7 @@ module AlgoliaHTMLExtractor
22
25
  options = default_options(options)
23
26
  heading_selector = options[:heading_selector]
24
27
  css_selector = options[:css_selector]
28
+ tags_to_exclude = options[:tags_to_exclude]
25
29
 
26
30
  items = []
27
31
  current_hierarchy = {
@@ -56,6 +60,9 @@ module AlgoliaHTMLExtractor
56
60
  # Stop if node is not to be extracted
57
61
  next unless node.matches?(css_selector)
58
62
 
63
+ # Removing excluded child from the node
64
+ node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
65
+
59
66
  # Stop if node is empty
60
67
  content = extract_text(node)
61
68
  next if content.empty?
@@ -148,6 +155,7 @@ module AlgoliaHTMLExtractor
148
155
  def self.heading_weight(heading_level)
149
156
  weight = 100
150
157
  return weight if heading_level.nil?
158
+
151
159
  weight - ((heading_level + 1) * 10)
152
160
  end
153
161
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Expose gem version
2
4
  # rubocop:disable Style/SingleLineMethods
3
5
  class AlgoliaHTMLExtractorVersion
4
- def self.to_s; '2.5.2' end
6
+ def self.to_s; '2.6.4' end
5
7
  end
6
8
  # rubocop:enable Style/SingleLineMethods
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algolia_html_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.2
4
+ version: 2.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-20 00:00:00.000000000 Z
11
+ date: 2021-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 1.8.2
33
+ version: '1.10'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 1.8.2
40
+ version: '1.10'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -180,7 +180,7 @@ homepage: https://github.com/algolia/html-extractor
180
180
  licenses:
181
181
  - MIT
182
182
  metadata: {}
183
- post_install_message:
183
+ post_install_message:
184
184
  rdoc_options: []
185
185
  require_paths:
186
186
  - lib
@@ -195,9 +195,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
195
195
  - !ruby/object:Gem::Version
196
196
  version: '0'
197
197
  requirements: []
198
- rubyforge_project:
199
- rubygems_version: 2.6.13
200
- signing_key:
198
+ rubygems_version: 3.1.2
199
+ signing_key:
201
200
  specification_version: 4
202
201
  summary: Convert HTML content into Algolia records
203
202
  test_files: []