algolia_html_extractor 2.5.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 9944d514907702afbbcf6c176fa83c961bee97ee
4
- data.tar.gz: abe9a382fb695023abd965f0e5809311400da1c3
2
+ SHA256:
3
+ metadata.gz: ed81071c2031fcf2aab6a94b3e7e71b6348b00fd839638de3688a50209dde639
4
+ data.tar.gz: 5ccba85a1982abae971c624ce889aa11c5973e205c56adba7a351c9cf0baf902
5
5
  SHA512:
6
- metadata.gz: 194f4e3fe482f40c1dfb43e40352eea7d24f822870d0686b047a0e87d01d4270621a4ffa2c81fc86334b3f75a6801c05e0c799a8d03f9c2b29dcc440140b600c
7
- data.tar.gz: 7c0b204aef8574ef685c65c1198d5919b00e5936fbc8d90f0101c6f2c0b07d6c630743566e2e935824cbb638c300bd4fccecc219217792d2693c3e265970ead6
6
+ metadata.gz: 38afb9eec2aba6c22a10caf459979587ac3ddd3771b5ed45ca22e039f8e589b45f82a3997bd289b302dae41e8afb0a102b5fa2e12248b4627c32973712a302d6
7
+ data.tar.gz: b2a3f64fd9c20d6f5d0e7e823db5ce069071417a37a9bc114ef293a50db8d4cc8e7911afc811b413178fa2c863aeb68d30f5a64b9c0d2d691ab66916038c54ad
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'digest/md5'
3
5
 
@@ -8,7 +10,8 @@ module AlgoliaHTMLExtractor
8
10
  def self.default_options(options)
9
11
  default_options = {
10
12
  css_selector: 'p',
11
- heading_selector: 'h1,h2,h3,h4,h5,h6'
13
+ heading_selector: 'h1,h2,h3,h4,h5,h6',
14
+ tags_to_exclude: ''
12
15
  }
13
16
  default_options.merge(options)
14
17
  end
@@ -22,6 +25,7 @@ module AlgoliaHTMLExtractor
22
25
  options = default_options(options)
23
26
  heading_selector = options[:heading_selector]
24
27
  css_selector = options[:css_selector]
28
+ tags_to_exclude = options[:tags_to_exclude]
25
29
 
26
30
  items = []
27
31
  current_hierarchy = {
@@ -56,6 +60,9 @@ module AlgoliaHTMLExtractor
56
60
  # Stop if node is not to be extracted
57
61
  next unless node.matches?(css_selector)
58
62
 
63
+ # Removing excluded child from the node
64
+ node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
65
+
59
66
  # Stop if node is empty
60
67
  content = extract_text(node)
61
68
  next if content.empty?
@@ -148,6 +155,7 @@ module AlgoliaHTMLExtractor
148
155
  def self.heading_weight(heading_level)
149
156
  weight = 100
150
157
  return weight if heading_level.nil?
158
+
151
159
  weight - ((heading_level + 1) * 10)
152
160
  end
153
161
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Expose gem version
2
4
  # rubocop:disable Style/SingleLineMethods
3
5
  class AlgoliaHTMLExtractorVersion
4
- def self.to_s; '2.5.1' end
6
+ def self.to_s; '2.6.3' end
5
7
  end
6
8
  # rubocop:enable Style/SingleLineMethods
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algolia_html_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.1
4
+ version: 2.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-13 00:00:00.000000000 Z
11
+ date: 2021-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: awesome_print
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.6'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '1.6'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: json
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -42,16 +28,16 @@ dependencies:
42
28
  name: nokogiri
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - "~>"
31
+ - - ">="
46
32
  - !ruby/object:Gem::Version
47
- version: 1.8.2
33
+ version: 1.10.4
48
34
  type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - "~>"
38
+ - - ">="
53
39
  - !ruby/object:Gem::Version
54
- version: 1.8.2
40
+ version: 1.10.4
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: coveralls
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -194,7 +180,7 @@ homepage: https://github.com/algolia/html-extractor
194
180
  licenses:
195
181
  - MIT
196
182
  metadata: {}
197
- post_install_message:
183
+ post_install_message:
198
184
  rdoc_options: []
199
185
  require_paths:
200
186
  - lib
@@ -209,9 +195,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
209
195
  - !ruby/object:Gem::Version
210
196
  version: '0'
211
197
  requirements: []
212
- rubyforge_project:
213
- rubygems_version: 2.6.13
214
- signing_key:
198
+ rubygems_version: 3.1.2
199
+ signing_key:
215
200
  specification_version: 4
216
201
  summary: Convert HTML content into Algolia records
217
202
  test_files: []