algolia_html_extractor 2.5.1 → 2.6.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 9944d514907702afbbcf6c176fa83c961bee97ee
4
- data.tar.gz: abe9a382fb695023abd965f0e5809311400da1c3
2
+ SHA256:
3
+ metadata.gz: ed81071c2031fcf2aab6a94b3e7e71b6348b00fd839638de3688a50209dde639
4
+ data.tar.gz: 5ccba85a1982abae971c624ce889aa11c5973e205c56adba7a351c9cf0baf902
5
5
  SHA512:
6
- metadata.gz: 194f4e3fe482f40c1dfb43e40352eea7d24f822870d0686b047a0e87d01d4270621a4ffa2c81fc86334b3f75a6801c05e0c799a8d03f9c2b29dcc440140b600c
7
- data.tar.gz: 7c0b204aef8574ef685c65c1198d5919b00e5936fbc8d90f0101c6f2c0b07d6c630743566e2e935824cbb638c300bd4fccecc219217792d2693c3e265970ead6
6
+ metadata.gz: 38afb9eec2aba6c22a10caf459979587ac3ddd3771b5ed45ca22e039f8e589b45f82a3997bd289b302dae41e8afb0a102b5fa2e12248b4627c32973712a302d6
7
+ data.tar.gz: b2a3f64fd9c20d6f5d0e7e823db5ce069071417a37a9bc114ef293a50db8d4cc8e7911afc811b413178fa2c863aeb68d30f5a64b9c0d2d691ab66916038c54ad
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'nokogiri'
2
4
  require 'digest/md5'
3
5
 
@@ -8,7 +10,8 @@ module AlgoliaHTMLExtractor
8
10
  def self.default_options(options)
9
11
  default_options = {
10
12
  css_selector: 'p',
11
- heading_selector: 'h1,h2,h3,h4,h5,h6'
13
+ heading_selector: 'h1,h2,h3,h4,h5,h6',
14
+ tags_to_exclude: ''
12
15
  }
13
16
  default_options.merge(options)
14
17
  end
@@ -22,6 +25,7 @@ module AlgoliaHTMLExtractor
22
25
  options = default_options(options)
23
26
  heading_selector = options[:heading_selector]
24
27
  css_selector = options[:css_selector]
28
+ tags_to_exclude = options[:tags_to_exclude]
25
29
 
26
30
  items = []
27
31
  current_hierarchy = {
@@ -56,6 +60,9 @@ module AlgoliaHTMLExtractor
56
60
  # Stop if node is not to be extracted
57
61
  next unless node.matches?(css_selector)
58
62
 
63
+ # Removing excluded child from the node
64
+ node.search(tags_to_exclude).each(&:remove) unless tags_to_exclude.empty?
65
+
59
66
  # Stop if node is empty
60
67
  content = extract_text(node)
61
68
  next if content.empty?
@@ -148,6 +155,7 @@ module AlgoliaHTMLExtractor
148
155
  def self.heading_weight(heading_level)
149
156
  weight = 100
150
157
  return weight if heading_level.nil?
158
+
151
159
  weight - ((heading_level + 1) * 10)
152
160
  end
153
161
  end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Expose gem version
2
4
  # rubocop:disable Style/SingleLineMethods
3
5
  class AlgoliaHTMLExtractorVersion
4
- def self.to_s; '2.5.1' end
6
+ def self.to_s; '2.6.3' end
5
7
  end
6
8
  # rubocop:enable Style/SingleLineMethods
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algolia_html_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.1
4
+ version: 2.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-13 00:00:00.000000000 Z
11
+ date: 2021-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: awesome_print
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.6'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '1.6'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: json
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -42,16 +28,16 @@ dependencies:
42
28
  name: nokogiri
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - "~>"
31
+ - - ">="
46
32
  - !ruby/object:Gem::Version
47
- version: 1.8.2
33
+ version: 1.10.4
48
34
  type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - "~>"
38
+ - - ">="
53
39
  - !ruby/object:Gem::Version
54
- version: 1.8.2
40
+ version: 1.10.4
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: coveralls
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -194,7 +180,7 @@ homepage: https://github.com/algolia/html-extractor
194
180
  licenses:
195
181
  - MIT
196
182
  metadata: {}
197
- post_install_message:
183
+ post_install_message:
198
184
  rdoc_options: []
199
185
  require_paths:
200
186
  - lib
@@ -209,9 +195,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
209
195
  - !ruby/object:Gem::Version
210
196
  version: '0'
211
197
  requirements: []
212
- rubyforge_project:
213
- rubygems_version: 2.6.13
214
- signing_key:
198
+ rubygems_version: 3.1.2
199
+ signing_key:
215
200
  specification_version: 4
216
201
  summary: Convert HTML content into Algolia records
217
202
  test_files: []