algolia_html_extractor 2.5.0 → 2.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/algolia_html_extractor.rb +3 -3
- data/lib/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9944d514907702afbbcf6c176fa83c961bee97ee
|
4
|
+
data.tar.gz: abe9a382fb695023abd965f0e5809311400da1c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 194f4e3fe482f40c1dfb43e40352eea7d24f822870d0686b047a0e87d01d4270621a4ffa2c81fc86334b3f75a6801c05e0c799a8d03f9c2b29dcc440140b600c
|
7
|
+
data.tar.gz: 7c0b204aef8574ef685c65c1198d5919b00e5936fbc8d90f0101c6f2c0b07d6c630743566e2e935824cbb638c300bd4fccecc219217792d2693c3e265970ead6
|
@@ -2,7 +2,7 @@ require 'nokogiri'
|
|
2
2
|
require 'digest/md5'
|
3
3
|
|
4
4
|
# Extract content from an HTML page in the form of items with associated
|
5
|
-
#
|
5
|
+
# headings data
|
6
6
|
module AlgoliaHTMLExtractor
|
7
7
|
# Extractor options, applying default options when none set
|
8
8
|
def self.default_options(options)
|
@@ -33,7 +33,7 @@ module AlgoliaHTMLExtractor
|
|
33
33
|
lvl5: nil
|
34
34
|
}
|
35
35
|
current_position = 0 # Position of the DOM node in the tree
|
36
|
-
current_lvl = nil # Current closest
|
36
|
+
current_lvl = nil # Current closest headings level
|
37
37
|
current_anchor = nil # Current closest anchor
|
38
38
|
|
39
39
|
# We select all nodes that match either the headings or the elements to
|
@@ -63,7 +63,7 @@ module AlgoliaHTMLExtractor
|
|
63
63
|
item = {
|
64
64
|
html: extract_html(node),
|
65
65
|
content: content,
|
66
|
-
|
66
|
+
headings: current_hierarchy.values.compact,
|
67
67
|
anchor: current_anchor,
|
68
68
|
node: node,
|
69
69
|
custom_ranking: {
|
data/lib/version.rb
CHANGED