algoliasearch-jekyll 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/algoliasearch-jekyll.rb +1 -11
- data/lib/push.rb +87 -148
- metadata +45 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1af115b167749491e0cfc3fbdfca8f7cbd0bfb1
|
4
|
+
data.tar.gz: 6cdc40cf3148a33400178ba9bb9076095adebd31
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 371291f704b4029819eb5dbb59de2e3ac2ac90c8973ae678fb1890824f3c9b4470782b1bfe8b7cbb528a374e80eeb4e274440c53a5561efc5bb3d703d4e19ead
|
7
|
+
data.tar.gz: c508df8e04d78ae5db324ef678cb27ae46f5e067030a45a373dc399f25eca244263ba3bddf998e35b965fed31fb374d1e565014e6fe7a06631f4e4831811b097
|
data/lib/algoliasearch-jekyll.rb
CHANGED
@@ -21,7 +21,7 @@ class AlgoliaSearchJekyll < Jekyll::Command
|
|
21
21
|
|
22
22
|
subcommand.action do |args, options|
|
23
23
|
@config = configuration_from_options(options)
|
24
|
-
AlgoliaSearchJekyllPush.
|
24
|
+
AlgoliaSearchJekyllPush.init_options(args, options, @config).process
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
@@ -39,15 +39,5 @@ class AlgoliaSearchJekyll < Jekyll::Command
|
|
39
39
|
command.option 'unpublished', '--unpublished',
|
40
40
|
'Index posts that were marked as unpublished'
|
41
41
|
end
|
42
|
-
|
43
|
-
def api_key
|
44
|
-
return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
|
45
|
-
key_file = File.join(@config['source'], '_algolia_api_key')
|
46
|
-
|
47
|
-
if File.exist?(key_file) && File.size(key_file) > 0
|
48
|
-
return File.open(key_file).read.strip
|
49
|
-
end
|
50
|
-
nil
|
51
|
-
end
|
52
42
|
end
|
53
43
|
end
|
data/lib/push.rb
CHANGED
@@ -1,52 +1,92 @@
|
|
1
1
|
require 'algoliasearch'
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'json'
|
4
|
+
require_relative './record_extractor.rb'
|
4
5
|
|
5
6
|
# `jekyll algolia push` command
|
6
7
|
class AlgoliaSearchJekyllPush < Jekyll::Command
|
7
8
|
class << self
|
9
|
+
attr_accessor :options, :config
|
10
|
+
|
8
11
|
def init_with_program(_prog)
|
9
12
|
end
|
10
13
|
|
11
|
-
|
14
|
+
# Init the command with options passed on the command line
|
15
|
+
# `jekyll algolia push ARG1 ARG2 --OPTION_NAME1 OPTION_VALUE1`
|
16
|
+
# config comes from _config.yml
|
17
|
+
def init_options(args = [], options = {}, config = {})
|
18
|
+
args = [] unless args
|
12
19
|
@args = args
|
13
20
|
@options = options
|
14
21
|
@config = config
|
15
22
|
|
23
|
+
# Allow for passing index name on the command line
|
16
24
|
index_name = args[0]
|
17
|
-
|
18
25
|
@config['algolia']['index_name'] = index_name if index_name
|
26
|
+
self
|
27
|
+
end
|
28
|
+
|
29
|
+
# Check if the specified file should be indexed (we exclude static files,
|
30
|
+
# robots.txt and custom defined exclusions).
|
31
|
+
def indexable?(file)
|
32
|
+
return false if file.is_a?(Jekyll::StaticFile)
|
33
|
+
|
34
|
+
# Keep only markdown and html files
|
35
|
+
allowed_extensions = %w(html)
|
36
|
+
if @config['markdown_ext']
|
37
|
+
allowed_extensions += @config['markdown_ext'].split(',')
|
38
|
+
end
|
39
|
+
current_extension = File.extname(file.name)[1..-1]
|
40
|
+
return false unless allowed_extensions.include?(current_extension)
|
41
|
+
|
42
|
+
# Exclude files manually excluded from config
|
43
|
+
excluded_files = @config['algolia']['excluded_files']
|
44
|
+
unless excluded_files.nil?
|
45
|
+
return false if excluded_files.include?(file.name)
|
46
|
+
end
|
47
|
+
|
48
|
+
true
|
49
|
+
end
|
50
|
+
|
51
|
+
# Run the default `jekyll build` command but overwrite the actual "write
|
52
|
+
# files on disk" part to instead push data to Algolia
|
53
|
+
def process
|
19
54
|
site = Jekyll::Site.new(@config)
|
20
55
|
|
21
|
-
# Instead of writing generated website to disk, we will push it to the
|
22
|
-
# index
|
23
56
|
def site.write
|
24
57
|
items = []
|
25
58
|
each_site_file do |file|
|
26
|
-
|
59
|
+
next unless AlgoliaSearchJekyllPush.indexable?(file)
|
60
|
+
|
61
|
+
new_items = AlgoliaSearchRecordExtractor.new(file).extract
|
27
62
|
next if new_items.nil?
|
28
63
|
items += new_items
|
29
64
|
end
|
30
65
|
AlgoliaSearchJekyllPush.push(items)
|
31
66
|
end
|
32
67
|
|
68
|
+
# This will call the build command by default, which will in turn call our
|
69
|
+
# custom .write method
|
33
70
|
site.process
|
34
71
|
end
|
35
72
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
return
|
41
|
-
return false unless file['title']
|
42
|
-
true
|
43
|
-
end
|
73
|
+
# Read the API key either from ENV or from an _algolia_api_key file in
|
74
|
+
# source folder
|
75
|
+
def api_key
|
76
|
+
# First read in ENV
|
77
|
+
return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
|
44
78
|
|
45
|
-
|
46
|
-
@config['
|
79
|
+
# Otherwise from file in source directory
|
80
|
+
key_file = File.join(@config['source'], '_algolia_api_key')
|
81
|
+
if File.exist?(key_file) && File.size(key_file) > 0
|
82
|
+
return File.open(key_file).read.strip
|
83
|
+
end
|
84
|
+
nil
|
47
85
|
end
|
48
86
|
|
49
|
-
|
87
|
+
# Check that all credentials are present, and stop with a helpful message
|
88
|
+
# if not
|
89
|
+
def check_credentials
|
50
90
|
unless api_key
|
51
91
|
Jekyll.logger.error 'Algolia Error: No API key defined'
|
52
92
|
Jekyll.logger.warn ' You have two ways to configure your API key:'
|
@@ -56,30 +96,30 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
56
96
|
exit 1
|
57
97
|
end
|
58
98
|
|
59
|
-
unless application_id
|
99
|
+
unless @config['algolia']['application_id']
|
60
100
|
Jekyll.logger.error 'Algolia Error: No application ID defined'
|
61
101
|
Jekyll.logger.warn ' Please set your application id in the '\
|
62
102
|
'_config.yml file, like so:'
|
63
|
-
|
103
|
+
Jekyll.logger.warn ''
|
64
104
|
# The spaces are needed otherwise the text is centered
|
65
105
|
Jekyll.logger.warn ' algolia: '
|
66
106
|
Jekyll.logger.warn ' application_id: \'{your_application_id}\''
|
67
|
-
|
107
|
+
Jekyll.logger.warn ''
|
68
108
|
Jekyll.logger.warn ' Your application ID can be found in your algolia'\
|
69
109
|
' dashboard'
|
70
110
|
Jekyll.logger.warn ' https://www.algolia.com/licensing'
|
71
111
|
exit 1
|
72
112
|
end
|
73
113
|
|
74
|
-
unless index_name
|
114
|
+
unless @config['algolia']['index_name']
|
75
115
|
Jekyll.logger.error 'Algolia Error: No index name defined'
|
76
116
|
Jekyll.logger.warn ' Please set your index name in the _config.yml'\
|
77
117
|
' file, like so:'
|
78
|
-
|
118
|
+
Jekyll.logger.warn ''
|
79
119
|
# The spaces are needed otherwise the text is centered
|
80
120
|
Jekyll.logger.warn ' algolia: '
|
81
121
|
Jekyll.logger.warn ' index_name: \'{your_index_name}\''
|
82
|
-
|
122
|
+
Jekyll.logger.warn ''
|
83
123
|
Jekyll.logger.warn ' You can edit your indices in your dashboard'
|
84
124
|
Jekyll.logger.warn ' https://www.algolia.com/explorer'
|
85
125
|
exit 1
|
@@ -87,10 +127,13 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
87
127
|
true
|
88
128
|
end
|
89
129
|
|
130
|
+
# Get index settings
|
90
131
|
def configure_index(index)
|
91
|
-
|
92
|
-
|
93
|
-
|
132
|
+
settings = {
|
133
|
+
typoTolerance: true,
|
134
|
+
distinct: true,
|
135
|
+
attributeForDistinct: 'title',
|
136
|
+
attributesForFaceting: %w(tags type title),
|
94
137
|
attributesToIndex: %w(
|
95
138
|
title h1 h2 h3 h4 h5 h6
|
96
139
|
unordered(text)
|
@@ -98,143 +141,39 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
98
141
|
),
|
99
142
|
attributesToRetrieve: %w(
|
100
143
|
title h1 h2 h3 h4 h5 h6
|
101
|
-
posted_at
|
102
|
-
content
|
103
|
-
text
|
104
144
|
url
|
145
|
+
tag_name
|
146
|
+
raw_html
|
147
|
+
text
|
148
|
+
posted_at
|
105
149
|
css_selector
|
150
|
+
css_selector_parent
|
106
151
|
),
|
107
152
|
customRanking: ['desc(posted_at)', 'desc(title_weight)'],
|
108
|
-
distinct: true,
|
109
153
|
highlightPreTag: '<span class="algolia__result-highlight">',
|
110
154
|
highlightPostTag: '</span>'
|
111
155
|
}
|
112
|
-
custom_settings = {}
|
113
|
-
@config['algolia']['settings'].each do |key, value|
|
114
|
-
custom_settings[key.to_sym] = value
|
115
|
-
end
|
116
|
-
settings = default_settings.merge(custom_settings)
|
117
|
-
|
118
|
-
index.set_settings(settings)
|
119
|
-
end
|
120
|
-
|
121
|
-
def get_items_from_file(file)
|
122
|
-
is_page = file.is_a?(Jekyll::Page)
|
123
|
-
is_post = file.is_a?(Jekyll::Post)
|
124
|
-
|
125
|
-
# We only index posts, and markdown pages
|
126
|
-
return nil unless is_page || is_post
|
127
|
-
return nil if is_page && !parseable?(file)
|
128
|
-
return nil if excluded_file?(file)
|
129
|
-
|
130
|
-
html = file.content.gsub("\n", ' ')
|
131
|
-
|
132
|
-
if is_post
|
133
|
-
tags = get_tags_from_post(file)
|
134
|
-
base_data = {
|
135
|
-
type: 'post',
|
136
|
-
parent_id: file.id,
|
137
|
-
url: file.url,
|
138
|
-
title: file.title,
|
139
|
-
tags: tags,
|
140
|
-
slug: file.slug,
|
141
|
-
posted_at: file.date.to_time.to_i
|
142
|
-
}
|
143
|
-
else
|
144
|
-
base_data = {
|
145
|
-
type: 'page',
|
146
|
-
parent_id: file.basename,
|
147
|
-
url: file.url,
|
148
|
-
title: file['title'],
|
149
|
-
slug: file.basename
|
150
|
-
}
|
151
|
-
end
|
152
|
-
|
153
|
-
get_paragraphs_from_html(html, base_data)
|
154
|
-
end
|
155
|
-
|
156
|
-
# Get a list of tags from a post. Handle both classic string tags or
|
157
|
-
# extended object tags
|
158
|
-
def get_tags_from_post(post)
|
159
|
-
tags = post.tags
|
160
|
-
return [] if tags.is_a?(Array) || tags.nil?
|
161
|
-
tags.map! { |tag| tag.to_s.gsub(',', '') }
|
162
|
-
end
|
163
|
-
|
164
|
-
# Get the list of headings (h1, h2, etc) above the specified node
|
165
|
-
def get_previous_hx(node, memo = { level: 7 })
|
166
|
-
previous = node.previous_sibling
|
167
|
-
# Stop if no previous element
|
168
|
-
unless previous
|
169
|
-
memo.delete(:level)
|
170
|
-
return memo
|
171
|
-
end
|
172
|
-
|
173
|
-
# Skip non-html elements
|
174
|
-
return get_previous_hx(previous, memo) unless previous.element?
|
175
156
|
|
176
|
-
#
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
157
|
+
# Merge default settings with user custom ones
|
158
|
+
if @config['algolia'].key?('settings')
|
159
|
+
custom_settings = {}
|
160
|
+
@config['algolia']['settings'].each do |key, value|
|
161
|
+
custom_settings[key.to_sym] = value
|
162
|
+
end
|
163
|
+
settings.merge!(custom_settings)
|
181
164
|
end
|
182
165
|
|
183
|
-
|
184
|
-
title_level = tag_name.gsub('h', '').to_i
|
185
|
-
return get_previous_hx(previous, memo) if title_level >= memo[:level]
|
186
|
-
memo[:level] = title_level
|
187
|
-
|
188
|
-
# Add to the memo and continue
|
189
|
-
memo[tag_name.to_sym] = previous.content
|
190
|
-
get_previous_hx(previous, memo)
|
191
|
-
end
|
192
|
-
|
193
|
-
# Get a custom value representing the number of word occurrences from the
|
194
|
-
# titles into the content
|
195
|
-
def get_title_weight(content, item)
|
196
|
-
# Get list of words
|
197
|
-
words = %i(title h1 h2 h3 h4 h5 h6)
|
198
|
-
.select { |title| item.key?(title) }
|
199
|
-
.map { |title| item[title].split(/\W+/) }
|
200
|
-
.flatten
|
201
|
-
.compact
|
202
|
-
.uniq
|
203
|
-
# Count how many words are in the text
|
204
|
-
weight = 0
|
205
|
-
words.each { |word| weight += 1 if content.include?(word) }
|
206
|
-
weight
|
207
|
-
end
|
208
|
-
|
209
|
-
# Will get a unique css selector for the node
|
210
|
-
def get_css_selector(node)
|
211
|
-
node.css_path.gsub('html > body > ', '')
|
212
|
-
end
|
213
|
-
|
214
|
-
# Get a list of items representing the different paragraphs
|
215
|
-
def get_paragraphs_from_html(html, base_data)
|
216
|
-
doc = Nokogiri::HTML(html)
|
217
|
-
paragraphs = doc.css('p').map.with_index do |p, index|
|
218
|
-
next unless p.text.size > 0
|
219
|
-
new_item = base_data.clone
|
220
|
-
new_item.merge!(get_previous_hx(p))
|
221
|
-
new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
|
222
|
-
new_item[:css_selector] = get_css_selector(p)
|
223
|
-
new_item[:raw_html] = p.to_s
|
224
|
-
new_item[:text] = p.content
|
225
|
-
new_item[:title_weight] = get_title_weight(p.text, new_item)
|
226
|
-
new_item
|
227
|
-
end
|
228
|
-
paragraphs.compact
|
166
|
+
index.set_settings(settings)
|
229
167
|
end
|
230
168
|
|
231
169
|
def push(items)
|
232
|
-
|
233
|
-
application_id = @config['algolia']['application_id']
|
234
|
-
index_name = @config['algolia']['index_name']
|
235
|
-
check_credentials(api_key, application_id, index_name)
|
170
|
+
check_credentials
|
236
171
|
|
237
|
-
|
172
|
+
index_name = @config['algolia']['index_name']
|
173
|
+
Algolia.init(
|
174
|
+
application_id: @config['algolia']['application_id'],
|
175
|
+
api_key: api_key
|
176
|
+
)
|
238
177
|
index = Algolia::Index.new(index_name)
|
239
178
|
configure_index(index)
|
240
179
|
index.clear_index
|
metadata
CHANGED
@@ -1,15 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: algoliasearch-jekyll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: jekyll
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: guard-rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.6'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.6'
|
13
55
|
- !ruby/object:Gem::Dependency
|
14
56
|
name: nokogiri
|
15
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +109,7 @@ dependencies:
|
|
67
109
|
- !ruby/object:Gem::Version
|
68
110
|
version: '1.4'
|
69
111
|
description: Index all your pages and posts to an Algolia index with `jekyll algolia
|
70
|
-
|
112
|
+
push`
|
71
113
|
email: tim@pixelastic.com
|
72
114
|
executables: []
|
73
115
|
extensions: []
|