algoliasearch-jekyll 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/algoliasearch-jekyll.rb +1 -11
- data/lib/push.rb +87 -148
- metadata +45 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1af115b167749491e0cfc3fbdfca8f7cbd0bfb1
|
4
|
+
data.tar.gz: 6cdc40cf3148a33400178ba9bb9076095adebd31
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 371291f704b4029819eb5dbb59de2e3ac2ac90c8973ae678fb1890824f3c9b4470782b1bfe8b7cbb528a374e80eeb4e274440c53a5561efc5bb3d703d4e19ead
|
7
|
+
data.tar.gz: c508df8e04d78ae5db324ef678cb27ae46f5e067030a45a373dc399f25eca244263ba3bddf998e35b965fed31fb374d1e565014e6fe7a06631f4e4831811b097
|
data/lib/algoliasearch-jekyll.rb
CHANGED
@@ -21,7 +21,7 @@ class AlgoliaSearchJekyll < Jekyll::Command
|
|
21
21
|
|
22
22
|
subcommand.action do |args, options|
|
23
23
|
@config = configuration_from_options(options)
|
24
|
-
AlgoliaSearchJekyllPush.
|
24
|
+
AlgoliaSearchJekyllPush.init_options(args, options, @config).process
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
@@ -39,15 +39,5 @@ class AlgoliaSearchJekyll < Jekyll::Command
|
|
39
39
|
command.option 'unpublished', '--unpublished',
|
40
40
|
'Index posts that were marked as unpublished'
|
41
41
|
end
|
42
|
-
|
43
|
-
def api_key
|
44
|
-
return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
|
45
|
-
key_file = File.join(@config['source'], '_algolia_api_key')
|
46
|
-
|
47
|
-
if File.exist?(key_file) && File.size(key_file) > 0
|
48
|
-
return File.open(key_file).read.strip
|
49
|
-
end
|
50
|
-
nil
|
51
|
-
end
|
52
42
|
end
|
53
43
|
end
|
data/lib/push.rb
CHANGED
@@ -1,52 +1,92 @@
|
|
1
1
|
require 'algoliasearch'
|
2
2
|
require 'nokogiri'
|
3
3
|
require 'json'
|
4
|
+
require_relative './record_extractor.rb'
|
4
5
|
|
5
6
|
# `jekyll algolia push` command
|
6
7
|
class AlgoliaSearchJekyllPush < Jekyll::Command
|
7
8
|
class << self
|
9
|
+
attr_accessor :options, :config
|
10
|
+
|
8
11
|
def init_with_program(_prog)
|
9
12
|
end
|
10
13
|
|
11
|
-
|
14
|
+
# Init the command with options passed on the command line
|
15
|
+
# `jekyll algolia push ARG1 ARG2 --OPTION_NAME1 OPTION_VALUE1`
|
16
|
+
# config comes from _config.yml
|
17
|
+
def init_options(args = [], options = {}, config = {})
|
18
|
+
args = [] unless args
|
12
19
|
@args = args
|
13
20
|
@options = options
|
14
21
|
@config = config
|
15
22
|
|
23
|
+
# Allow for passing index name on the command line
|
16
24
|
index_name = args[0]
|
17
|
-
|
18
25
|
@config['algolia']['index_name'] = index_name if index_name
|
26
|
+
self
|
27
|
+
end
|
28
|
+
|
29
|
+
# Check if the specified file should be indexed (we exclude static files,
|
30
|
+
# robots.txt and custom defined exclusions).
|
31
|
+
def indexable?(file)
|
32
|
+
return false if file.is_a?(Jekyll::StaticFile)
|
33
|
+
|
34
|
+
# Keep only markdown and html files
|
35
|
+
allowed_extensions = %w(html)
|
36
|
+
if @config['markdown_ext']
|
37
|
+
allowed_extensions += @config['markdown_ext'].split(',')
|
38
|
+
end
|
39
|
+
current_extension = File.extname(file.name)[1..-1]
|
40
|
+
return false unless allowed_extensions.include?(current_extension)
|
41
|
+
|
42
|
+
# Exclude files manually excluded from config
|
43
|
+
excluded_files = @config['algolia']['excluded_files']
|
44
|
+
unless excluded_files.nil?
|
45
|
+
return false if excluded_files.include?(file.name)
|
46
|
+
end
|
47
|
+
|
48
|
+
true
|
49
|
+
end
|
50
|
+
|
51
|
+
# Run the default `jekyll build` command but overwrite the actual "write
|
52
|
+
# files on disk" part to instead push data to Algolia
|
53
|
+
def process
|
19
54
|
site = Jekyll::Site.new(@config)
|
20
55
|
|
21
|
-
# Instead of writing generated website to disk, we will push it to the
|
22
|
-
# index
|
23
56
|
def site.write
|
24
57
|
items = []
|
25
58
|
each_site_file do |file|
|
26
|
-
|
59
|
+
next unless AlgoliaSearchJekyllPush.indexable?(file)
|
60
|
+
|
61
|
+
new_items = AlgoliaSearchRecordExtractor.new(file).extract
|
27
62
|
next if new_items.nil?
|
28
63
|
items += new_items
|
29
64
|
end
|
30
65
|
AlgoliaSearchJekyllPush.push(items)
|
31
66
|
end
|
32
67
|
|
68
|
+
# This will call the build command by default, which will in turn call our
|
69
|
+
# custom .write method
|
33
70
|
site.process
|
34
71
|
end
|
35
72
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
return
|
41
|
-
return false unless file['title']
|
42
|
-
true
|
43
|
-
end
|
73
|
+
# Read the API key either from ENV or from an _algolia_api_key file in
|
74
|
+
# source folder
|
75
|
+
def api_key
|
76
|
+
# First read in ENV
|
77
|
+
return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
|
44
78
|
|
45
|
-
|
46
|
-
@config['
|
79
|
+
# Otherwise from file in source directory
|
80
|
+
key_file = File.join(@config['source'], '_algolia_api_key')
|
81
|
+
if File.exist?(key_file) && File.size(key_file) > 0
|
82
|
+
return File.open(key_file).read.strip
|
83
|
+
end
|
84
|
+
nil
|
47
85
|
end
|
48
86
|
|
49
|
-
|
87
|
+
# Check that all credentials are present, and stop with a helpful message
|
88
|
+
# if not
|
89
|
+
def check_credentials
|
50
90
|
unless api_key
|
51
91
|
Jekyll.logger.error 'Algolia Error: No API key defined'
|
52
92
|
Jekyll.logger.warn ' You have two ways to configure your API key:'
|
@@ -56,30 +96,30 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
56
96
|
exit 1
|
57
97
|
end
|
58
98
|
|
59
|
-
unless application_id
|
99
|
+
unless @config['algolia']['application_id']
|
60
100
|
Jekyll.logger.error 'Algolia Error: No application ID defined'
|
61
101
|
Jekyll.logger.warn ' Please set your application id in the '\
|
62
102
|
'_config.yml file, like so:'
|
63
|
-
|
103
|
+
Jekyll.logger.warn ''
|
64
104
|
# The spaces are needed otherwise the text is centered
|
65
105
|
Jekyll.logger.warn ' algolia: '
|
66
106
|
Jekyll.logger.warn ' application_id: \'{your_application_id}\''
|
67
|
-
|
107
|
+
Jekyll.logger.warn ''
|
68
108
|
Jekyll.logger.warn ' Your application ID can be found in your algolia'\
|
69
109
|
' dashboard'
|
70
110
|
Jekyll.logger.warn ' https://www.algolia.com/licensing'
|
71
111
|
exit 1
|
72
112
|
end
|
73
113
|
|
74
|
-
unless index_name
|
114
|
+
unless @config['algolia']['index_name']
|
75
115
|
Jekyll.logger.error 'Algolia Error: No index name defined'
|
76
116
|
Jekyll.logger.warn ' Please set your index name in the _config.yml'\
|
77
117
|
' file, like so:'
|
78
|
-
|
118
|
+
Jekyll.logger.warn ''
|
79
119
|
# The spaces are needed otherwise the text is centered
|
80
120
|
Jekyll.logger.warn ' algolia: '
|
81
121
|
Jekyll.logger.warn ' index_name: \'{your_index_name}\''
|
82
|
-
|
122
|
+
Jekyll.logger.warn ''
|
83
123
|
Jekyll.logger.warn ' You can edit your indices in your dashboard'
|
84
124
|
Jekyll.logger.warn ' https://www.algolia.com/explorer'
|
85
125
|
exit 1
|
@@ -87,10 +127,13 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
87
127
|
true
|
88
128
|
end
|
89
129
|
|
130
|
+
# Get index settings
|
90
131
|
def configure_index(index)
|
91
|
-
|
92
|
-
|
93
|
-
|
132
|
+
settings = {
|
133
|
+
typoTolerance: true,
|
134
|
+
distinct: true,
|
135
|
+
attributeForDistinct: 'title',
|
136
|
+
attributesForFaceting: %w(tags type title),
|
94
137
|
attributesToIndex: %w(
|
95
138
|
title h1 h2 h3 h4 h5 h6
|
96
139
|
unordered(text)
|
@@ -98,143 +141,39 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|
98
141
|
),
|
99
142
|
attributesToRetrieve: %w(
|
100
143
|
title h1 h2 h3 h4 h5 h6
|
101
|
-
posted_at
|
102
|
-
content
|
103
|
-
text
|
104
144
|
url
|
145
|
+
tag_name
|
146
|
+
raw_html
|
147
|
+
text
|
148
|
+
posted_at
|
105
149
|
css_selector
|
150
|
+
css_selector_parent
|
106
151
|
),
|
107
152
|
customRanking: ['desc(posted_at)', 'desc(title_weight)'],
|
108
|
-
distinct: true,
|
109
153
|
highlightPreTag: '<span class="algolia__result-highlight">',
|
110
154
|
highlightPostTag: '</span>'
|
111
155
|
}
|
112
|
-
custom_settings = {}
|
113
|
-
@config['algolia']['settings'].each do |key, value|
|
114
|
-
custom_settings[key.to_sym] = value
|
115
|
-
end
|
116
|
-
settings = default_settings.merge(custom_settings)
|
117
|
-
|
118
|
-
index.set_settings(settings)
|
119
|
-
end
|
120
|
-
|
121
|
-
def get_items_from_file(file)
|
122
|
-
is_page = file.is_a?(Jekyll::Page)
|
123
|
-
is_post = file.is_a?(Jekyll::Post)
|
124
|
-
|
125
|
-
# We only index posts, and markdown pages
|
126
|
-
return nil unless is_page || is_post
|
127
|
-
return nil if is_page && !parseable?(file)
|
128
|
-
return nil if excluded_file?(file)
|
129
|
-
|
130
|
-
html = file.content.gsub("\n", ' ')
|
131
|
-
|
132
|
-
if is_post
|
133
|
-
tags = get_tags_from_post(file)
|
134
|
-
base_data = {
|
135
|
-
type: 'post',
|
136
|
-
parent_id: file.id,
|
137
|
-
url: file.url,
|
138
|
-
title: file.title,
|
139
|
-
tags: tags,
|
140
|
-
slug: file.slug,
|
141
|
-
posted_at: file.date.to_time.to_i
|
142
|
-
}
|
143
|
-
else
|
144
|
-
base_data = {
|
145
|
-
type: 'page',
|
146
|
-
parent_id: file.basename,
|
147
|
-
url: file.url,
|
148
|
-
title: file['title'],
|
149
|
-
slug: file.basename
|
150
|
-
}
|
151
|
-
end
|
152
|
-
|
153
|
-
get_paragraphs_from_html(html, base_data)
|
154
|
-
end
|
155
|
-
|
156
|
-
# Get a list of tags from a post. Handle both classic string tags or
|
157
|
-
# extended object tags
|
158
|
-
def get_tags_from_post(post)
|
159
|
-
tags = post.tags
|
160
|
-
return [] if tags.is_a?(Array) || tags.nil?
|
161
|
-
tags.map! { |tag| tag.to_s.gsub(',', '') }
|
162
|
-
end
|
163
|
-
|
164
|
-
# Get the list of headings (h1, h2, etc) above the specified node
|
165
|
-
def get_previous_hx(node, memo = { level: 7 })
|
166
|
-
previous = node.previous_sibling
|
167
|
-
# Stop if no previous element
|
168
|
-
unless previous
|
169
|
-
memo.delete(:level)
|
170
|
-
return memo
|
171
|
-
end
|
172
|
-
|
173
|
-
# Skip non-html elements
|
174
|
-
return get_previous_hx(previous, memo) unless previous.element?
|
175
156
|
|
176
|
-
#
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
157
|
+
# Merge default settings with user custom ones
|
158
|
+
if @config['algolia'].key?('settings')
|
159
|
+
custom_settings = {}
|
160
|
+
@config['algolia']['settings'].each do |key, value|
|
161
|
+
custom_settings[key.to_sym] = value
|
162
|
+
end
|
163
|
+
settings.merge!(custom_settings)
|
181
164
|
end
|
182
165
|
|
183
|
-
|
184
|
-
title_level = tag_name.gsub('h', '').to_i
|
185
|
-
return get_previous_hx(previous, memo) if title_level >= memo[:level]
|
186
|
-
memo[:level] = title_level
|
187
|
-
|
188
|
-
# Add to the memo and continue
|
189
|
-
memo[tag_name.to_sym] = previous.content
|
190
|
-
get_previous_hx(previous, memo)
|
191
|
-
end
|
192
|
-
|
193
|
-
# Get a custom value representing the number of word occurrence from the
|
194
|
-
# titles into the content
|
195
|
-
def get_title_weight(content, item)
|
196
|
-
# Get list of words
|
197
|
-
words = %i(title h1 h2 h3 h4 h5 h6)
|
198
|
-
.select { |title| item.key?(title) }
|
199
|
-
.map { |title| item[title].split(/\W+/) }
|
200
|
-
.flatten
|
201
|
-
.compact
|
202
|
-
.uniq
|
203
|
-
# Count how many words are in the text
|
204
|
-
weight = 0
|
205
|
-
words.each { |word| weight += 1 if content.include?(word) }
|
206
|
-
weight
|
207
|
-
end
|
208
|
-
|
209
|
-
# Will get a unique css selector for the node
|
210
|
-
def get_css_selector(node)
|
211
|
-
node.css_path.gsub('html > body > ', '')
|
212
|
-
end
|
213
|
-
|
214
|
-
# Get a list of items representing the different paragraphs
|
215
|
-
def get_paragraphs_from_html(html, base_data)
|
216
|
-
doc = Nokogiri::HTML(html)
|
217
|
-
paragraphs = doc.css('p').map.with_index do |p, index|
|
218
|
-
next unless p.text.size > 0
|
219
|
-
new_item = base_data.clone
|
220
|
-
new_item.merge!(get_previous_hx(p))
|
221
|
-
new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
|
222
|
-
new_item[:css_selector] = get_css_selector(p)
|
223
|
-
new_item[:raw_html] = p.to_s
|
224
|
-
new_item[:text] = p.content
|
225
|
-
new_item[:title_weight] = get_title_weight(p.text, new_item)
|
226
|
-
new_item
|
227
|
-
end
|
228
|
-
paragraphs.compact
|
166
|
+
index.set_settings(settings)
|
229
167
|
end
|
230
168
|
|
231
169
|
def push(items)
|
232
|
-
|
233
|
-
application_id = @config['algolia']['application_id']
|
234
|
-
index_name = @config['algolia']['index_name']
|
235
|
-
check_credentials(api_key, application_id, index_name)
|
170
|
+
check_credentials
|
236
171
|
|
237
|
-
|
172
|
+
index_name = @config['algolia']['index_name']
|
173
|
+
Algolia.init(
|
174
|
+
application_id: @config['algolia']['application_id'],
|
175
|
+
api_key: api_key
|
176
|
+
)
|
238
177
|
index = Algolia::Index.new(index_name)
|
239
178
|
configure_index(index)
|
240
179
|
index.clear_index
|
metadata
CHANGED
@@ -1,15 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: algoliasearch-jekyll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tim Carry
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: jekyll
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: guard-rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.6'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.6'
|
13
55
|
- !ruby/object:Gem::Dependency
|
14
56
|
name: nokogiri
|
15
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +109,7 @@ dependencies:
|
|
67
109
|
- !ruby/object:Gem::Version
|
68
110
|
version: '1.4'
|
69
111
|
description: Index all your pages and posts to an Algolia index with `jekyll algolia
|
70
|
-
|
112
|
+
push`
|
71
113
|
email: tim@pixelastic.com
|
72
114
|
executables: []
|
73
115
|
extensions: []
|