algoliasearch-jekyll 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/algoliasearch-jekyll.rb +53 -0
- data/lib/push.rb +226 -0
- metadata +102 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 300e5c91ad0901840959d34b5cbb65d85f8508a8
|
4
|
+
data.tar.gz: fb8ec8f5a6a9afb49c81adf20c0b09ddbc4da15f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cee39f180e347887d52eaf144530aa146e057be93f395291d7f74e4e4ee608de2d8a804b7a6334b5d38247d5e4b1e5641fc192c7ae5b55a6f3dd85eb9852fde4
|
7
|
+
data.tar.gz: 2864b693fa1409dda9edfe64724c2e771ab5d9b6c778f8d1034dcd931471e5d02ce5c8bff2708db4711a5898bb266d3c6190428c2fab26d6e0576007973a997b
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require 'awesome_print'
|
5
|
+
|
6
|
+
require_relative './push.rb'
|
7
|
+
|
8
|
+
# `jekyll algolia` main entry
|
9
|
+
class AlgoliaSearchJekyll < Jekyll::Command
|
10
|
+
class << self
|
11
|
+
# Registers the `jekyll algolia` command, and its `push` subcommand, on the
# given program. Running `push` builds the configuration from the command
# line options and hands over to AlgoliaSearchJekyllPush.process.
def init_with_program(prog)
  prog.command(:algolia) do |algolia|
    algolia.syntax('algolia <command> [options]')
    algolia.description('Keep your content in sync with your Algolia index')

    algolia.command(:push) do |push|
      push.syntax('push [options]')
      push.description('Push your content to your index')
      add_build_options(push)

      push.action do |args, options|
        # Kept in an instance variable because api_key reads @config['source']
        @config = configuration_from_options(options)
        AlgoliaSearchJekyllPush.process(args, options, @config)
      end
    end
  end
end
|
29
|
+
|
30
|
+
# Allow a subset of the default `jekyll build` options
|
31
|
+
# Declares on `command` the options this plugin accepts. Each entry below is
# the full argument list of one `command.option` call, declared in order.
# Returns the result of the last `command.option` call.
def add_build_options(command)
  option_specs = [
    ['config', '--config CONFIG_FILE[,CONFIG_FILE2,...]', Array,
     'Custom configuration file'],
    ['future', '--future', 'Index posts with a future date'],
    ['limit_posts', '--limit_posts MAX_POSTS', Integer,
     'Limits the number of posts to parse and index'],
    ['show_drafts', '-D', '--drafts', 'Index posts in the _drafts folder'],
    ['unpublished', '--unpublished',
     'Index posts that were marked as unpublished']
  ]
  option_specs.map { |spec| command.option(*spec) }.last
end
|
42
|
+
|
43
|
+
# Returns the Algolia API key, or nil when none is configured.
#
# The ALGOLIA_API_KEY environment variable takes precedence. Otherwise the key
# is read from the `_algolia_api_key` file located in the source folder
# (@config['source']). A missing, empty or whitespace-only file yields nil so
# that callers can report a missing key instead of sending a blank one.
def api_key
  env_key = ENV['ALGOLIA_API_KEY']
  return env_key if env_key

  key_file = File.join(@config['source'], '_algolia_api_key')
  # File.file? also rules out a directory of that name, which cannot be read
  return nil unless File.file?(key_file)

  # File.read closes the file once read, unlike a bare File.open(...).read
  key = File.read(key_file).strip
  key.empty? ? nil : key
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/push.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
require 'algoliasearch'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
# `jekyll algolia push` command
|
6
|
+
class AlgoliaSearchJekyllPush < Jekyll::Command
|
7
|
+
class << self
|
8
|
+
# Intentionally empty: nothing is registered on the program from here, the
# `push` subcommand is declared by AlgoliaSearchJekyll.init_with_program.
def init_with_program(_prog); end
|
10
|
+
|
11
|
+
# Entry point of `jekyll algolia push`.
#
# args    - command line arguments; the first one, when present, overrides
#           the index name found in the configuration
# options - command line options (stored, not used directly here)
# config  - Jekyll configuration hash used to build the site
#
# Builds a Jekyll site and runs its processing, with the `write` step
# replaced so that the extracted items are pushed to the index.
def process(args = [], options = {}, config = {})
  @args = args
  @options = options
  @config = config

  index_name = args[0]
  if index_name
    # The `algolia` section may be missing from the configuration; create it
    # rather than failing with a NoMethodError on nil
    @config['algolia'] ||= {}
    @config['algolia']['index_name'] = index_name
  end
  site = Jekyll::Site.new(@config)

  # Instead of writing generated website to disk, we will push it to the
  # index
  def site.write
    items = []
    each_site_file do |file|
      new_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
      next if new_items.nil?
      # Append in place instead of rebuilding the array for every file
      items.concat(new_items)
    end
    AlgoliaSearchJekyllPush.push(items)
  end

  site.process
end
|
35
|
+
|
36
|
+
# Tells whether the extension of `filename` (without its dot) is one of the
# entries of the comma-separated `markdown_ext` configuration value.
def markdown?(filename)
  extension = File.extname(filename).delete('.')
  markdown_extensions = @config['markdown_ext'].split(',')
  markdown_extensions.any? { |candidate| candidate == extension }
end
|
40
|
+
|
41
|
+
# Verifies that the three values needed to reach the index are present.
# For the first one that is missing (checked in the order API key,
# application ID, index name), logs an error followed by instructions, then
# exits with status 1. Returns true when all three are set.
def check_credentials(api_key, application_id, index_name)
  unless api_key
    logger = Jekyll.logger
    logger.error('Algolia Error: No API key defined')
    logger.warn(' You have two ways to configure your API key:')
    logger.warn(' - The ALGOLIA_API_KEY environment variable')
    logger.warn(' - A file named ./_algolia_api_key in your source folder')
    exit(1)
  end

  unless application_id
    logger = Jekyll.logger
    logger.error('Algolia Error: No application ID defined')
    logger.warn(' Please set your application id in the _config.yml file, like so:')
    puts('')
    # The spaces are needed otherwise the text is centered
    logger.warn(' algolia: ')
    logger.warn(" application_id: '{your_application_id}'")
    puts('')
    logger.warn(' Your application ID can be found in your algolia dashboard')
    logger.warn(' https://www.algolia.com/licensing')
    exit(1)
  end

  unless index_name
    logger = Jekyll.logger
    logger.error('Algolia Error: No index name defined')
    logger.warn(' Please set your index name in the _config.yml file, like so:')
    puts('')
    # The spaces are needed otherwise the text is centered
    logger.warn(' algolia: ')
    logger.warn(" index_name: '{your_index_name}'")
    puts('')
    logger.warn(' You can edit your indices in your dashboard')
    logger.warn(' https://www.algolia.com/explorer')
    exit(1)
  end

  true
end
|
81
|
+
|
82
|
+
# Applies the plugin's fixed set of settings to `index` through its
# `set_settings` method, and returns whatever that call returns.
def configure_index(index)
  settings = {
    attributeForDistinct: 'parent_id',
    attributesForFaceting: %w[tags type],
    attributesToHighlight: %w[title content],
    attributesToIndex: %w[title h1 h2 h3 h4 h5 h6 content tags],
    attributesToRetrieve: %w[title posted_at content url css_selector],
    customRanking: ['desc(posted_at)', 'desc(title_weight)'],
    distinct: true,
    highlightPreTag: '<span class="algolia__result-highlight">',
    highlightPostTag: '</span>'
  }
  index.set_settings(**settings)
end
|
95
|
+
|
96
|
+
# Pushes `items` to the configured Algolia index.
#
# Reads the API key from AlgoliaSearchJekyll.api_key and the application ID
# and index name from the `algolia` section of @config, validates them with
# check_credentials, then configures the index, clears it and adds the items
# in batches of 1000. Any error raised while adding a batch is logged and the
# process exits with status 1.
def push(items)
  api_key = AlgoliaSearchJekyll.api_key
  # A configuration without an `algolia` section must reach check_credentials
  # (which explains what to add) instead of failing with NoMethodError on nil
  algolia_config = @config['algolia'] || {}
  application_id = algolia_config['application_id']
  index_name = algolia_config['index_name']
  check_credentials(api_key, application_id, index_name)

  Algolia.init(application_id: application_id, api_key: api_key)
  index = Algolia::Index.new(index_name)
  configure_index(index)
  index.clear_index

  items.each_slice(1000) do |batch|
    Jekyll.logger.info "Indexing #{batch.size} items"
    begin
      index.add_objects(batch)
    rescue StandardError => error
      Jekyll.logger.error 'Algolia Error: HTTP Error'
      Jekyll.logger.warn error.message
      exit 1
    end
  end

  Jekyll.logger.info "Indexing of #{items.size} items " \
                     "in #{index_name} done."
end
|
121
|
+
|
122
|
+
# Turns a site file into the list of items to index, or nil when the file is
# skipped. Only Jekyll posts and markdown Jekyll pages are handled; their
# content has newlines replaced by spaces, and is then split into items by
# get_paragraphs_from_html, each item sharing the file-level data built here.
def get_items_from_file(file)
  page = file.is_a?(Jekyll::Page)
  post = file.is_a?(Jekyll::Post)

  # Anything that is neither a page nor a post is ignored, and so are pages
  # that are not written in markdown
  return nil if !page && !post
  return nil if page && !markdown?(file.path)

  html = file.content.gsub("\n", ' ')

  base_data =
    if post
      tags = get_tags_from_post(file)
      {
        type: 'post',
        parent_id: file.id,
        url: file.url,
        title: file.title,
        tags: tags,
        slug: file.slug,
        posted_at: file.date.to_time.to_i
      }
    else
      {
        type: 'page',
        parent_id: file.basename,
        url: file.url,
        title: file['title'],
        slug: file.basename
      }
    end

  get_paragraphs_from_html(html, base_data)
end
|
155
|
+
|
156
|
+
# Get a list of tags from a post. Handle both classic string tags and
|
157
|
+
# extended object tags
|
158
|
+
# Returns the tags of `post` as an array of strings with commas removed.
# Each tag is converted with `to_s`, so tags may be plain strings or any
# other object. Returns an empty array when the post has no tag list.
#
# The post's own tag list is left untouched: a new array is returned.
def get_tags_from_post(post)
  tags = post.tags
  # Only an actual list of tags can be converted; the previous guard was
  # inverted and returned [] precisely when tags were present
  return [] unless tags.is_a?(Array)
  tags.map { |tag| tag.to_s.delete(',') }
end
|
163
|
+
|
164
|
+
# Walks backwards through the siblings preceding `node` and collects the text
# of the headings (h1 to h6) found, keyed by tag name as a symbol.
#
# A heading is only kept when its level is strictly lower than the level of
# the last heading kept (memo[:level], 7 at the start so any heading is
# accepted first). The :level bookkeeping key is removed before `memo` is
# returned.
def get_previous_hx(node, memo = { level: 7 })
  heading_tags = %w[h1 h2 h3 h4 h5 h6]
  current = node

  # Each turn moves one sibling back; the loop ends when there is none left
  while (current = current.previous_sibling)
    # Text nodes, comments, etc. carry no heading
    next unless current.element?

    tag = current.name
    next unless heading_tags.include?(tag)

    # A heading of the same or a deeper level than one already kept is not
    # an ancestor title of the node
    level = tag.delete('h').to_i
    next if level >= memo[:level]

    memo[:level] = level
    memo[tag.to_sym] = current.text
  end

  memo.delete(:level)
  memo
end
|
191
|
+
|
192
|
+
# Get a custom value representing the number of word occurrences from the
|
193
|
+
# titles into the content
|
194
|
+
# Counts how many distinct words of the item's titles (the :title and
# :h1 to :h6 keys of `item`) appear in `content`.
#
# content - text to look the words up in (substring match, case-sensitive)
# item    - hash holding the titles; missing or nil titles are ignored
#
# Returns the number of matching words as an Integer.
def get_title_weight(content, item)
  words = %i(title h1 h2 h3 h4 h5 h6)
          # to_s lets a nil (or non-string) title through instead of raising
          .flat_map { |key| item[key].to_s.split(/\W+/) }
          # split yields a leading '' when a title starts with a separator,
          # and '' would match any content
          .reject(&:empty?)
          .uniq
  words.count { |word| content.include?(word) }
end
|
207
|
+
|
208
|
+
# Will get a unique css selector for the node
|
209
|
+
# Returns the node's `css_path` with every 'html > body > ' occurrence
# removed.
def get_css_selector(node)
  full_path = node.css_path
  full_path.gsub('html > body > ', '')
end
|
212
|
+
|
213
|
+
# Builds one item per <p> element found in `html`.
#
# Each item starts as a copy of `base_data`, to which are added the headings
# preceding the paragraph, an objectID made of the parent id and the
# paragraph position, the paragraph's css selector, its markup as content,
# and its title weight.
def get_paragraphs_from_html(html, base_data)
  paragraphs = Nokogiri::HTML(html).css('p')

  paragraphs.each_with_index.map do |paragraph, position|
    record = base_data.clone
    record.merge!(get_previous_hx(paragraph))
    record[:objectID] = "#{record[:parent_id]}_#{position}"
    record[:css_selector] = get_css_selector(paragraph)
    record[:content] = paragraph.to_s
    # Computed last: it reads the title keys merged into the record above
    record[:title_weight] = get_title_weight(paragraph.text, record)
    record
  end
end
|
225
|
+
end
|
226
|
+
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: algoliasearch-jekyll
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tim Carry
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-06-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: awesome_print
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.6'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: algoliasearch
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
description: Index all your pages and posts to an Algolia index with `jekyll algolia
|
70
|
+
index`
|
71
|
+
email: tim@pixelastic.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- lib/algoliasearch-jekyll.rb
|
77
|
+
- lib/push.rb
|
78
|
+
homepage: https://github.com/algolia/algoliasearch-jekyll
|
79
|
+
licenses:
|
80
|
+
- MIT
|
81
|
+
metadata: {}
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.4.6
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: AlgoliaSearch for Jekyll
|
102
|
+
test_files: []
|