algoliasearch-jekyll 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/algoliasearch-jekyll.rb +53 -0
- data/lib/push.rb +226 -0
- metadata +102 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 300e5c91ad0901840959d34b5cbb65d85f8508a8
|
4
|
+
data.tar.gz: fb8ec8f5a6a9afb49c81adf20c0b09ddbc4da15f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cee39f180e347887d52eaf144530aa146e057be93f395291d7f74e4e4ee608de2d8a804b7a6334b5d38247d5e4b1e5641fc192c7ae5b55a6f3dd85eb9852fde4
|
7
|
+
data.tar.gz: 2864b693fa1409dda9edfe64724c2e771ab5d9b6c778f8d1034dcd931471e5d02ce5c8bff2708db4711a5898bb266d3c6190428c2fab26d6e0576007973a997b
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
|
4
|
+
require 'awesome_print'
|
5
|
+
|
6
|
+
require_relative './push.rb'
|
7
|
+
|
8
|
+
# `jekyll algolia` main entry
|
9
|
+
class AlgoliaSearchJekyll < Jekyll::Command
|
10
|
+
class << self
|
11
|
+
# Registers the `jekyll algolia` command, together with its `push`
# subcommand, on the given program object.
#
# prog - the command-line program; only its `command` method is used here.
#
# Returns whatever `prog.command` returns.
def init_with_program(prog)
  prog.command(:algolia) do |algolia|
    algolia.syntax 'algolia <command> [options]'
    algolia.description 'Keep your content in sync with your Algolia index'

    algolia.command(:push) do |push_command|
      push_command.syntax 'push [options]'
      push_command.description 'Push your content to your index'

      # Expose the subset of build options declared by add_build_options
      add_build_options(push_command)

      push_command.action do |args, options|
        # The resolved configuration is kept on the receiver because
        # `api_key` reads `@config['source']` later on.
        @config = configuration_from_options(options)
        AlgoliaSearchJekyllPush.process(args, options, @config)
      end
    end
  end
end
|
29
|
+
|
30
|
+
# Declares, on the given command, the subset of `jekyll build` options that
# this plugin accepts. Each entry below is passed as-is to `command.option`,
# in the order listed.
#
# command - the (sub)command object receiving the option declarations.
#
# Returns the result of the last `command.option` call.
def add_build_options(command)
  option_specs = [
    ['config', '--config CONFIG_FILE[,CONFIG_FILE2,...]', Array,
     'Custom configuration file'],
    ['future', '--future', 'Index posts with a future date'],
    ['limit_posts', '--limit_posts MAX_POSTS', Integer,
     'Limits the number of posts to parse and index'],
    ['show_drafts', '-D', '--drafts', 'Index posts in the _drafts folder'],
    ['unpublished', '--unpublished',
     'Index posts that were marked as unpublished']
  ]

  option_specs.map { |spec| command.option(*spec) }.last
end
|
42
|
+
|
43
|
+
# Looks up the Algolia API key.
#
# The ALGOLIA_API_KEY environment variable wins when it is set. Otherwise the
# key is read from a file named `_algolia_api_key` located in the directory
# given by `@config['source']`.
#
# Returns the key as a String, or nil when no key could be found (no
# environment variable, and the key file is missing, is not a regular file,
# or contains only whitespace).
def api_key
  env_key = ENV['ALGOLIA_API_KEY']
  return env_key if env_key

  key_file = File.join(@config['source'], '_algolia_api_key')
  return nil unless File.file?(key_file)

  # File.read closes the handle itself, unlike File.open(...).read
  key = File.read(key_file).strip

  # A blank key must count as "no key" so that callers testing the result for
  # truthiness report the missing credential instead of using an empty one.
  key.empty? ? nil : key
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/push.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
require 'algoliasearch'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
# `jekyll algolia push` command
|
6
|
+
class AlgoliaSearchJekyllPush < Jekyll::Command
|
7
|
+
class << self
|
8
|
+
# Intentionally registers nothing: the `push` subcommand is declared by
# AlgoliaSearchJekyll.init_with_program, which then delegates to `process`.
#
# Returns nil.
def init_with_program(_prog)
  nil
end
|
10
|
+
|
11
|
+
# Runs the `push` command: builds a Jekyll site from the given configuration
# and pushes its content to the index instead of writing it to disk.
#
# args    - command-line arguments; the first one, when present, is used as
#           the index name and overrides the configured one.
# options - command-line options (stored, not otherwise used here).
# config  - the site configuration hash; it is modified in place when an
#           index name is given.
#
# Returns the result of `site.process`.
def process(args = [], options = {}, config = {})
  @args = args
  @options = options
  @config = config

  index_name = args[0]
  if index_name
    # The `algolia` section may be absent from the configuration; create it on
    # demand rather than failing on nil.
    @config['algolia'] ||= {}
    @config['algolia']['index_name'] = index_name
  end

  site = Jekyll::Site.new(@config)

  # Replace `write` on this one site object: every site file is converted to
  # index records and the whole set is pushed in one go.
  # NOTE(review): this relies on `site.process` calling `write` as its final
  # step -- confirm against the Jekyll version in use.
  def site.write
    items = []
    each_site_file do |file|
      new_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
      # Files that are not indexed yield nil
      items.concat(new_items) unless new_items.nil?
    end
    AlgoliaSearchJekyllPush.push(items)
  end

  site.process
end
|
35
|
+
|
36
|
+
# Tells whether a file name carries one of the markdown extensions listed in
# the comma-separated `markdown_ext` configuration value. The comparison is
# exact (case-sensitive) and done without the leading dot.
#
# filename - path or name of the file to test.
#
# Returns true or false.
def markdown?(filename)
  extension = File.extname(filename).tr('.', '')
  markdown_extensions = @config['markdown_ext'].split(',')
  markdown_extensions.include?(extension)
end
|
40
|
+
|
41
|
+
# Verifies that the three values needed to talk to Algolia are present. The
# first missing one (checked in the order API key, application ID, index
# name) is reported through the Jekyll logger and the process exits with
# status 1.
#
# api_key        - the Algolia API key, or nil/false when missing.
# application_id - the Algolia application ID, or nil/false when missing.
# index_name     - the name of the target index, or nil/false when missing.
#
# Returns true when all three values are present.
def check_credentials(api_key, application_id, index_name)
  unless api_key
    report_missing_credential(
      'Algolia Error: No API key defined',
      [
        ' You have two ways to configure your API key:',
        ' - The ALGOLIA_API_KEY environment variable',
        ' - A file named ./_algolia_api_key in your source folder'
      ]
    )
  end

  unless application_id
    report_missing_credential(
      'Algolia Error: No application ID defined',
      [
        ' Please set your application id in the _config.yml file, like so:',
        :blank_line,
        # The spaces are needed otherwise the text is centered
        ' algolia: ',
        ' application_id: \'{your_application_id}\'',
        :blank_line,
        ' Your application ID can be found in your algolia dashboard',
        ' https://www.algolia.com/licensing'
      ]
    )
  end

  unless index_name
    report_missing_credential(
      'Algolia Error: No index name defined',
      [
        ' Please set your index name in the _config.yml file, like so:',
        :blank_line,
        # The spaces are needed otherwise the text is centered
        ' algolia: ',
        ' index_name: \'{your_index_name}\'',
        :blank_line,
        ' You can edit your indices in your dashboard',
        ' https://www.algolia.com/explorer'
      ]
    )
  end

  true
end

# Logs `headline` as an error, then each help line as a warning
# (`:blank_line` entries print an empty line on standard output), and exits
# with status 1.
def report_missing_credential(headline, help_lines)
  Jekyll.logger.error headline
  help_lines.each do |line|
    if line == :blank_line
      puts ''
    else
      Jekyll.logger.warn line
    end
  end
  exit 1
end
|
81
|
+
|
82
|
+
# Applies this plugin's search settings to the given index through its
# `set_settings` method.
#
# index - the index object to configure.
#
# Returns whatever `index.set_settings` returns.
def configure_index(index)
  settings = {
    # Records sharing a parent_id are treated as one result
    attributeForDistinct: 'parent_id',
    attributesForFaceting: %w(tags type),
    attributesToHighlight: %w(title content),
    attributesToIndex: %w(title h1 h2 h3 h4 h5 h6 content tags),
    attributesToRetrieve: %w(title posted_at content url css_selector),
    customRanking: ['desc(posted_at)', 'desc(title_weight)'],
    distinct: true,
    highlightPreTag: '<span class="algolia__result-highlight">',
    highlightPostTag: '</span>'
  }

  index.set_settings(settings)
end
|
95
|
+
|
96
|
+
# Sends the given records to the configured Algolia index.
#
# The credentials are validated first (see check_credentials, which exits the
# process when one is missing). The index is then configured and cleared
# before the records are uploaded in batches of 1000, so a failed upload
# leaves the index partially filled. Any error raised while uploading a batch
# is logged and the process exits with status 1.
#
# items - Array of record hashes to index.
#
# Returns the result of the final logger call.
def push(items)
  api_key = AlgoliaSearchJekyll.api_key

  # The `algolia` section may be absent from the configuration. Fall back to
  # an empty hash so that check_credentials can report what is missing,
  # rather than failing here on nil.
  algolia_config = @config['algolia'] || {}
  application_id = algolia_config['application_id']
  index_name = algolia_config['index_name']
  check_credentials(api_key, application_id, index_name)

  Algolia.init(application_id: application_id, api_key: api_key)
  index = Algolia::Index.new(index_name)
  configure_index(index)
  index.clear_index

  items.each_slice(1000) do |batch|
    Jekyll.logger.info "Indexing #{batch.size} items"
    begin
      index.add_objects(batch)
    rescue StandardError => error
      Jekyll.logger.error 'Algolia Error: HTTP Error'
      Jekyll.logger.warn error.message
      exit 1
    end
  end

  Jekyll.logger.info "Indexing of #{items.size} items " \
                     "in #{index_name} done."
end
|
121
|
+
|
122
|
+
# Converts one site file into index records, one per paragraph.
#
# Only posts and markdown pages are indexed; any other file yields nil.
#
# file - a site file; Jekyll::Post and Jekyll::Page instances are handled.
#
# Returns the result of get_paragraphs_from_html for indexed files, nil
# otherwise.
def get_items_from_file(file)
  page = file.is_a?(Jekyll::Page)
  post = file.is_a?(Jekyll::Post)

  indexable = post || (page && markdown?(file.path))
  return nil unless indexable

  # Newlines are flattened to spaces before the content is parsed
  html = file.content.gsub("\n", ' ')

  # Attributes shared by every record extracted from this file
  base_data =
    if post
      tags = get_tags_from_post(file)
      {
        type: 'post',
        parent_id: file.id,
        url: file.url,
        title: file.title,
        tags: tags,
        slug: file.slug,
        posted_at: file.date.to_time.to_i
      }
    else
      # NOTE(review): parent_id is the page basename only; two pages with the
      # same basename would share it -- confirm whether that can happen.
      {
        type: 'page',
        parent_id: file.basename,
        url: file.url,
        title: file['title'],
        slug: file.basename
      }
    end

  get_paragraphs_from_html(html, base_data)
end
|
155
|
+
|
156
|
+
# Get the list of tags of a post as plain strings. Handles both classic
# string tags and extended object tags: each tag is converted with `to_s` and
# has its commas removed.
#
# post - an object responding to `tags`.
#
# Returns a new Array of Strings; empty when the post has no tags. The post's
# own tag list is left untouched.
def get_tags_from_post(post)
  tags = post.tags
  return [] if tags.nil?

  # A single, non-array value is treated as a one-element list
  tags = [tags] unless tags.is_a?(Array)

  # `map` rather than `map!` so the post's tags are not modified in place
  tags.map { |tag| tag.to_s.gsub(',', '') }
end
|
163
|
+
|
164
|
+
# Collects the headings (h1 to h6) that precede a node among its previous
# siblings, closest first.
#
# A heading is kept only when its level is strictly higher in the hierarchy
# (smaller number) than every heading already kept, so the result describes
# the chain of titles the node sits under.
#
# node - a node responding to `previous_sibling`; siblings must respond to
#        `element?`, `name` and `text`.
# memo - accumulator hash; its :level entry is the lowest heading number kept
#        so far (7 means none yet). It is modified in place.
#
# Returns memo, without its :level entry, mapping :h1..:h6 to heading text.
def get_previous_hx(node, memo = { level: 7 })
  heading_names = %w(h1 h2 h3 h4 h5 h6)

  sibling = node.previous_sibling
  while sibling
    # Only html elements that are headings are of interest
    if sibling.element? && heading_names.include?(sibling.name)
      heading_level = sibling.name.delete('h').to_i

      # Ignore headings that are not above the ones already recorded
      if heading_level < memo[:level]
        memo[:level] = heading_level
        memo[sibling.name.to_sym] = sibling.text
      end
    end

    sibling = sibling.previous_sibling
  end

  # :level is bookkeeping only and must not leak into the result
  memo.delete(:level)
  memo
end
|
191
|
+
|
192
|
+
# Get a custom value representing the number of distinct words from the
# titles (the title and h1 to h6 entries of the item) that occur in the
# content.
#
# Words are the non-empty runs of word characters found in the titles.
# Matching is a case-sensitive substring search in the content. Missing or
# nil titles contribute no word.
#
# content - String in which the words are looked up.
# item    - Hash that may hold :title and :h1..:h6 entries.
#
# Returns an Integer.
def get_title_weight(content, item)
  words = %i(title h1 h2 h3 h4 h5 h6)
          .flat_map { |key| item[key].to_s.split(/\W+/) }
          # A title starting with a non-word character yields a leading empty
          # string, which every content "includes"; it must not be counted.
          .reject(&:empty?)
          .uniq

  words.count { |word| content.include?(word) }
end
|
207
|
+
|
208
|
+
# Builds a css selector for the node from its `css_path`, with every
# occurrence of the 'html > body > ' prefix text removed.
#
# node - a node responding to `css_path`.
#
# Returns the selector as a String.
def get_css_selector(node)
  full_path = node.css_path
  full_path.gsub('html > body > ', '')
end
|
212
|
+
|
213
|
+
# Splits an html document into one record per `p` element.
#
# Each record starts as a copy of base_data, to which are added: the headings
# preceding the paragraph (see get_previous_hx), an objectID made of the
# parent_id and the paragraph position, the css selector of the paragraph,
# its html as content and its title weight.
#
# html      - String of html to parse.
# base_data - Hash of attributes shared by all records; expected to hold a
#             :parent_id entry.
#
# Returns an Array of record hashes, in document order.
def get_paragraphs_from_html(html, base_data)
  paragraphs = Nokogiri::HTML(html).css('p')

  paragraphs.each_with_index.map do |paragraph, position|
    record = base_data.clone.update(get_previous_hx(paragraph))
    record[:objectID] = "#{record[:parent_id]}_#{position}"
    record[:css_selector] = get_css_selector(paragraph)
    record[:content] = paragraph.to_s
    # Computed last: the weight is derived from the titles already in record
    record[:title_weight] = get_title_weight(paragraph.text, record)
    record
  end
end
|
225
|
+
end
|
226
|
+
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: algoliasearch-jekyll
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tim Carry
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-06-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: json
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: awesome_print
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.6'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.6'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: algoliasearch
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
description: Index all your pages and posts to an Algolia index with `jekyll algolia
|
70
|
+
index`
|
71
|
+
email: tim@pixelastic.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- lib/algoliasearch-jekyll.rb
|
77
|
+
- lib/push.rb
|
78
|
+
homepage: https://github.com/algolia/algoliasearch-jekyll
|
79
|
+
licenses:
|
80
|
+
- MIT
|
81
|
+
metadata: {}
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.4.6
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: AlgoliaSearch for Jekyll
|
102
|
+
test_files: []
|