jekyll-algolia 1.0.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +51 -30
- data/README.md +69 -27
- data/lib/errors/invalid_credentials.txt +12 -0
- data/lib/errors/invalid_index_name.txt +9 -0
- data/lib/errors/missing_api_key.txt +15 -0
- data/lib/errors/missing_application_id.txt +11 -0
- data/lib/errors/missing_index_name.txt +18 -0
- data/lib/errors/no_records_found.txt +14 -0
- data/lib/errors/record_too_big.txt +27 -0
- data/lib/errors/record_too_big_api.txt +10 -0
- data/lib/errors/settings_manually_edited.txt +17 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/errors/unknown_application_id.txt +16 -0
- data/lib/errors/unknown_settings.txt +12 -0
- data/lib/jekyll-algolia.rb +45 -60
- data/lib/jekyll/algolia/configurator.rb +137 -44
- data/lib/jekyll/algolia/error_handler.rb +36 -48
- data/lib/jekyll/algolia/extractor.rb +16 -6
- data/lib/jekyll/algolia/file_browser.rb +161 -68
- data/lib/jekyll/algolia/hooks.rb +18 -6
- data/lib/jekyll/algolia/indexer.rb +283 -145
- data/lib/jekyll/algolia/logger.rb +39 -8
- data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
- data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
- data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
- data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
- data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
- data/lib/jekyll/algolia/progress_bar.rb +27 -0
- data/lib/jekyll/algolia/shrinker.rb +112 -0
- data/lib/jekyll/algolia/utils.rb +118 -2
- data/lib/jekyll/algolia/version.rb +1 -1
- data/lib/jekyll/commands/algolia.rb +3 -14
- metadata +75 -31
- data/errors/invalid_credentials.txt +0 -10
- data/errors/invalid_credentials_for_tmp_index.txt +0 -17
- data/errors/invalid_index_name.txt +0 -11
- data/errors/missing_api_key.txt +0 -17
- data/errors/missing_application_id.txt +0 -12
- data/errors/missing_index_name.txt +0 -19
- data/errors/no_records_found.txt +0 -20
- data/errors/record_too_big.txt +0 -25
- data/errors/unknown_application_id.txt +0 -20
- data/errors/unknown_settings.txt +0 -15
@@ -4,24 +4,44 @@ module Jekyll
|
|
4
4
|
module Algolia
|
5
5
|
# Display helpful error messages
|
6
6
|
module Logger
|
7
|
+
# Public: Silence all Jekyll log output in this block
|
8
|
+
# Usage:
|
9
|
+
# Logger.silent do
|
10
|
+
# # whatever Jekyll code here
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# This is especially useful when Jekyll is too talkative about what is
|
14
|
+
# logged. It works by redefining Jekyll.logger.write to a noop
|
15
|
+
# temporarily and re-attributing the original method once finished.
|
16
|
+
def self.silent
  # Grab the real writer first so it can be reinstated once the block is done
  original_writer = Jekyll.logger.method(:write)
  discard_output = proc { |*_ignored| }

  # Swap the writer for a no-op while the caller's block runs
  Utils.monkey_patch(Jekyll.logger, :write, discard_output)
  begin
    yield
  ensure
    # Reinstate the real writer, even when the block raised
    Utils.monkey_patch(Jekyll.logger, :write, original_writer)
  end
end
|
25
|
+
|
7
26
|
# Public: Displays a log line
|
8
27
|
#
|
9
28
|
# line - Line to display. Expected to be of the following format:
|
10
29
|
# "X:Your content"
|
11
30
|
# Where X is either I, W or E for marking respectively an info, warning or
|
12
31
|
# error display
|
13
|
-
def self.log(
|
14
|
-
type, content = /^(I|W|E):(.*)
|
32
|
+
def self.log(input)
  # Maps the one-letter prefix to the Jekyll.logger method used for display
  logger_mapping = {
    'E' => :error,
    'I' => :info,
    'W' => :warn
  }

  # Expected format is "X:content"; the /m flag lets the content span
  # several lines.
  text = input.to_s
  match = /^(I|W|E):(.*)/m.match(text)

  # An input without a known prefix used to crash with a NoMethodError on
  # nil. We now display it in full as an info line instead.
  type, content = match ? match.captures : ['I', text]

  # Display by chunk of 80-characters lines
  Utils.split_lines(content, 80).each do |line|
    Jekyll.logger.public_send(logger_mapping[type], line)
  end
end
|
26
46
|
|
27
47
|
# Public: Only display a log line if verbose mode is enabled
|
@@ -29,9 +49,20 @@ module Jekyll
|
|
29
49
|
# line - The line to display, following the same format as .log
|
30
50
|
def self.verbose(line)
  # Stay quiet unless the verbose flag is on; otherwise defer to .log
  log(line) if Configurator.verbose?
end
|
34
55
|
|
56
|
+
# Public: Write the specified content to a file in the source directory
|
57
|
+
#
|
58
|
+
# filename - the file basename
|
59
|
+
# content - the actual content of the file
|
60
|
+
def self.write_to_file(filename, content)
  # The file lands in the configured `source` directory; its full path is
  # handed back to the caller.
  File.join(Configurator.get('source'), filename).tap do |destination|
    File.write(destination, content)
  end
end
|
65
|
+
|
35
66
|
# Public: Displays a helpful error message for one of the known errors
|
36
67
|
#
|
37
68
|
# message_id: A string identifying a known message
|
@@ -42,14 +73,14 @@ module Jekyll
|
|
42
73
|
def self.known_message(message_id, metadata = {})
|
43
74
|
file = File.expand_path(
|
44
75
|
File.join(
|
45
|
-
__dir__, '
|
76
|
+
__dir__, '../..', 'errors', "#{message_id}.txt"
|
46
77
|
)
|
47
78
|
)
|
48
79
|
|
49
80
|
# Convert all variables
|
50
81
|
content = File.open(file).read
|
51
82
|
metadata.each do |key, value|
|
52
|
-
content = content.gsub("{#{key}}", value)
|
83
|
+
content = content.gsub("{#{key}}", value.to_s)
|
53
84
|
end
|
54
85
|
|
55
86
|
# Display each line differently
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitHubPages
  # The github-pages gem automatically disables every plugin that is not in
  # the whitelist of plugins allowed by GitHub, including anything placed in
  # the `_plugins` folder.
  #
  # jekyll-algolia users rely on custom plugins in `_plugins` to define hooks
  # that alter the indexing. With the github-pages gem installed alongside,
  # those hooks would never be executed.
  #
  # The github-pages gem prevents access to custom plugins in two ways:
  # - it forces safe mode
  # - it loads custom plugins from a random directory
  #
  # Both are undone here: safe mode is switched off and plugins are read from
  # ./_plugins again.
  #
  # This file is only loaded when running `jekyll algolia`, so the regular
  # `jekyll build` is left untouched.
  class Configuration
    # Public: Apply the effective configuration to the site, with safe mode
    # disabled and the plugin directory pointing back to `_plugins`.
    #
    # site - The site whose config is read and replaced
    def self.set!(site)
      overridden = effective_config(site.config)
      overridden['safe'] = false
      overridden['plugins_dir'] = '_plugins'
      site.config = overridden
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Algolia
    # A Jekyll::Site subclass that overrides process from the parent class to
    # create JSON records out of rendered documents and push those records to
    # Algolia instead of writing files to disk.
    class Site < Jekyll::Site
      # Size taken by the objectID attribute that is added to every record
      # after shrinking, in the form of:
      #   "objectID": "16cd998991cc40d92402b0b4e6c55e8a"
      OBJECT_ID_ATTRIBUTE_LENGTH = 46

      # We expose a way to reset the collection, as it will be needed in the
      # tests
      attr_writer :collections

      # Backup of the pages, collections and static files as they were before
      # non-indexable files were removed (see keep_only_indexable_files)
      attr_reader :original_site_files

      # Public: Overwriting the parent method
      #
      # This will prepare the website, gathering all files, excluding the ones
      # we don't need to index, then render them (converting to HTML), then
      # finally call `push` to push to Algolia
      def process
        # Default Jekyll preflight
        reset
        read
        generate

        # Removing all files that won't be indexed, so we don't waste time
        # rendering them
        keep_only_indexable_files

        # Starting the rendering progress bar
        init_rendering_progress_bar

        # Converting them to HTML
        render

        # Pushing them to Algolia
        push
      end

      # Public: Return the number of pages/documents to index
      def indexable_item_count
        @collections.each_value.inject(@pages.length) do |count, collection|
          count + collection.docs.length
        end
      end

      # Public: Init the rendering progress bar, incrementing it for each
      # rendered item
      #
      # This uses Jekyll post_render hooks, listening to both pages and
      # documents
      #
      # NOTE(review): a new hook is registered on every call and nothing here
      # unregisters it; confirm this is only ever called once per process.
      def init_rendering_progress_bar
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Rendering to HTML (%j%%) |%B|'
        )
        Jekyll::Hooks.register [:pages, :documents], :post_render do
          progress_bar.increment
        end
      end

      # Public: Filtering a list of items to only keep the ones that are
      # indexable.
      #
      # items - List of Pages/Documents
      #
      # Returns a new list; the list passed in is left untouched.
      #
      # Note: It also sets the layout of the kept items to nil, to further
      # speed up the rendering
      def indexable_list(items)
        kept = items.select { |item| FileBrowser.indexable?(item) }
        kept.each do |item|
          item.data = {} if item.data.nil?
          item.data['layout'] = nil
        end
      end

      # Public: Removing non-indexable Pages, Posts and Documents from the
      # internals
      #
      # A backup of the unfiltered lists is kept in original_site_files.
      def keep_only_indexable_files
        @original_site_files = {
          pages: @pages,
          collections: backup_collections,
          static_files: @static_files
        }

        @pages = indexable_list(@pages)

        # Applying to each collections
        @collections.each_value do |collection|
          collection.docs = indexable_list(collection.docs)
        end

        # Remove all static files
        @static_files = []
      end

      # Public: Extract records from every file and index them
      def push
        records, file_count = extract_records

        # Applying the user hook on the whole list of records
        records = Hooks.apply_all(records, self)

        # Shrinking records to force them to fit under the max record size
        # limit, or displaying an error message if not possible. The room
        # needed by the objectID added right after is reserved upfront.
        max_record_size =
          Configurator.algolia('max_record_size') - OBJECT_ID_ATTRIBUTE_LENGTH
        records.map! do |record|
          Shrinker.fit_to_size(record, max_record_size)
        end

        # Adding a unique objectID to each record
        records.map! do |record|
          Extractor.add_unique_object_id(record)
        end

        Logger.verbose("I:Found #{file_count} files")

        Indexer.run(records)
      end

      private

      # Internal: Shallow copies of every collection, keyed like @collections.
      #
      # keep_only_indexable_files overwrites `docs` on the live collections,
      # so storing the live collections themselves in the backup would make
      # the backup lose the non-indexable documents as well.
      # NOTE(review): relies on `dup` keeping a reference to the current docs
      # list of each collection; confirm against Jekyll::Collection.
      def backup_collections
        @collections.each_with_object({}) do |(label, collection), backup|
          backup[label] = collection.dup
        end
      end

      # Internal: Run the Extractor on every indexable file of the site,
      # updating a progress bar along the way.
      #
      # Returns a pair: the flat list of records and the number of files they
      # were extracted from.
      def extract_records
        records = []
        file_count = 0
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Extracting records (%j%%) |%B|'
        )
        each_site_file do |file|
          # Even if we cleared the list of documents/pages beforehand, some
          # files might still sneak up to this point (like static files added
          # to a collection directory), so we check again if they can really
          # be indexed.
          next unless FileBrowser.indexable?(file)

          path = FileBrowser.relative_path(file.path)
          Logger.verbose("I:Extracting records from #{path}")

          # concat appends in place instead of copying the whole list for
          # every file
          records.concat(Extractor.run(file))
          file_count += 1

          progress_bar.increment
        end

        [records, file_count]
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  # Overwriting the Jekyll::Document class
  class Document
    # Jekyll normally gives any collection item the current date (time of
    # build) when none is set. That breaks our diff algorithm, so this
    # monkey patch answers with the date found in the data, or nil when
    # there is none.
    def date
      explicit_date = data['date']
      return nil unless explicit_date

      explicit_date
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Paginate
    # Turn off the pagination provided by jekyll-paginate
    #
    # That plugin creates pages listing all the items to paginate. Such pages
    # hold nothing worth indexing (their content duplicates the real pages),
    # yet they still take time to generate.
    #
    # Monkey-patching the plugin forces it to be disabled
    # https://github.com/jekyll/jekyll-paginate/blob/master/lib/jekyll-paginate/pager.rb#L22
    class Pager
      class << self
        # Always answers false, whatever the site passed in
        def pagination_enabled?(_site)
          false
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# The default `link` tag allow to link to a specific page, using its relative
|
4
|
+
# path. Because we might not be indexing the destination of the link, we might
|
5
|
+
# not have the representation of the page in our data. If that happens, the
|
6
|
+
# `link` tag fails.
|
7
|
+
#
|
8
|
+
# To fix that we'll overwrite the default `link` tag to loop over a backup copy
|
9
|
+
# of the original files (before we clean it for indexing)
|
10
|
+
#
|
11
|
+
# https://github.com/algolia/jekyll-algolia/issues/62
|
12
|
+
class JekyllAlgoliaLink < Jekyll::Tags::Link
  # Resolve the link against the backup copy of the site files (taken before
  # they were cleaned for indexing): pages first, then collection documents,
  # then static files. Falls back to '/' when nothing matches.
  #
  # context - The rendering context; its :site register must expose
  #           original_site_files
  def render(context)
    backup = context.registers[:site].original_site_files

    page = backup[:pages].find do |candidate|
      candidate.relative_path == @relative_path
    end
    return page.url if page

    backup[:collections].each_value do |collection|
      document = collection.docs.find do |candidate|
        candidate.relative_path == @relative_path
      end
      return document.url if document
    end

    # Static files are also matched with a leading slash in front of the path
    accepted_paths = [@relative_path, "/#{@relative_path}"]
    asset = backup[:static_files].find do |candidate|
      accepted_paths.include?(candidate.relative_path)
    end
    return asset.url if asset

    '/'
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'progressbar'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
module Jekyll
  module Algolia
    # Thin wrapper around the progressbar gem that can hand back a silent
    # stand-in instead of a real progress bar
    module ProgressBar
      include Jekyll::Algolia

      # Public: True when no real progress bar should be created, which is
      # the case in verbose mode
      def self.should_be_silenced?
        Configurator.verbose?
      end

      # Public: Create a progress bar
      #
      # options - Options forwarded as-is to ::ProgressBar.create
      #
      # When silenced, the returned object is an OpenStruct whose `increment`
      # is nil, so calling `increment` on it does nothing.
      def self.create(options)
        return OpenStruct.new(increment: nil) if should_be_silenced?

        ::ProgressBar.create(options)
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
module Jekyll
  module Algolia
    # Module to shrink a record so it fits in the plan quotas
    module Shrinker
      include Jekyll::Algolia

      # Public: Get the byte size of the object once converted to JSON
      #
      # - record: The record to estimate
      def self.size(record)
        record.to_json.bytesize
      end

      # Public: Attempt to reduce the size of the record by reducing the size
      # of the less needed attributes
      #
      # - raw_record: The record to attempt to reduce
      # - max_size: The max size to achieve in bytes
      #
      # Returns raw_record itself when it already fits, otherwise a shrunk
      # copy. When the record cannot be made to fit, stop_with_error is
      # called.
      #
      # The excerpts are the attributes most subject to being reduced. We'll
      # go as far as removing them if there is no other choice.
      def self.fit_to_size(raw_record, max_size)
        return raw_record if size(raw_record) <= max_size

        # No excerpt, we can't shrink it
        if !raw_record.key?(:excerpt_html) || !raw_record.key?(:excerpt_text)
          return stop_with_error(raw_record)
        end

        # Work on a copy so the keys of the record passed in are untouched
        record = raw_record.clone

        # We replace the HTML excerpt with the textual one
        record[:excerpt_html] = record[:excerpt_text]
        return record if size(record) <= max_size

        # We halve the excerpts, keeping the first half of the words
        excerpt_words = record[:excerpt_text].split(/\s+/)
        shortened_excerpt = excerpt_words[0...excerpt_words.size / 2].join(' ')
        record[:excerpt_text] = shortened_excerpt
        record[:excerpt_html] = shortened_excerpt
        return record if size(record) <= max_size

        # We remove the excerpts completely
        record.delete(:excerpt_text)
        record.delete(:excerpt_html)
        return record if size(record) <= max_size

        # Still too big, we fail
        stop_with_error(record)
      end

      # Public: Stop the current indexing process and display details about
      # the record that is too big to be pushed
      #
      # - record: The record causing the error
      #
      # This will display an error message and log the wrong record in a file
      # in the source directory
      def self.stop_with_error(record)
        record_size = size(record)
        record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
        max_record_size = Configurator.algolia('max_record_size')
        max_record_size_readable = Filesize
                                   .from("#{max_record_size}B").to_s('Kb')

        probable_wrong_keys = readable_largest_record_keys(record)

        # Writing the full record to disk for inspection
        record_log_path = Logger.write_to_file(
          'jekyll-algolia-record-too-big.log',
          JSON.pretty_generate(record)
        )

        details = {
          'object_title' => record[:title],
          'object_url' => record[:url],
          'probable_wrong_keys' => probable_wrong_keys,
          'record_log_path' => record_log_path,
          'nodes_to_index' => Configurator.algolia('nodes_to_index'),
          'record_size' => record_size_readable,
          'max_record_size' => max_record_size_readable
        }

        Logger.known_message('record_too_big', details)

        stop_process
      end

      # Public: Returns a string explaining which attributes are the largest
      # in the record
      #
      # record - The record hash to analyze
      #
      # Returns up to three "key (size)" entries, largest first, joined with
      # commas. Sizes are byte sizes of the string form of each value, so
      # multi-byte characters are counted the same way as in .size.
      def self.readable_largest_record_keys(record)
        byte_sizes = record.map { |key, value| [key, value.to_s.bytesize] }
        largest = byte_sizes.max_by(3) { |_, bytes| bytes }

        largest.map do |key, bytes|
          readable_size = Filesize.from("#{bytes} B").to_s('Kb')
          "#{key} (#{readable_size})"
        end.join(', ')
      end

      # Public: Stop the current process
      def self.stop_process
        exit 1
      end
    end
  end
end
|