jekyll-algolia 1.0.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +51 -30
- data/README.md +69 -27
- data/lib/errors/invalid_credentials.txt +12 -0
- data/lib/errors/invalid_index_name.txt +9 -0
- data/lib/errors/missing_api_key.txt +15 -0
- data/lib/errors/missing_application_id.txt +11 -0
- data/lib/errors/missing_index_name.txt +18 -0
- data/lib/errors/no_records_found.txt +14 -0
- data/lib/errors/record_too_big.txt +27 -0
- data/lib/errors/record_too_big_api.txt +10 -0
- data/lib/errors/settings_manually_edited.txt +17 -0
- data/lib/errors/too_many_records.txt +14 -0
- data/lib/errors/unknown_application_id.txt +16 -0
- data/lib/errors/unknown_settings.txt +12 -0
- data/lib/jekyll-algolia.rb +45 -60
- data/lib/jekyll/algolia/configurator.rb +137 -44
- data/lib/jekyll/algolia/error_handler.rb +36 -48
- data/lib/jekyll/algolia/extractor.rb +16 -6
- data/lib/jekyll/algolia/file_browser.rb +161 -68
- data/lib/jekyll/algolia/hooks.rb +18 -6
- data/lib/jekyll/algolia/indexer.rb +283 -145
- data/lib/jekyll/algolia/logger.rb +39 -8
- data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
- data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
- data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
- data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
- data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
- data/lib/jekyll/algolia/progress_bar.rb +27 -0
- data/lib/jekyll/algolia/shrinker.rb +112 -0
- data/lib/jekyll/algolia/utils.rb +118 -2
- data/lib/jekyll/algolia/version.rb +1 -1
- data/lib/jekyll/commands/algolia.rb +3 -14
- metadata +75 -31
- data/errors/invalid_credentials.txt +0 -10
- data/errors/invalid_credentials_for_tmp_index.txt +0 -17
- data/errors/invalid_index_name.txt +0 -11
- data/errors/missing_api_key.txt +0 -17
- data/errors/missing_application_id.txt +0 -12
- data/errors/missing_index_name.txt +0 -19
- data/errors/no_records_found.txt +0 -20
- data/errors/record_too_big.txt +0 -25
- data/errors/unknown_application_id.txt +0 -20
- data/errors/unknown_settings.txt +0 -15
@@ -4,24 +4,44 @@ module Jekyll
|
|
4
4
|
module Algolia
|
5
5
|
# Display helpful error messages
|
6
6
|
module Logger
|
7
|
+
# Public: Silence all Jekyll log output in this block
|
8
|
+
# Usage:
|
9
|
+
# Logger.silent do
|
10
|
+
# # whatever Jekyll code here
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# This is especially useful when Jekyll is too talkative about what is
|
14
|
+
# logged. It works by redefining Jekyll.logger.write to a noop
|
15
|
+
# temporarily and re-attributing the original method once finished.
|
16
|
+
def self.silent
|
17
|
+
initial_method = Jekyll.logger.method(:write)
|
18
|
+
Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
|
19
|
+
begin
|
20
|
+
yield
|
21
|
+
ensure
|
22
|
+
Utils.monkey_patch(Jekyll.logger, :write, initial_method)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
7
26
|
# Public: Displays a log line
|
8
27
|
#
|
9
28
|
# line - Line to display. Expected to be of the following format:
|
10
29
|
# "X:Your content"
|
11
30
|
# Where X is either I, W or E for marking respectively an info, warning or
|
12
31
|
# error display
|
13
|
-
def self.log(
|
14
|
-
type, content = /^(I|W|E):(.*)
|
32
|
+
def self.log(input)
|
33
|
+
type, content = /^(I|W|E):(.*)/m.match(input).captures
|
15
34
|
logger_mapping = {
|
16
35
|
'E' => :error,
|
17
36
|
'I' => :info,
|
18
37
|
'W' => :warn
|
19
38
|
}
|
20
39
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
40
|
+
# Display by chunk of 80-characters lines
|
41
|
+
lines = Utils.split_lines(content, 80)
|
42
|
+
lines.each do |line|
|
43
|
+
Jekyll.logger.send(logger_mapping[type], line)
|
44
|
+
end
|
25
45
|
end
|
26
46
|
|
27
47
|
# Public: Only display a log line if verbose mode is enabled
|
@@ -29,9 +49,20 @@ module Jekyll
|
|
29
49
|
# line - The line to display, following the same format as .log
|
30
50
|
def self.verbose(line)
|
31
51
|
return unless Configurator.verbose?
|
52
|
+
|
32
53
|
log(line)
|
33
54
|
end
|
34
55
|
|
56
|
+
# Public: Write the specified content to a file in the source directory
|
57
|
+
#
|
58
|
+
# filename - the file basename
|
59
|
+
# content - the actual content of the file
|
60
|
+
def self.write_to_file(filename, content)
|
61
|
+
filepath = File.join(Configurator.get('source'), filename)
|
62
|
+
File.write(filepath, content)
|
63
|
+
filepath
|
64
|
+
end
|
65
|
+
|
35
66
|
# Public: Displays a helpful error message for one of the known errors
|
36
67
|
#
|
37
68
|
# message_id: A string identifying a known message
|
@@ -42,14 +73,14 @@ module Jekyll
|
|
42
73
|
def self.known_message(message_id, metadata = {})
|
43
74
|
file = File.expand_path(
|
44
75
|
File.join(
|
45
|
-
__dir__, '
|
76
|
+
__dir__, '../..', 'errors', "#{message_id}.txt"
|
46
77
|
)
|
47
78
|
)
|
48
79
|
|
49
80
|
# Convert all variables
|
50
81
|
content = File.open(file).read
|
51
82
|
metadata.each do |key, value|
|
52
|
-
content = content.gsub("{#{key}}", value)
|
83
|
+
content = content.gsub("{#{key}}", value.to_s)
|
53
84
|
end
|
54
85
|
|
55
86
|
# Display each line differently
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GitHubPages
  # The github-pages gem automatically disables every plugin that is not in
  # the whitelist of plugins allowed by GitHub. This includes any plugin
  # defined in the `_plugins` folder as well.
  #
  # Users of the jekyll-algolia plugin rely on custom plugins in `_plugins` to
  # define hooks that modify the indexing. If the github-pages gem is
  # installed at the same time, those hooks would never be executed.
  #
  # The github-pages gem prevents access to custom plugins by doing two things:
  # - forcing safe mode
  # - loading custom plugins from a random dir
  #
  # We cancel both by disabling safe mode and forcing plugins to be read from
  # ./_plugins again.
  #
  # This file is only loaded when running `jekyll algolia`, so it does not
  # interfere with the regular usage of `jekyll build`.
  class Configuration
    # Public: Replace the site configuration with the effective configuration,
    # with safe mode turned off and the plugin directory set to `_plugins`.
    #
    # site - The site whose `config` is read and then reassigned
    #
    # Returns the configuration hash assigned to the site.
    def self.set!(site)
      site.config = effective_config(site.config).tap do |overridden|
        overridden['safe'] = false
        overridden['plugins_dir'] = '_plugins'
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Algolia
    # A Jekyll::Site subclass that overrides process from the parent class to
    # create JSON records out of rendered documents and push those records to
    # Algolia instead of writing files to disk.
    class Site < Jekyll::Site
      # We expose a way to reset the collections, as it is needed in the tests
      attr_writer :collections

      # Snapshot of the pages, collections and static files taken by
      # keep_only_indexable_files right before the non-indexable files are
      # removed from the site
      attr_reader :original_site_files

      # Public: Overwriting the parent method
      #
      # This prepares the website by gathering all files, excluding the ones
      # we don't need to index, rendering the remaining ones (converting them
      # to HTML), and finally calling `push` to push them to Algolia.
      def process
        # Default Jekyll preflight
        reset
        read
        generate

        # Removing all files that won't be indexed, so we don't waste time
        # rendering them
        keep_only_indexable_files

        # Starting the rendering progress bar
        init_rendering_progress_bar

        # Converting them to HTML
        render

        # Pushing them to Algolia
        push
      end

      # Public: Return the number of pages/documents to index
      #
      # Returns the number of pages plus the number of docs of every
      # collection.
      def indexable_item_count
        @collections.values.inject(@pages.length) do |count, collection|
          count + collection.docs.length
        end
      end

      # Public: Init the rendering progress bar, incrementing it for each
      # rendered item
      #
      # This uses Jekyll post_render hooks, listening to both pages and
      # documents
      def init_rendering_progress_bar
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Rendering to HTML (%j%%) |%B|'
        )
        Jekyll::Hooks.register %i[pages documents], :post_render do
          progress_bar.increment
        end
      end

      # Public: Filtering a list of items to only keep the ones that are
      # indexable.
      #
      # items - List of Pages/Documents
      #
      # Note: It also sets the layout of every kept item to nil, to further
      # speed up the rendering
      #
      # Returns a new list containing only the indexable items.
      def indexable_list(items)
        items.each_with_object([]) do |item, kept|
          next unless FileBrowser.indexable?(item)

          item.data = {} if item.data.nil?
          item.data['layout'] = nil
          kept << item
        end
      end

      # Public: Removing non-indexable Pages, Posts and Documents from the
      # internals
      #
      # A snapshot of the unfiltered files is kept in original_site_files.
      def keep_only_indexable_files
        # The collections are filtered in place below (their docs list is
        # reassigned on the very same Collection objects), so the snapshot
        # must hold shallow copies of them. Otherwise the snapshot would only
        # ever expose the already-filtered docs. Pages and static files are
        # replaced by new arrays, so the original arrays can be kept as-is.
        collections_snapshot = {}
        @collections.each do |label, collection|
          collections_snapshot[label] = collection.dup
        end

        @original_site_files = {
          pages: @pages,
          collections: collections_snapshot,
          static_files: @static_files
        }

        @pages = indexable_list(@pages)

        # Applying to each collection
        @collections.each_value do |collection|
          collection.docs = indexable_list(collection.docs)
        end

        # Remove all static files
        @static_files = []
      end

      # Public: Extract records from every file and index them
      def push
        records = []
        files = []
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Extracting records (%j%%) |%B|'
        )
        each_site_file do |file|
          # Even if we cleared the list of documents/pages beforehand, some
          # files might still sneak up to this point (like static files added
          # to a collection directory), so we check again if they can really
          # be indexed.
          next unless FileBrowser.indexable?(file)

          path = FileBrowser.relative_path(file.path)

          Logger.verbose("I:Extracting records from #{path}")
          file_records = Extractor.run(file)

          files << file
          records += file_records

          progress_bar.increment
        end

        # Applying the user hook on the whole list of records
        records = Hooks.apply_all(records, self)

        # Shrinking records to force them to fit under the max record size
        # limit, or displaying an error message if not possible
        max_record_size = Configurator.algolia('max_record_size')
        # We take into account the objectID that will be added in the form of:
        # "objectID": "16cd998991cc40d92402b0b4e6c55e8a"
        object_id_attribute_length = 46
        max_record_size -= object_id_attribute_length
        records.map! do |record|
          Shrinker.fit_to_size(record, max_record_size)
        end

        # Adding a unique objectID to each record
        records.map! do |record|
          Extractor.add_unique_object_id(record)
        end

        Logger.verbose("I:Found #{files.length} files")

        Indexer.run(records)
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  # Overwriting the Jekyll::Document class
  class Document
    # By default, Jekyll assigns the current date (time of build) to any
    # collection item without one. That would break our diff algorithm, so
    # this override only ever reports the date stored in the document data.
    #
    # Returns the value stored under 'date' in the data, or nil when there is
    # none.
    def date
      explicit_date = data['date']
      explicit_date || nil
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Jekyll
  module Paginate
    # Disable pagination from jekyll-paginate
    #
    # That plugin creates pages listing all the items to paginate. Those pages
    # hold nothing worth indexing (their content duplicates the real pages),
    # yet they still take time to generate.
    #
    # By monkey-patching the plugin, we force it to be disabled
    # https://github.com/jekyll/jekyll-paginate/blob/master/lib/jekyll-paginate/pager.rb#L22
    class Pager
      class << self
        # Public: Always report pagination as disabled, whatever the site.
        #
        # _site - Ignored
        #
        # Returns false.
        def pagination_enabled?(_site)
          false
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# The default `link` tag allows linking to a specific page, using its relative
|
4
|
+
# path. Because we might not be indexing the destination of the link, we might
|
5
|
+
# not have the representation of the page in our data. If that happens, the
|
6
|
+
# `link` tag fails.
|
7
|
+
#
|
8
|
+
# To fix that we'll overwrite the default `link` tag to loop over a backup copy
|
9
|
+
# of the original files (before we clean it for indexing)
|
10
|
+
#
|
11
|
+
# https://github.com/algolia/jekyll-algolia/issues/62
|
12
|
+
class JekyllAlgoliaLink < Jekyll::Tags::Link
  # Public: Resolve the link against the files stored in the site's
  # original_site_files rather than the current site files.
  #
  # context - The Liquid context; the site is read from its :site register
  #
  # Pages are searched first, then the docs of every collection, then the
  # static files (which also match when their path has a leading slash).
  #
  # Returns the url of the first match, or '/' when nothing matches.
  def render(context)
    backup = context.registers[:site].original_site_files
    target = @relative_path

    page = backup[:pages].find do |candidate|
      candidate.relative_path == target
    end
    return page.url if page

    backup[:collections].each_value do |collection|
      document = collection.docs.find do |candidate|
        candidate.relative_path == target
      end
      return document.url if document
    end

    asset = backup[:static_files].find do |candidate|
      candidate.relative_path == target ||
        candidate.relative_path == "/#{target}"
    end
    return asset.url if asset

    '/'
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'progressbar'
|
4
|
+
require 'ostruct'
|
5
|
+
|
6
|
+
module Jekyll
  module Algolia
    # Module creating the progress bars displayed by the plugin, or a silent
    # stand-in when progress bars should not be displayed
    module ProgressBar
      include Jekyll::Algolia

      # Public: Check if the progress bars should be hidden
      #
      # Returns the value of Configurator.verbose?
      def self.should_be_silenced?
        Configurator.verbose?
      end

      # Public: Create a progress bar
      #
      # options - Options forwarded as-is to ::ProgressBar.create
      #
      # Returns a ::ProgressBar, or, when progress bars are silenced, an
      # OpenStruct whose `increment` is nil so calling it does nothing.
      def self.create(options)
        return ::ProgressBar.create(options) unless should_be_silenced?

        OpenStruct.new(increment: nil)
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
module Jekyll
  module Algolia
    # Module to shrink a record so it fits in the plan quotas
    module Shrinker
      include Jekyll::Algolia

      # Public: Get the byte size of the object once converted to JSON
      #
      # record - The record (or any value) to measure
      #
      # Returns the number of bytes of the JSON representation.
      def self.size(record)
        record.to_json.bytesize
      end

      # Public: Attempt to reduce the size of the record by reducing the size
      # of the less needed attributes
      #
      # raw_record - The record to attempt to reduce
      # max_size   - The max size to achieve in bytes
      #
      # The excerpts are the attributes most subject to being reduced. We'll
      # go as far as removing them if there is no other choice.
      #
      # Returns raw_record itself when it already fits, otherwise a shrunk
      # copy. When the record cannot be made to fit, stop_with_error is
      # called.
      def self.fit_to_size(raw_record, max_size)
        return raw_record if size(raw_record) <= max_size

        # No excerpt, we can't shrink it
        unless raw_record.key?(:excerpt_html) && raw_record.key?(:excerpt_text)
          return stop_with_error(raw_record)
        end

        # Work on a copy so the record passed by the caller is left untouched
        record = raw_record.clone

        # We replace the HTML excerpt with the textual one
        record[:excerpt_html] = record[:excerpt_text]
        return record if size(record) <= max_size

        # We halve the excerpts. The excerpt is coerced to a string so a
        # record whose excerpt key is present but nil can still be shrunk
        excerpt_words = record[:excerpt_text].to_s.split(/\s+/)
        shortened_excerpt = excerpt_words.first(excerpt_words.size / 2).join(' ')
        record[:excerpt_text] = shortened_excerpt
        record[:excerpt_html] = shortened_excerpt
        return record if size(record) <= max_size

        # We remove the excerpts completely
        record.delete(:excerpt_text)
        record.delete(:excerpt_html)
        return record if size(record) <= max_size

        # Still too big, we fail
        stop_with_error(record)
      end

      # Public: Stop the current indexing process and display details about
      # the record that is too big to be pushed
      #
      # record - The record causing the error
      #
      # This will display an error message and log the wrong record in a file
      # in the source directory
      def self.stop_with_error(record)
        record_size = size(record)
        record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
        max_record_size = Configurator.algolia('max_record_size')
        max_record_size_readable = Filesize
                                   .from("#{max_record_size}B").to_s('Kb')

        probable_wrong_keys = readable_largest_record_keys(record)

        # Writing the full record to disk for inspection
        record_log_path = Logger.write_to_file(
          'jekyll-algolia-record-too-big.log',
          JSON.pretty_generate(record)
        )

        details = {
          'object_title' => record[:title],
          'object_url' => record[:url],
          'probable_wrong_keys' => probable_wrong_keys,
          'record_log_path' => record_log_path,
          'nodes_to_index' => Configurator.algolia('nodes_to_index'),
          'record_size' => record_size_readable,
          'max_record_size' => max_record_size_readable
        }

        Logger.known_message('record_too_big', details)

        stop_process
      end

      # Public: Returns a string explaining which attributes are the largest
      # in the record
      #
      # record - The record hash to analyze
      #
      # Attributes are measured with the same JSON byte size used to enforce
      # the limit, so multibyte content is not under-reported.
      #
      # Returns the (up to) three largest attributes, largest first, as
      # "key (size)" entries joined by ", ".
      def self.readable_largest_record_keys(record)
        weights = record.map { |key, value| [key, size(value)] }
        largest = weights.max_by(3) { |_, bytes| bytes }
        largest.map do |key, bytes|
          "#{key} (#{Filesize.from("#{bytes} B").to_s('Kb')})"
        end.join(', ')
      end

      # Public: Stop the current process
      def self.stop_process
        exit 1
      end
    end
  end
end
|