jekyll-algolia 1.0.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +51 -30
  3. data/README.md +69 -27
  4. data/lib/errors/invalid_credentials.txt +12 -0
  5. data/lib/errors/invalid_index_name.txt +9 -0
  6. data/lib/errors/missing_api_key.txt +15 -0
  7. data/lib/errors/missing_application_id.txt +11 -0
  8. data/lib/errors/missing_index_name.txt +18 -0
  9. data/lib/errors/no_records_found.txt +14 -0
  10. data/lib/errors/record_too_big.txt +27 -0
  11. data/lib/errors/record_too_big_api.txt +10 -0
  12. data/lib/errors/settings_manually_edited.txt +17 -0
  13. data/lib/errors/too_many_records.txt +14 -0
  14. data/lib/errors/unknown_application_id.txt +16 -0
  15. data/lib/errors/unknown_settings.txt +12 -0
  16. data/lib/jekyll-algolia.rb +45 -60
  17. data/lib/jekyll/algolia/configurator.rb +137 -44
  18. data/lib/jekyll/algolia/error_handler.rb +36 -48
  19. data/lib/jekyll/algolia/extractor.rb +16 -6
  20. data/lib/jekyll/algolia/file_browser.rb +161 -68
  21. data/lib/jekyll/algolia/hooks.rb +18 -6
  22. data/lib/jekyll/algolia/indexer.rb +283 -145
  23. data/lib/jekyll/algolia/logger.rb +39 -8
  24. data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
  25. data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
  26. data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
  27. data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
  28. data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
  29. data/lib/jekyll/algolia/progress_bar.rb +27 -0
  30. data/lib/jekyll/algolia/shrinker.rb +112 -0
  31. data/lib/jekyll/algolia/utils.rb +118 -2
  32. data/lib/jekyll/algolia/version.rb +1 -1
  33. data/lib/jekyll/commands/algolia.rb +3 -14
  34. metadata +75 -31
  35. data/errors/invalid_credentials.txt +0 -10
  36. data/errors/invalid_credentials_for_tmp_index.txt +0 -17
  37. data/errors/invalid_index_name.txt +0 -11
  38. data/errors/missing_api_key.txt +0 -17
  39. data/errors/missing_application_id.txt +0 -12
  40. data/errors/missing_index_name.txt +0 -19
  41. data/errors/no_records_found.txt +0 -20
  42. data/errors/record_too_big.txt +0 -25
  43. data/errors/unknown_application_id.txt +0 -20
  44. data/errors/unknown_settings.txt +0 -15
@@ -4,24 +4,44 @@ module Jekyll
4
4
  module Algolia
5
5
  # Display helpful error messages
6
6
  module Logger
7
# Public: Run a block with all Jekyll log output silenced
#
# Usage:
#   Logger.silent do
#     # whatever Jekyll code here
#   end
#
# This is especially useful when Jekyll is too talkative about what is
# logged. Jekyll.logger.write is temporarily replaced with a no-op through
# Utils.monkey_patch, and the original method is put back once the block is
# done, even when the block raises.
def self.silent
  original_write = Jekyll.logger.method(:write)
  noop = proc { |*| }
  Utils.monkey_patch(Jekyll.logger, :write, noop)
  begin
    yield
  ensure
    Utils.monkey_patch(Jekyll.logger, :write, original_write)
  end
end
25
+
7
26
  # Public: Displays a log line
8
27
  #
9
28
  # line - Line to display. Expected to be of the following format:
10
29
  # "X:Your content"
11
30
  # Where X is either I, W or E for marking respectively an info, warning or
12
31
  # error display
13
- def self.log(line)
14
- type, content = /^(I|W|E):(.*)/.match(line).captures
32
+ def self.log(input)
33
+ type, content = /^(I|W|E):(.*)/m.match(input).captures
15
34
  logger_mapping = {
16
35
  'E' => :error,
17
36
  'I' => :info,
18
37
  'W' => :warn
19
38
  }
20
39
 
21
- # Jekyll logger tries to center log lines, so we force a consistent
22
- # width of 80 chars
23
- content = content.ljust(80, ' ')
24
- Jekyll.logger.send(logger_mapping[type], content)
40
+ # Display by chunk of 80-characters lines
41
+ lines = Utils.split_lines(content, 80)
42
+ lines.each do |line|
43
+ Jekyll.logger.send(logger_mapping[type], line)
44
+ end
25
45
  end
26
46
 
27
47
  # Public: Only display a log line if verbose mode is enabled
@@ -29,9 +49,20 @@ module Jekyll
29
49
  # line - The line to display, following the same format as .log
30
50
  def self.verbose(line)
31
51
  return unless Configurator.verbose?
52
+
32
53
  log(line)
33
54
  end
34
55
 
56
# Public: Save the given content into a file of the source directory
#
# filename - Basename of the file to write
# content - Content to put in the file
#
# Returns the full path of the written file
def self.write_to_file(filename, content)
  File.join(Configurator.get('source'), filename).tap do |destination|
    File.write(destination, content)
  end
end
65
+
35
66
  # Public: Displays a helpful error message for one of the known errors
36
67
  #
37
68
  # message_id: A string identifying a known message
@@ -42,14 +73,14 @@ module Jekyll
42
73
  def self.known_message(message_id, metadata = {})
43
74
  file = File.expand_path(
44
75
  File.join(
45
- __dir__, '../../..', 'errors', "#{message_id}.txt"
76
+ __dir__, '../..', 'errors', "#{message_id}.txt"
46
77
  )
47
78
  )
48
79
 
49
80
  # Convert all variables
50
81
  content = File.open(file).read
51
82
  metadata.each do |key, value|
52
- content = content.gsub("{#{key}}", value)
83
+ content = content.gsub("{#{key}}", value.to_s)
53
84
  end
54
85
 
55
86
  # Display each line differently
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module GitHubPages
  # The github-pages gem automatically disables every plugin that is not in the
  # whitelist of plugins allowed by GitHub, including any plugin defined in the
  # `_plugins` folder.
  #
  # Users of the jekyll-algolia plugin rely on custom plugins in `_plugins` to
  # define the hooks that modify the indexing. With the github-pages gem
  # installed at the same time, those hooks would never be executed.
  #
  # The GitHub Pages gem prevents access to custom plugins by doing two things:
  # - forcing safe mode
  # - loading custom plugins from a random dir
  #
  # Both are cancelled here: safe mode is disabled and plugins are read from
  # ./_plugins again.
  #
  # This file will only be loaded when running `jekyll algolia`, so it won't
  # interfere with the regular usage of `jekyll build`
  class Configuration
    # Replace the site configuration with the effective configuration, with
    # safe mode turned off and plugins read from `_plugins`
    #
    # site - The site whose `config` is read and then overwritten
    def self.set!(site)
      site.config = effective_config(site.config).tap do |config|
        config['safe'] = false
        config['plugins_dir'] = '_plugins'
      end
    end
  end
end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module Algolia
    # A Jekyll::Site subclass that overrides process from the parent class to
    # create JSON records out of rendered documents and push those records to
    # Algolia instead of writing files to disk.
    class Site < Jekyll::Site
      # We expose a way to reset the collection, as it will be needed in the
      # tests
      attr_writer :collections

      # Backup of the pages, collections and static files as they were before
      # the non-indexable ones were removed (see keep_only_indexable_files)
      attr_reader :original_site_files

      # Public: Overwriting the parent method
      #
      # This will prepare the website, gathering all files, excluding the ones
      # we don't need to index, then render them (converting to HTML), then
      # finally call `push` to push to Algolia
      def process
        # Default Jekyll preflight
        reset
        read
        generate

        # Removing all files that won't be indexed, so we don't waste time
        # rendering them
        keep_only_indexable_files

        # Starting the rendering progress bar
        init_rendering_progress_bar

        # Converting them to HTML
        render

        # Pushing them to Algolia
        push
      end

      # Public: Return the number of pages/documents to index
      def indexable_item_count
        @collections.each_value.inject(@pages.length) do |count, collection|
          count + collection.docs.length
        end
      end

      # Public: Init the rendering progress bar, incrementing it for each
      # rendered item
      #
      # This uses Jekyll post_render hooks, listening to both pages and
      # documents
      def init_rendering_progress_bar
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Rendering to HTML (%j%%) |%B|'
        )
        Jekyll::Hooks.register [:pages, :documents], :post_render do
          progress_bar.increment
        end
      end

      # Public: Filtering a list of items to only keep the ones that are
      # indexable.
      #
      # items - List of Pages/Documents
      #
      # Note: It also sets the layout to nil, to further speed up the rendering
      #
      # Returns a new list; the kept items themselves are modified in place
      def indexable_list(items)
        items.select { |item| FileBrowser.indexable?(item) }.each do |item|
          item.data = {} if item.data.nil?
          item.data['layout'] = nil
        end
      end

      # Public: Removing non-indexable Pages, Posts and Documents from the
      # internals
      #
      # The complete lists are kept in original_site_files beforehand.
      def keep_only_indexable_files
        # Collections are filtered in place below (collection.docs = ...), so
        # the backup holds shallow copies that keep pointing to the complete
        # list of documents. Backing up the collections themselves would leave
        # the backup with the filtered documents only.
        collections_backup = Hash[
          @collections.map { |label, collection| [label, collection.dup] }
        ]
        @original_site_files = {
          pages: @pages,
          collections: collections_backup,
          static_files: @static_files
        }

        @pages = indexable_list(@pages)

        # Applying to each collections
        @collections.each_value do |collection|
          collection.docs = indexable_list(collection.docs)
        end

        # Remove all static files
        @static_files = []
      end

      # Public: Extract records from every file and index them
      #
      # Returns whatever Indexer.run returns
      def push
        records = []
        files = []
        progress_bar = ProgressBar.create(
          total: indexable_item_count,
          format: 'Extracting records (%j%%) |%B|'
        )
        each_site_file do |file|
          # Even if we cleared the list of documents/pages beforehand, some
          # files might still sneak up to this point (like static files added to
          # a collection directory), so we check again if they can really be
          # indexed.
          next unless FileBrowser.indexable?(file)

          path = FileBrowser.relative_path(file.path)

          Logger.verbose("I:Extracting records from #{path}")
          file_records = Extractor.run(file)

          files << file
          # Appending in place, instead of allocating a new array for each file
          records.concat(file_records)

          progress_bar.increment
        end

        # Applying the user hook on the whole list of records
        records = Hooks.apply_all(records, self)

        # Shrinking records to force them to fit under the max record size
        # limit, or displaying an error message if not possible
        max_record_size = Configurator.algolia('max_record_size')
        # We take into account the objectID that will be added in the form of:
        # "objectID": "16cd998991cc40d92402b0b4e6c55e8a"
        object_id_attribute_length = 46
        max_record_size -= object_id_attribute_length
        records.map! do |record|
          Shrinker.fit_to_size(record, max_record_size)
        end

        # Adding a unique objectID to each record
        records.map! do |record|
          Extractor.add_unique_object_id(record)
        end

        Logger.verbose("I:Found #{files.length} files")

        Indexer.run(records)
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  # Overwriting the Jekyll::Document class
  class Document
    # By default, Jekyll will set the current date (time of build) to any
    # collection item. This will break our diff algorithm, so we monkey patch
    # this call to only read the date from the document data.
    #
    # Returns the `date` entry of the data, or nil when there is none
    def date
      explicit_date = data['date']
      explicit_date || nil
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Jekyll
  module Paginate
    # Disable pagination from jekyll-paginate
    #
    # That plugin creates pages that contain a list of all the items to
    # paginate. Those pages don't hold any interesting data to index (it is
    # duplicated content of the real pages), but they still take time to
    # generate.
    #
    # By monkey-patching the plugin, we force it to be disabled
    # https://github.com/jekyll/jekyll-paginate/blob/master/lib/jekyll-paginate/pager.rb#L22
    class Pager
      class << self
        # Pagination is reported as disabled, whatever the site
        def pagination_enabled?(_site)
          false
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
# The default `link` tag links to a specific page through its relative path.
# Because the destination of the link might not be indexed, its representation
# might be missing from our data, in which case the default `link` tag fails.
#
# To fix that, the default `link` tag is overwritten to search in a backup copy
# of the original files (taken before they are cleaned for indexing).
#
# https://github.com/algolia/jekyll-algolia/issues/62
class JekyllAlgoliaLink < Jekyll::Tags::Link
  # Returns the url of the first page, collection document or static file
  # (searched in that order) matching the relative path of the tag, or '/'
  # when nothing matches
  def render(context)
    backup = context.registers[:site].original_site_files

    page = backup[:pages].find do |candidate|
      candidate.relative_path == @relative_path
    end
    return page.url if page

    backup[:collections].each_value do |collection|
      document = collection.docs.find do |candidate|
        candidate.relative_path == @relative_path
      end
      return document.url if document
    end

    # Static files are also matched with a leading slash added to the path
    rooted_path = "/#{@relative_path}"
    asset = backup[:static_files].find do |candidate|
      candidate.relative_path == @relative_path ||
        candidate.relative_path == rooted_path
    end

    asset ? asset.url : '/'
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'progressbar'
4
+ require 'ostruct'
5
+
6
module Jekyll
  module Algolia
    # Wrapper around the progressbar gem, handing out a do-nothing bar when
    # progress bars should be silenced
    module ProgressBar
      include Jekyll::Algolia

      # Public: Check if progress bars should be hidden, which is the case when
      # verbose mode is enabled
      def self.should_be_silenced?
        Configurator.verbose?
      end

      # Public: Create a progress bar
      #
      # options - Options forwarded as-is to ::ProgressBar.create
      #
      # Returns a real progress bar, or a placeholder object whose `increment`
      # only returns nil when progress bars are silenced
      def self.create(options)
        return ::ProgressBar.create(options) unless should_be_silenced?

        OpenStruct.new(increment: nil)
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
module Jekyll
  module Algolia
    # Module to shrink a record so it fits in the plan quotas
    module Shrinker
      include Jekyll::Algolia

      # Public: Get the byte size of the object once converted to JSON
      #
      # record - The record to estimate
      def self.size(record)
        record.to_json.bytesize
      end

      # Public: Attempt to reduce the size of the record by reducing the size of
      # the less needed attributes
      #
      # raw_record - The record to attempt to reduce
      # max_size - The max size to achieve in bytes
      #
      # The excerpts are the attributes most subject to being reduced. We'll go
      # as far as removing them if there is no other choice.
      #
      # Returns the record itself when it already fits, or a reduced copy of
      # it. When the record cannot be made to fit, the process is stopped
      # through stop_with_error.
      def self.fit_to_size(raw_record, max_size)
        return raw_record if size(raw_record) <= max_size

        # No excerpt, we can't shrink it
        unless raw_record.key?(:excerpt_html) && raw_record.key?(:excerpt_text)
          return stop_with_error(raw_record)
        end

        # Working on a copy, so the record passed by the caller is left intact
        record = raw_record.clone

        # We replace the HTML excerpt with the textual one
        record[:excerpt_html] = record[:excerpt_text]
        return record if size(record) <= max_size

        # We halve the excerpts, keeping the first half of the words
        excerpt_words = record[:excerpt_text].split(/\s+/)
        shortened_excerpt = excerpt_words[0...excerpt_words.size / 2].join(' ')
        record[:excerpt_text] = shortened_excerpt
        record[:excerpt_html] = shortened_excerpt
        return record if size(record) <= max_size

        # We remove the excerpts completely
        record.delete(:excerpt_text)
        record.delete(:excerpt_html)
        return record if size(record) <= max_size

        # Still too big, we fail
        stop_with_error(record)
      end

      # Public: Stop the current indexing process and display details about the
      # record that is too big to be pushed
      #
      # record - The record causing the error
      #
      # This will display an error message and log the wrong record in a file in
      # the source directory
      def self.stop_with_error(record)
        record_size = size(record)
        record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
        max_record_size = Configurator.algolia('max_record_size')
        max_record_size_readable = Filesize
                                   .from("#{max_record_size}B").to_s('Kb')

        probable_wrong_keys = readable_largest_record_keys(record)

        # Writing the full record to disk for inspection
        record_log_path = Logger.write_to_file(
          'jekyll-algolia-record-too-big.log',
          JSON.pretty_generate(record)
        )

        details = {
          'object_title' => record[:title],
          'object_url' => record[:url],
          'probable_wrong_keys' => probable_wrong_keys,
          'record_log_path' => record_log_path,
          'nodes_to_index' => Configurator.algolia('nodes_to_index'),
          'record_size' => record_size_readable,
          'max_record_size' => max_record_size_readable
        }

        Logger.known_message('record_too_big', details)

        stop_process
      end

      # Public: Returns a string explaining which attributes are the largest in
      # the record
      #
      # record - The record hash to analyze
      #
      # Returns the three largest attributes, largest first, in the form of
      # "key (size), key (size), key (size)"
      def self.readable_largest_record_keys(record)
        # Sizes are counted in bytes, not in characters: this is the unit of
        # the record size limit, and multibyte content would otherwise be
        # under-estimated.
        byte_sizes = record.map { |key, value| [key, value.to_s.bytesize] }
        largest_keys = byte_sizes.sort_by { |_, bytes| -bytes }.first(3)

        readable_keys = largest_keys.map do |key, bytes|
          "#{key} (#{Filesize.from("#{bytes} B").to_s('Kb')})"
        end
        readable_keys.join(', ')
      end

      # Public: Stop the current process
      def self.stop_process
        exit 1
      end
    end
  end
end