jekyll-algolia 1.0.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTING.md +51 -30
  3. data/README.md +69 -27
  4. data/lib/errors/invalid_credentials.txt +12 -0
  5. data/lib/errors/invalid_index_name.txt +9 -0
  6. data/lib/errors/missing_api_key.txt +15 -0
  7. data/lib/errors/missing_application_id.txt +11 -0
  8. data/lib/errors/missing_index_name.txt +18 -0
  9. data/lib/errors/no_records_found.txt +14 -0
  10. data/lib/errors/record_too_big.txt +27 -0
  11. data/lib/errors/record_too_big_api.txt +10 -0
  12. data/lib/errors/settings_manually_edited.txt +17 -0
  13. data/lib/errors/too_many_records.txt +14 -0
  14. data/lib/errors/unknown_application_id.txt +16 -0
  15. data/lib/errors/unknown_settings.txt +12 -0
  16. data/lib/jekyll-algolia.rb +45 -60
  17. data/lib/jekyll/algolia/configurator.rb +137 -44
  18. data/lib/jekyll/algolia/error_handler.rb +36 -48
  19. data/lib/jekyll/algolia/extractor.rb +16 -6
  20. data/lib/jekyll/algolia/file_browser.rb +161 -68
  21. data/lib/jekyll/algolia/hooks.rb +18 -6
  22. data/lib/jekyll/algolia/indexer.rb +283 -145
  23. data/lib/jekyll/algolia/logger.rb +39 -8
  24. data/lib/jekyll/algolia/overwrites/githubpages-configuration.rb +32 -0
  25. data/lib/jekyll/algolia/overwrites/jekyll-algolia-site.rb +151 -0
  26. data/lib/jekyll/algolia/overwrites/jekyll-document.rb +13 -0
  27. data/lib/jekyll/algolia/overwrites/jekyll-paginate-pager.rb +20 -0
  28. data/lib/jekyll/algolia/overwrites/jekyll-tags-link.rb +33 -0
  29. data/lib/jekyll/algolia/progress_bar.rb +27 -0
  30. data/lib/jekyll/algolia/shrinker.rb +112 -0
  31. data/lib/jekyll/algolia/utils.rb +118 -2
  32. data/lib/jekyll/algolia/version.rb +1 -1
  33. data/lib/jekyll/commands/algolia.rb +3 -14
  34. metadata +75 -31
  35. data/errors/invalid_credentials.txt +0 -10
  36. data/errors/invalid_credentials_for_tmp_index.txt +0 -17
  37. data/errors/invalid_index_name.txt +0 -11
  38. data/errors/missing_api_key.txt +0 -17
  39. data/errors/missing_application_id.txt +0 -12
  40. data/errors/missing_index_name.txt +0 -19
  41. data/errors/no_records_found.txt +0 -20
  42. data/errors/record_too_big.txt +0 -25
  43. data/errors/unknown_application_id.txt +0 -20
  44. data/errors/unknown_settings.txt +0 -15
@@ -4,24 +4,44 @@ module Jekyll
4
4
  module Algolia
5
5
  # Display helpful error messages
6
6
  module Logger
7
+ # Public: Silence all Jekyll log output in this block
8
+ # Usage:
9
+ # Logger.silent do
10
+ # # whatever Jekyll code here
11
+ # end
12
+ #
13
+ # This is especially useful when Jekyll is too talkative about what is
14
+ # logged. It works by redefining Jekyll.logger.write to a noop
15
+ # temporarily and re-attributing the original method once finished.
16
+ def self.silent
17
+ initial_method = Jekyll.logger.method(:write)
18
+ Utils.monkey_patch(Jekyll.logger, :write, proc { |*args| })
19
+ begin
20
+ yield
21
+ ensure
22
+ Utils.monkey_patch(Jekyll.logger, :write, initial_method)
23
+ end
24
+ end
25
+
7
26
  # Public: Displays a log line
8
27
  #
9
28
  # line - Line to display. Expected to be of the following format:
10
29
  # "X:Your content"
11
30
  # Where X is either I, W or E for marking respectively an info, warning or
12
31
  # error display
13
- def self.log(line)
14
- type, content = /^(I|W|E):(.*)/.match(line).captures
32
+ def self.log(input)
33
+ type, content = /^(I|W|E):(.*)/m.match(input).captures
15
34
  logger_mapping = {
16
35
  'E' => :error,
17
36
  'I' => :info,
18
37
  'W' => :warn
19
38
  }
20
39
 
21
- # Jekyll logger tries to center log lines, so we force a consistent
22
- # width of 80 chars
23
- content = content.ljust(80, ' ')
24
- Jekyll.logger.send(logger_mapping[type], content)
40
+ # Display in chunks of 80-character lines
41
+ lines = Utils.split_lines(content, 80)
42
+ lines.each do |line|
43
+ Jekyll.logger.send(logger_mapping[type], line)
44
+ end
25
45
  end
26
46
 
27
47
  # Public: Only display a log line if verbose mode is enabled
@@ -29,9 +49,20 @@ module Jekyll
29
49
  # line - The line to display, following the same format as .log
30
50
  def self.verbose(line)
31
51
  return unless Configurator.verbose?
52
+
32
53
  log(line)
33
54
  end
34
55
 
56
+ # Public: Write the specified content to a file in the source directory
57
+ #
58
+ # filename - the file basename
59
+ # content - the actual content of the file
60
+ def self.write_to_file(filename, content)
61
+ filepath = File.join(Configurator.get('source'), filename)
62
+ File.write(filepath, content)
63
+ filepath
64
+ end
65
+
35
66
  # Public: Displays a helpful error message for one of the known errors
36
67
  #
37
68
  # message_id: A string identifying a known message
@@ -42,14 +73,14 @@ module Jekyll
42
73
  def self.known_message(message_id, metadata = {})
43
74
  file = File.expand_path(
44
75
  File.join(
45
- __dir__, '../../..', 'errors', "#{message_id}.txt"
76
+ __dir__, '../..', 'errors', "#{message_id}.txt"
46
77
  )
47
78
  )
48
79
 
49
80
  # Convert all variables
50
81
  content = File.open(file).read
51
82
  metadata.each do |key, value|
52
- content = content.gsub("{#{key}}", value)
83
+ content = content.gsub("{#{key}}", value.to_s)
53
84
  end
54
85
 
55
86
  # Display each line differently
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module GitHubPages
4
+ # The github-pages gem will automatically disable every plugin that is not in
5
+ # the whitelist of plugins allowed by GitHub. This includes any plugin defined
6
+ # in the `_plugins` folder as well.
7
+ #
8
+ # Users of the jekyll-algolia plugin will use custom plugins in _plugins to
9
+ # define custom hooks to modify the indexing. If they happen to have the
10
+ # github-pages gem installed at the same time, those hooks will never be
11
+ # executed.
12
+ #
13
+ # The GitHub Pages gem prevents access to custom plugins by doing two things:
14
+ # - forcing safe mode
15
+ # - loading custom plugins from a random dir
16
+ #
17
+ # We cancel those by disabling safe mode and forcing back plugins to be read
18
+ # from ./_plugins.
19
+ #
20
+ # This file will only be loaded when running `jekyll algolia`, so it won't
21
+ # interfere with the regular usage of `jekyll build`
22
+ class Configuration
23
+ class << self
24
+ def set!(site)
25
+ config = effective_config(site.config)
26
+ config['safe'] = false
27
+ config['plugins_dir'] = '_plugins'
28
+ site.config = config
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module Algolia
5
+ # A Jekyll::Site subclass that overrides process from the parent class to
6
+ # create JSON records out of rendered documents and push those records to
7
+ # Algolia instead of writing files to disk.
8
+ class Site < Jekyll::Site
9
+ # We expose a way to reset the collection, as it will be needed in the
10
+ # tests
11
+ attr_writer :collections
12
+
13
+ attr_reader :original_site_files
14
+
15
+ # Public: Overwriting the parent method
16
+ #
17
+ # This will prepare the website, gathering all files, excluding the one we
18
+ # don't need to index, then render them (converting to HTML), then finally
19
+ # calling `push` to push to Algolia
20
+ def process
21
+ # Default Jekyll preflight
22
+ reset
23
+ read
24
+ generate
25
+
26
+ # Removing all files that won't be indexed, so we don't waste time
27
+ # rendering them
28
+ keep_only_indexable_files
29
+
30
+ # Starting the rendering progress bar
31
+ init_rendering_progress_bar
32
+
33
+ # Converting them to HTML
34
+ render
35
+
36
+ # Pushing them to Algolia
37
+ push
38
+ end
39
+
40
+ # Public: Return the number of pages/documents to index
41
+ def indexable_item_count
42
+ count = @pages.length
43
+ @collections.each_value { |collection| count += collection.docs.length }
44
+ count
45
+ end
46
+
47
+ # Public: Init the rendering progress bar, incrementing it for each
48
+ # rendered item
49
+ #
50
+ # This uses Jekyll post_render hooks, listening to both pages and
51
+ # documents
52
+ def init_rendering_progress_bar
53
+ progress_bar = ProgressBar.create(
54
+ total: indexable_item_count,
55
+ format: 'Rendering to HTML (%j%%) |%B|'
56
+ )
57
+ Jekyll::Hooks.register [:pages, :documents], :post_render do
58
+ progress_bar.increment
59
+ end
60
+ end
61
+
62
+ # Public: Filtering a list of items to only keep the one that are
63
+ # indexable.
64
+ #
65
+ # items - List of Pages/Documents
66
+ #
67
+ # Note: It also sets the layout to nil, to further speed up the rendering
68
+ def indexable_list(items)
69
+ new_list = []
70
+ items.each do |item|
71
+ next unless FileBrowser.indexable?(item)
72
+
73
+ item.data = {} if item.data.nil?
74
+ item.data['layout'] = nil
75
+ new_list << item
76
+ end
77
+ new_list
78
+ end
79
+
80
+ # Public: Removing non-indexable Pages, Posts and Documents from the
81
+ # internals
82
+ def keep_only_indexable_files
83
+ @original_site_files = {
84
+ pages: @pages,
85
+ collections: @collections,
86
+ static_files: @static_files
87
+ }
88
+
89
+ @pages = indexable_list(@pages)
90
+
91
+ # Applying to each collection
92
+ @collections.each_value do |collection|
93
+ collection.docs = indexable_list(collection.docs)
94
+ end
95
+
96
+ # Remove all static files
97
+ @static_files = []
98
+ end
99
+
100
+ # Public: Extract records from every file and index them
101
+ def push
102
+ records = []
103
+ files = []
104
+ progress_bar = ProgressBar.create(
105
+ total: indexable_item_count,
106
+ format: 'Extracting records (%j%%) |%B|'
107
+ )
108
+ each_site_file do |file|
109
+ # Even if we cleared the list of documents/pages beforehand, some
110
+ # files might still sneak up to this point (like static files added to
111
+ # a collection directory), so we check again if they can really be
112
+ # indexed.
113
+ next unless FileBrowser.indexable?(file)
114
+
115
+ path = FileBrowser.relative_path(file.path)
116
+
117
+ Logger.verbose("I:Extracting records from #{path}")
118
+ file_records = Extractor.run(file)
119
+
120
+ files << file
121
+ records += file_records
122
+
123
+ progress_bar.increment
124
+ end
125
+
126
+ # Applying the user hook on the whole list of records
127
+ records = Hooks.apply_all(records, self)
128
+
129
+ # Shrinking records to force them to fit under the max record size
130
+ # limit, or displaying an error message if not possible
131
+ max_record_size = Configurator.algolia('max_record_size')
132
+ # We take into account the objectID that will be added in the form of:
133
+ # "objectID": "16cd998991cc40d92402b0b4e6c55e8a"
134
+ object_id_attribute_length = 46
135
+ max_record_size -= object_id_attribute_length
136
+ records.map! do |record|
137
+ Shrinker.fit_to_size(record, max_record_size)
138
+ end
139
+
140
+ # Adding a unique objectID to each record
141
+ records.map! do |record|
142
+ Extractor.add_unique_object_id(record)
143
+ end
144
+
145
+ Logger.verbose("I:Found #{files.length} files")
146
+
147
+ Indexer.run(records)
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ # Overwriting the Jekyll::Document class
5
+ class Document
6
+ # By default, Jekyll will set the current date (time of build) to any
7
+ # collection item. This will break our diff algorithm, so we monkey patch
8
+ # this call to return nil if no date is defined instead.
9
+ def date
10
+ data['date'] || nil
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Jekyll
4
+ module Paginate
5
+ # Disable pagination from jekyll-paginate
6
+ #
7
+ # This plugin will create pages that contain a list of all items to
8
+ # paginate. Those pages won't contain any interesting data to be indexed
9
+ # (as it will be duplicated content of the real pages), but will still
10
+ # take time to generate.
11
+ #
12
+ # By monkey-patching the plugin, we force it to be disabled
13
+ # https://github.com/jekyll/jekyll-paginate/blob/master/lib/jekyll-paginate/pager.rb#L22
14
+ class Pager
15
+ def self.pagination_enabled?(_site)
16
+ false
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ # The default `link` tag allows linking to a specific page, using its relative
4
+ # path. Because we might not be indexing the destination of the link, we might
5
+ # not have the representation of the page in our data. If that happens, the
6
+ # `link` tag fails.
7
+ #
8
+ # To fix that we'll overwrite the default `link` tag to loop over a backup copy
9
+ # of the original files (before we clean it for indexing)
10
+ #
11
+ # https://github.com/algolia/jekyll-algolia/issues/62
12
+ class JekyllAlgoliaLink < Jekyll::Tags::Link
13
+ def render(context)
14
+ original_files = context.registers[:site].original_site_files
15
+
16
+ original_files[:pages].each do |page|
17
+ return page.url if page.relative_path == @relative_path
18
+ end
19
+
20
+ original_files[:collections].each_value do |collection|
21
+ collection.docs.each do |item|
22
+ return item.url if item.relative_path == @relative_path
23
+ end
24
+ end
25
+
26
+ original_files[:static_files].each do |asset|
27
+ return asset.url if asset.relative_path == @relative_path
28
+ return asset.url if asset.relative_path == "/#{@relative_path}"
29
+ end
30
+
31
+ '/'
32
+ end
33
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'progressbar'
4
+ require 'ostruct'
5
+
6
+ module Jekyll
7
+ module Algolia
8
+ # Module to create progress bars, or a silent stand-in in verbose mode
9
+ module ProgressBar
10
+ include Jekyll::Algolia
11
+
12
+ def self.should_be_silenced?
13
+ Configurator.verbose?
14
+ end
15
+
16
+ def self.create(options)
17
+ if should_be_silenced?
18
+ fake_bar = OpenStruct.new
19
+ fake_bar.increment = nil
20
+ return fake_bar
21
+ end
22
+
23
+ ::ProgressBar.create(options)
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,112 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ module Jekyll
5
+ module Algolia
6
+ # Module to shrink a record so it fits in the plan quotas
7
+ module Shrinker
8
+ include Jekyll::Algolia
9
+
10
+ # Public: Get the byte size of the object once converted to JSON
11
+ # - record: The record to estimate
12
+ def self.size(record)
13
+ record.to_json.bytesize
14
+ end
15
+
16
+ # Public: Attempt to reduce the size of the record by reducing the size of
17
+ # the less needed attributes
18
+ #
19
+ # - raw_record: The record to attempt to reduce
20
+ # - max_size: The max size to achieve in bytes
21
+ #
22
+ # The excerpts are the attributes most subject to being reduced. We'll go
23
+ # as far as removing them if there is no other choice.
24
+ def self.fit_to_size(raw_record, max_size)
25
+ return raw_record if size(raw_record) <= max_size
26
+
27
+ # No excerpt, we can't shrink it
28
+ if !raw_record.key?(:excerpt_html) || !raw_record.key?(:excerpt_text)
29
+ return stop_with_error(raw_record)
30
+ end
31
+
32
+ record = raw_record.clone
33
+
34
+ # We replace the HTML excerpt with the textual one
35
+ record[:excerpt_html] = record[:excerpt_text]
36
+ return record if size(record) <= max_size
37
+
38
+ # We halve the excerpts
39
+ excerpt_words = record[:excerpt_text].split(/\s+/)
40
+ shortened_excerpt = excerpt_words[0...excerpt_words.size / 2].join(' ')
41
+ record[:excerpt_text] = shortened_excerpt
42
+ record[:excerpt_html] = shortened_excerpt
43
+ return record if size(record) <= max_size
44
+
45
+ # We remove the excerpts completely
46
+ record.delete(:excerpt_text)
47
+ record.delete(:excerpt_html)
48
+ return record if size(record) <= max_size
49
+
50
+ # Still too big, we fail
51
+ stop_with_error(record)
52
+ end
53
+
54
+ # Public: Stop the current indexing process and display details about the
55
+ # record that is too big to be pushed
56
+ #
57
+ # - record: The record causing the error
58
+ #
59
+ # This will display an error message and log the wrong record in a file in
60
+ # the source directory
61
+ def self.stop_with_error(record)
62
+ record_size = size(record)
63
+ record_size_readable = Filesize.from("#{record_size}B").to_s('Kb')
64
+ max_record_size = Configurator.algolia('max_record_size')
65
+ max_record_size_readable = Filesize
66
+ .from("#{max_record_size}B").to_s('Kb')
67
+
68
+ probable_wrong_keys = readable_largest_record_keys(record)
69
+
70
+ # Writing the full record to disk for inspection
71
+ record_log_path = Logger.write_to_file(
72
+ 'jekyll-algolia-record-too-big.log',
73
+ JSON.pretty_generate(record)
74
+ )
75
+
76
+ details = {
77
+ 'object_title' => record[:title],
78
+ 'object_url' => record[:url],
79
+ 'probable_wrong_keys' => probable_wrong_keys,
80
+ 'record_log_path' => record_log_path,
81
+ 'nodes_to_index' => Configurator.algolia('nodes_to_index'),
82
+ 'record_size' => record_size_readable,
83
+ 'max_record_size' => max_record_size_readable
84
+ }
85
+
86
+ Logger.known_message('record_too_big', details)
87
+
88
+ stop_process
89
+ end
90
+
91
+ # Public: Returns a string explaining which attributes are the largest in
92
+ # the record
93
+ #
94
+ # record - The record hash to analyze
95
+ def self.readable_largest_record_keys(record)
96
+ keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
97
+ largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
98
+ output = []
99
+ largest_keys.each do |key, size|
100
+ size = Filesize.from("#{size} B").to_s('Kb')
101
+ output << "#{key} (#{size})"
102
+ end
103
+ output.join(', ')
104
+ end
105
+
106
+ # Public: Stop the current process
107
+ def self.stop_process
108
+ exit 1
109
+ end
110
+ end
111
+ end
112
+ end