algoliasearch-jekyll 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/algoliasearch-jekyll.rb +1 -11
  3. data/lib/push.rb +87 -148
  4. metadata +45 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e1b192525e42f3f0b61d0587ab11fdef936acc10
4
- data.tar.gz: b8557b3ae3e1df1d6e3561c3614a8c4e5e3dc23e
3
+ metadata.gz: f1af115b167749491e0cfc3fbdfca8f7cbd0bfb1
4
+ data.tar.gz: 6cdc40cf3148a33400178ba9bb9076095adebd31
5
5
  SHA512:
6
- metadata.gz: b52e88d691848055afbe673401adbef36faf32ad3de1c62985577e0790fd6f97eb4ad8dd0e4f717c6c43b38b5c41a194652e254ed91fdc397931f413fcab3670
7
- data.tar.gz: 356ea6edf1d3fdc9ff1b1792294afaa966661ab3c50e3a6eee0d7567e381f742df423b3f6349eb7761393385635c9599144e1e9a0ba2c4c219580790e2eea7f5
6
+ metadata.gz: 371291f704b4029819eb5dbb59de2e3ac2ac90c8973ae678fb1890824f3c9b4470782b1bfe8b7cbb528a374e80eeb4e274440c53a5561efc5bb3d703d4e19ead
7
+ data.tar.gz: c508df8e04d78ae5db324ef678cb27ae46f5e067030a45a373dc399f25eca244263ba3bddf998e35b965fed31fb374d1e565014e6fe7a06631f4e4831811b097
@@ -21,7 +21,7 @@ class AlgoliaSearchJekyll < Jekyll::Command
21
21
 
22
22
  subcommand.action do |args, options|
23
23
  @config = configuration_from_options(options)
24
- AlgoliaSearchJekyllPush.process(args, options, @config)
24
+ AlgoliaSearchJekyllPush.init_options(args, options, @config).process
25
25
  end
26
26
  end
27
27
  end
@@ -39,15 +39,5 @@ class AlgoliaSearchJekyll < Jekyll::Command
39
39
  command.option 'unpublished', '--unpublished',
40
40
  'Index posts that were marked as unpublished'
41
41
  end
42
-
43
- def api_key
44
- return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
45
- key_file = File.join(@config['source'], '_algolia_api_key')
46
-
47
- if File.exist?(key_file) && File.size(key_file) > 0
48
- return File.open(key_file).read.strip
49
- end
50
- nil
51
- end
52
42
  end
53
43
  end
data/lib/push.rb CHANGED
@@ -1,52 +1,92 @@
1
1
  require 'algoliasearch'
2
2
  require 'nokogiri'
3
3
  require 'json'
4
+ require_relative './record_extractor.rb'
4
5
 
5
6
  # `jekyll algolia push` command
6
7
  class AlgoliaSearchJekyllPush < Jekyll::Command
7
8
  class << self
9
+ attr_accessor :options, :config
10
+
8
11
  def init_with_program(_prog)
9
12
  end
10
13
 
11
- def process(args = [], options = {}, config = {})
14
+ # Init the command with options passed on the command line
15
+ # `jekyll algolia push ARG1 ARG2 --OPTION_NAME1 OPTION_VALUE1`
16
+ # config comes from _config.yml
17
+ def init_options(args = [], options = {}, config = {})
18
+ args = [] unless args
12
19
  @args = args
13
20
  @options = options
14
21
  @config = config
15
22
 
23
+ # Allow for passing index name on the command line
16
24
  index_name = args[0]
17
-
18
25
  @config['algolia']['index_name'] = index_name if index_name
26
+ self
27
+ end
28
+
29
+ # Check if the specified file should be indexed (we exclude static files,
30
+ # robots.txt and custom defined exclusions).
31
+ def indexable?(file)
32
+ return false if file.is_a?(Jekyll::StaticFile)
33
+
34
+ # Keep only markdown and html files
35
+ allowed_extensions = %w(html)
36
+ if @config['markdown_ext']
37
+ allowed_extensions += @config['markdown_ext'].split(',')
38
+ end
39
+ current_extension = File.extname(file.name)[1..-1]
40
+ return false unless allowed_extensions.include?(current_extension)
41
+
42
+ # Exclude files manually excluded from config
43
+ excluded_files = @config['algolia']['excluded_files']
44
+ unless excluded_files.nil?
45
+ return false if excluded_files.include?(file.name)
46
+ end
47
+
48
+ true
49
+ end
50
+
51
+ # Run the default `jekyll build` command but overwrite the actual "write
52
+ # files on disk" part to instead push data to Algolia
53
+ def process
19
54
  site = Jekyll::Site.new(@config)
20
55
 
21
- # Instead of writing generated website to disk, we will push it to the
22
- # index
23
56
  def site.write
24
57
  items = []
25
58
  each_site_file do |file|
26
- new_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
59
+ next unless AlgoliaSearchJekyllPush.indexable?(file)
60
+
61
+ new_items = AlgoliaSearchRecordExtractor.new(file).extract
27
62
  next if new_items.nil?
28
63
  items += new_items
29
64
  end
30
65
  AlgoliaSearchJekyllPush.push(items)
31
66
  end
32
67
 
68
+ # This will call the build command by default, which will in turn call our
69
+ # custom .write method
33
70
  site.process
34
71
  end
35
72
 
36
- def parseable?(file)
37
- ext = file.ext.delete('.')
38
- # Allow markdown and html pages
39
- return true if @config['markdown_ext'].split(',').include?(ext)
40
- return false unless ext == 'html'
41
- return false unless file['title']
42
- true
43
- end
73
+ # Read the API key either from ENV or from an _algolia_api_key file in
74
+ # source folder
75
+ def api_key
76
+ # First read in ENV
77
+ return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
44
78
 
45
- def excluded_file?(file)
46
- @config['algolia']['excluded_files'].include?(file.name)
79
+ # Otherwise from file in source directory
80
+ key_file = File.join(@config['source'], '_algolia_api_key')
81
+ if File.exist?(key_file) && File.size(key_file) > 0
82
+ return File.open(key_file).read.strip
83
+ end
84
+ nil
47
85
  end
48
86
 
49
- def check_credentials(api_key, application_id, index_name)
87
+ # Check that all credentials are present, and stop with a helpfull message
88
+ # if not
89
+ def check_credentials
50
90
  unless api_key
51
91
  Jekyll.logger.error 'Algolia Error: No API key defined'
52
92
  Jekyll.logger.warn ' You have two ways to configure your API key:'
@@ -56,30 +96,30 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
56
96
  exit 1
57
97
  end
58
98
 
59
- unless application_id
99
+ unless @config['algolia']['application_id']
60
100
  Jekyll.logger.error 'Algolia Error: No application ID defined'
61
101
  Jekyll.logger.warn ' Please set your application id in the '\
62
102
  '_config.yml file, like so:'
63
- puts ''
103
+ Jekyll.logger.warn ''
64
104
  # The spaces are needed otherwise the text is centered
65
105
  Jekyll.logger.warn ' algolia: '
66
106
  Jekyll.logger.warn ' application_id: \'{your_application_id}\''
67
- puts ''
107
+ Jekyll.logger.warn ''
68
108
  Jekyll.logger.warn ' Your application ID can be found in your algolia'\
69
109
  ' dashboard'
70
110
  Jekyll.logger.warn ' https://www.algolia.com/licensing'
71
111
  exit 1
72
112
  end
73
113
 
74
- unless index_name
114
+ unless @config['algolia']['index_name']
75
115
  Jekyll.logger.error 'Algolia Error: No index name defined'
76
116
  Jekyll.logger.warn ' Please set your index name in the _config.yml'\
77
117
  ' file, like so:'
78
- puts ''
118
+ Jekyll.logger.warn ''
79
119
  # The spaces are needed otherwise the text is centered
80
120
  Jekyll.logger.warn ' algolia: '
81
121
  Jekyll.logger.warn ' index_name: \'{your_index_name}\''
82
- puts ''
122
+ Jekyll.logger.warn ''
83
123
  Jekyll.logger.warn ' You can edit your indices in your dashboard'
84
124
  Jekyll.logger.warn ' https://www.algolia.com/explorer'
85
125
  exit 1
@@ -87,10 +127,13 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
87
127
  true
88
128
  end
89
129
 
130
+ # Get index settings
90
131
  def configure_index(index)
91
- default_settings = {
92
- attributeForDistinct: 'parent_id',
93
- attributesForFaceting: %w(tags type),
132
+ settings = {
133
+ typoTolerance: true,
134
+ distinct: true,
135
+ attributeForDistinct: 'title',
136
+ attributesForFaceting: %w(tags type title),
94
137
  attributesToIndex: %w(
95
138
  title h1 h2 h3 h4 h5 h6
96
139
  unordered(text)
@@ -98,143 +141,39 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
98
141
  ),
99
142
  attributesToRetrieve: %w(
100
143
  title h1 h2 h3 h4 h5 h6
101
- posted_at
102
- content
103
- text
104
144
  url
145
+ tag_name
146
+ raw_html
147
+ text
148
+ posted_at
105
149
  css_selector
150
+ css_selector_parent
106
151
  ),
107
152
  customRanking: ['desc(posted_at)', 'desc(title_weight)'],
108
- distinct: true,
109
153
  highlightPreTag: '<span class="algolia__result-highlight">',
110
154
  highlightPostTag: '</span>'
111
155
  }
112
- custom_settings = {}
113
- @config['algolia']['settings'].each do |key, value|
114
- custom_settings[key.to_sym] = value
115
- end
116
- settings = default_settings.merge(custom_settings)
117
-
118
- index.set_settings(settings)
119
- end
120
-
121
- def get_items_from_file(file)
122
- is_page = file.is_a?(Jekyll::Page)
123
- is_post = file.is_a?(Jekyll::Post)
124
-
125
- # We only index posts, and markdown pages
126
- return nil unless is_page || is_post
127
- return nil if is_page && !parseable?(file)
128
- return nil if excluded_file?(file)
129
-
130
- html = file.content.gsub("\n", ' ')
131
-
132
- if is_post
133
- tags = get_tags_from_post(file)
134
- base_data = {
135
- type: 'post',
136
- parent_id: file.id,
137
- url: file.url,
138
- title: file.title,
139
- tags: tags,
140
- slug: file.slug,
141
- posted_at: file.date.to_time.to_i
142
- }
143
- else
144
- base_data = {
145
- type: 'page',
146
- parent_id: file.basename,
147
- url: file.url,
148
- title: file['title'],
149
- slug: file.basename
150
- }
151
- end
152
-
153
- get_paragraphs_from_html(html, base_data)
154
- end
155
-
156
- # Get a list of tags from a post. Handle both classic string tags or
157
- # extended object tags
158
- def get_tags_from_post(post)
159
- tags = post.tags
160
- return [] if tags.is_a?(Array) || tags.nil?
161
- tags.map! { |tag| tag.to_s.gsub(',', '') }
162
- end
163
-
164
- # Get the list of headings (h1, h2, etc) above the specified node
165
- def get_previous_hx(node, memo = { level: 7 })
166
- previous = node.previous_sibling
167
- # Stop if no previous element
168
- unless previous
169
- memo.delete(:level)
170
- return memo
171
- end
172
-
173
- # Skip non-html elements
174
- return get_previous_hx(previous, memo) unless previous.element?
175
156
 
176
- # Skip non-title elements
177
- tag_name = previous.name
178
- possible_title_elements = %w(h1 h2 h3 h4 h5 h6)
179
- unless possible_title_elements.include?(tag_name)
180
- return get_previous_hx(previous, memo)
157
+ # Merge default settings with user custom ones
158
+ if @config['algolia'].key?('settings')
159
+ custom_settings = {}
160
+ @config['algolia']['settings'].each do |key, value|
161
+ custom_settings[key.to_sym] = value
162
+ end
163
+ settings.merge!(custom_settings)
181
164
  end
182
165
 
183
- # Skip if item already as title of a higher level
184
- title_level = tag_name.gsub('h', '').to_i
185
- return get_previous_hx(previous, memo) if title_level >= memo[:level]
186
- memo[:level] = title_level
187
-
188
- # Add to the memo and continue
189
- memo[tag_name.to_sym] = previous.content
190
- get_previous_hx(previous, memo)
191
- end
192
-
193
- # Get a custom value representing the number of word occurence from the
194
- # titles into the content
195
- def get_title_weight(content, item)
196
- # Get list of words
197
- words = %i(title h1 h2 h3 h4 h5 h6)
198
- .select { |title| item.key?(title) }
199
- .map { |title| item[title].split(/\W+/) }
200
- .flatten
201
- .compact
202
- .uniq
203
- # Count how many words are in the text
204
- weight = 0
205
- words.each { |word| weight += 1 if content.include?(word) }
206
- weight
207
- end
208
-
209
- # Will get a unique css selector for the node
210
- def get_css_selector(node)
211
- node.css_path.gsub('html > body > ', '')
212
- end
213
-
214
- # Get a list of items representing the different paragraphs
215
- def get_paragraphs_from_html(html, base_data)
216
- doc = Nokogiri::HTML(html)
217
- paragraphs = doc.css('p').map.with_index do |p, index|
218
- next unless p.text.size > 0
219
- new_item = base_data.clone
220
- new_item.merge!(get_previous_hx(p))
221
- new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
222
- new_item[:css_selector] = get_css_selector(p)
223
- new_item[:raw_html] = p.to_s
224
- new_item[:text] = p.content
225
- new_item[:title_weight] = get_title_weight(p.text, new_item)
226
- new_item
227
- end
228
- paragraphs.compact
166
+ index.set_settings(settings)
229
167
  end
230
168
 
231
169
  def push(items)
232
- api_key = AlgoliaSearchJekyll.api_key
233
- application_id = @config['algolia']['application_id']
234
- index_name = @config['algolia']['index_name']
235
- check_credentials(api_key, application_id, index_name)
170
+ check_credentials
236
171
 
237
- Algolia.init(application_id: application_id, api_key: api_key)
172
+ index_name = @config['algolia']['index_name']
173
+ Algolia.init(
174
+ application_id: @config['algolia']['application_id'],
175
+ api_key: api_key
176
+ )
238
177
  index = Algolia::Index.new(index_name)
239
178
  configure_index(index)
240
179
  index.clear_index
metadata CHANGED
@@ -1,15 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algoliasearch-jekyll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-30 00:00:00.000000000 Z
11
+ date: 2015-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: guard-rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '4.6'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '4.6'
13
55
  - !ruby/object:Gem::Dependency
14
56
  name: nokogiri
15
57
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +109,7 @@ dependencies:
67
109
  - !ruby/object:Gem::Version
68
110
  version: '1.4'
69
111
  description: Index all your pages and posts to an Algolia index with `jekyll algolia
70
- index`
112
+ push`
71
113
  email: tim@pixelastic.com
72
114
  executables: []
73
115
  extensions: []