algoliasearch-jekyll 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/algoliasearch-jekyll.rb +1 -11
  3. data/lib/push.rb +87 -148
  4. metadata +45 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e1b192525e42f3f0b61d0587ab11fdef936acc10
4
- data.tar.gz: b8557b3ae3e1df1d6e3561c3614a8c4e5e3dc23e
3
+ metadata.gz: f1af115b167749491e0cfc3fbdfca8f7cbd0bfb1
4
+ data.tar.gz: 6cdc40cf3148a33400178ba9bb9076095adebd31
5
5
  SHA512:
6
- metadata.gz: b52e88d691848055afbe673401adbef36faf32ad3de1c62985577e0790fd6f97eb4ad8dd0e4f717c6c43b38b5c41a194652e254ed91fdc397931f413fcab3670
7
- data.tar.gz: 356ea6edf1d3fdc9ff1b1792294afaa966661ab3c50e3a6eee0d7567e381f742df423b3f6349eb7761393385635c9599144e1e9a0ba2c4c219580790e2eea7f5
6
+ metadata.gz: 371291f704b4029819eb5dbb59de2e3ac2ac90c8973ae678fb1890824f3c9b4470782b1bfe8b7cbb528a374e80eeb4e274440c53a5561efc5bb3d703d4e19ead
7
+ data.tar.gz: c508df8e04d78ae5db324ef678cb27ae46f5e067030a45a373dc399f25eca244263ba3bddf998e35b965fed31fb374d1e565014e6fe7a06631f4e4831811b097
@@ -21,7 +21,7 @@ class AlgoliaSearchJekyll < Jekyll::Command
21
21
 
22
22
  subcommand.action do |args, options|
23
23
  @config = configuration_from_options(options)
24
- AlgoliaSearchJekyllPush.process(args, options, @config)
24
+ AlgoliaSearchJekyllPush.init_options(args, options, @config).process
25
25
  end
26
26
  end
27
27
  end
@@ -39,15 +39,5 @@ class AlgoliaSearchJekyll < Jekyll::Command
39
39
  command.option 'unpublished', '--unpublished',
40
40
  'Index posts that were marked as unpublished'
41
41
  end
42
-
43
- def api_key
44
- return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
45
- key_file = File.join(@config['source'], '_algolia_api_key')
46
-
47
- if File.exist?(key_file) && File.size(key_file) > 0
48
- return File.open(key_file).read.strip
49
- end
50
- nil
51
- end
52
42
  end
53
43
  end
data/lib/push.rb CHANGED
@@ -1,52 +1,92 @@
1
1
  require 'algoliasearch'
2
2
  require 'nokogiri'
3
3
  require 'json'
4
+ require_relative './record_extractor.rb'
4
5
 
5
6
  # `jekyll algolia push` command
6
7
  class AlgoliaSearchJekyllPush < Jekyll::Command
7
8
  class << self
9
+ attr_accessor :options, :config
10
+
8
11
  def init_with_program(_prog)
9
12
  end
10
13
 
11
- def process(args = [], options = {}, config = {})
14
+ # Init the command with options passed on the command line
15
+ # `jekyll algolia push ARG1 ARG2 --OPTION_NAME1 OPTION_VALUE1`
16
+ # config comes from _config.yml
17
+ def init_options(args = [], options = {}, config = {})
18
+ args = [] unless args
12
19
  @args = args
13
20
  @options = options
14
21
  @config = config
15
22
 
23
+ # Allow for passing index name on the command line
16
24
  index_name = args[0]
17
-
18
25
  @config['algolia']['index_name'] = index_name if index_name
26
+ self
27
+ end
28
+
29
+ # Check if the specified file should be indexed (we exclude static files,
30
+ # robots.txt and custom defined exclusions).
31
+ def indexable?(file)
32
+ return false if file.is_a?(Jekyll::StaticFile)
33
+
34
+ # Keep only markdown and html files
35
+ allowed_extensions = %w(html)
36
+ if @config['markdown_ext']
37
+ allowed_extensions += @config['markdown_ext'].split(',')
38
+ end
39
+ current_extension = File.extname(file.name)[1..-1]
40
+ return false unless allowed_extensions.include?(current_extension)
41
+
42
+ # Exclude files manually excluded from config
43
+ excluded_files = @config['algolia']['excluded_files']
44
+ unless excluded_files.nil?
45
+ return false if excluded_files.include?(file.name)
46
+ end
47
+
48
+ true
49
+ end
50
+
51
+ # Run the default `jekyll build` command but overwrite the actual "write
52
+ # files on disk" part to instead push data to Algolia
53
+ def process
19
54
  site = Jekyll::Site.new(@config)
20
55
 
21
- # Instead of writing generated website to disk, we will push it to the
22
- # index
23
56
  def site.write
24
57
  items = []
25
58
  each_site_file do |file|
26
- new_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
59
+ next unless AlgoliaSearchJekyllPush.indexable?(file)
60
+
61
+ new_items = AlgoliaSearchRecordExtractor.new(file).extract
27
62
  next if new_items.nil?
28
63
  items += new_items
29
64
  end
30
65
  AlgoliaSearchJekyllPush.push(items)
31
66
  end
32
67
 
68
+ # This will call the build command by default, which will in turn call our
69
+ # custom .write method
33
70
  site.process
34
71
  end
35
72
 
36
- def parseable?(file)
37
- ext = file.ext.delete('.')
38
- # Allow markdown and html pages
39
- return true if @config['markdown_ext'].split(',').include?(ext)
40
- return false unless ext == 'html'
41
- return false unless file['title']
42
- true
43
- end
73
+ # Read the API key either from ENV or from an _algolia_api_key file in
74
+ # source folder
75
+ def api_key
76
+ # First read in ENV
77
+ return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']
44
78
 
45
- def excluded_file?(file)
46
- @config['algolia']['excluded_files'].include?(file.name)
79
+ # Otherwise from file in source directory
80
+ key_file = File.join(@config['source'], '_algolia_api_key')
81
+ if File.exist?(key_file) && File.size(key_file) > 0
82
+ return File.open(key_file).read.strip
83
+ end
84
+ nil
47
85
  end
48
86
 
49
- def check_credentials(api_key, application_id, index_name)
87
+ # Check that all credentials are present, and stop with a helpful message
88
+ # if not
89
+ def check_credentials
50
90
  unless api_key
51
91
  Jekyll.logger.error 'Algolia Error: No API key defined'
52
92
  Jekyll.logger.warn ' You have two ways to configure your API key:'
@@ -56,30 +96,30 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
56
96
  exit 1
57
97
  end
58
98
 
59
- unless application_id
99
+ unless @config['algolia']['application_id']
60
100
  Jekyll.logger.error 'Algolia Error: No application ID defined'
61
101
  Jekyll.logger.warn ' Please set your application id in the '\
62
102
  '_config.yml file, like so:'
63
- puts ''
103
+ Jekyll.logger.warn ''
64
104
  # The spaces are needed otherwise the text is centered
65
105
  Jekyll.logger.warn ' algolia: '
66
106
  Jekyll.logger.warn ' application_id: \'{your_application_id}\''
67
- puts ''
107
+ Jekyll.logger.warn ''
68
108
  Jekyll.logger.warn ' Your application ID can be found in your algolia'\
69
109
  ' dashboard'
70
110
  Jekyll.logger.warn ' https://www.algolia.com/licensing'
71
111
  exit 1
72
112
  end
73
113
 
74
- unless index_name
114
+ unless @config['algolia']['index_name']
75
115
  Jekyll.logger.error 'Algolia Error: No index name defined'
76
116
  Jekyll.logger.warn ' Please set your index name in the _config.yml'\
77
117
  ' file, like so:'
78
- puts ''
118
+ Jekyll.logger.warn ''
79
119
  # The spaces are needed otherwise the text is centered
80
120
  Jekyll.logger.warn ' algolia: '
81
121
  Jekyll.logger.warn ' index_name: \'{your_index_name}\''
82
- puts ''
122
+ Jekyll.logger.warn ''
83
123
  Jekyll.logger.warn ' You can edit your indices in your dashboard'
84
124
  Jekyll.logger.warn ' https://www.algolia.com/explorer'
85
125
  exit 1
@@ -87,10 +127,13 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
87
127
  true
88
128
  end
89
129
 
130
+ # Apply index settings (defaults merged with any user-defined ones)
90
131
  def configure_index(index)
91
- default_settings = {
92
- attributeForDistinct: 'parent_id',
93
- attributesForFaceting: %w(tags type),
132
+ settings = {
133
+ typoTolerance: true,
134
+ distinct: true,
135
+ attributeForDistinct: 'title',
136
+ attributesForFaceting: %w(tags type title),
94
137
  attributesToIndex: %w(
95
138
  title h1 h2 h3 h4 h5 h6
96
139
  unordered(text)
@@ -98,143 +141,39 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
98
141
  ),
99
142
  attributesToRetrieve: %w(
100
143
  title h1 h2 h3 h4 h5 h6
101
- posted_at
102
- content
103
- text
104
144
  url
145
+ tag_name
146
+ raw_html
147
+ text
148
+ posted_at
105
149
  css_selector
150
+ css_selector_parent
106
151
  ),
107
152
  customRanking: ['desc(posted_at)', 'desc(title_weight)'],
108
- distinct: true,
109
153
  highlightPreTag: '<span class="algolia__result-highlight">',
110
154
  highlightPostTag: '</span>'
111
155
  }
112
- custom_settings = {}
113
- @config['algolia']['settings'].each do |key, value|
114
- custom_settings[key.to_sym] = value
115
- end
116
- settings = default_settings.merge(custom_settings)
117
-
118
- index.set_settings(settings)
119
- end
120
-
121
- def get_items_from_file(file)
122
- is_page = file.is_a?(Jekyll::Page)
123
- is_post = file.is_a?(Jekyll::Post)
124
-
125
- # We only index posts, and markdown pages
126
- return nil unless is_page || is_post
127
- return nil if is_page && !parseable?(file)
128
- return nil if excluded_file?(file)
129
-
130
- html = file.content.gsub("\n", ' ')
131
-
132
- if is_post
133
- tags = get_tags_from_post(file)
134
- base_data = {
135
- type: 'post',
136
- parent_id: file.id,
137
- url: file.url,
138
- title: file.title,
139
- tags: tags,
140
- slug: file.slug,
141
- posted_at: file.date.to_time.to_i
142
- }
143
- else
144
- base_data = {
145
- type: 'page',
146
- parent_id: file.basename,
147
- url: file.url,
148
- title: file['title'],
149
- slug: file.basename
150
- }
151
- end
152
-
153
- get_paragraphs_from_html(html, base_data)
154
- end
155
-
156
- # Get a list of tags from a post. Handle both classic string tags or
157
- # extended object tags
158
- def get_tags_from_post(post)
159
- tags = post.tags
160
- return [] if tags.is_a?(Array) || tags.nil?
161
- tags.map! { |tag| tag.to_s.gsub(',', '') }
162
- end
163
-
164
- # Get the list of headings (h1, h2, etc) above the specified node
165
- def get_previous_hx(node, memo = { level: 7 })
166
- previous = node.previous_sibling
167
- # Stop if no previous element
168
- unless previous
169
- memo.delete(:level)
170
- return memo
171
- end
172
-
173
- # Skip non-html elements
174
- return get_previous_hx(previous, memo) unless previous.element?
175
156
 
176
- # Skip non-title elements
177
- tag_name = previous.name
178
- possible_title_elements = %w(h1 h2 h3 h4 h5 h6)
179
- unless possible_title_elements.include?(tag_name)
180
- return get_previous_hx(previous, memo)
157
+ # Merge default settings with user custom ones
158
+ if @config['algolia'].key?('settings')
159
+ custom_settings = {}
160
+ @config['algolia']['settings'].each do |key, value|
161
+ custom_settings[key.to_sym] = value
162
+ end
163
+ settings.merge!(custom_settings)
181
164
  end
182
165
 
183
- # Skip if item already has a title of a higher level
184
- title_level = tag_name.gsub('h', '').to_i
185
- return get_previous_hx(previous, memo) if title_level >= memo[:level]
186
- memo[:level] = title_level
187
-
188
- # Add to the memo and continue
189
- memo[tag_name.to_sym] = previous.content
190
- get_previous_hx(previous, memo)
191
- end
192
-
193
- # Get a custom value representing the number of word occurrences from the
194
- # titles into the content
195
- def get_title_weight(content, item)
196
- # Get list of words
197
- words = %i(title h1 h2 h3 h4 h5 h6)
198
- .select { |title| item.key?(title) }
199
- .map { |title| item[title].split(/\W+/) }
200
- .flatten
201
- .compact
202
- .uniq
203
- # Count how many words are in the text
204
- weight = 0
205
- words.each { |word| weight += 1 if content.include?(word) }
206
- weight
207
- end
208
-
209
- # Will get a unique css selector for the node
210
- def get_css_selector(node)
211
- node.css_path.gsub('html > body > ', '')
212
- end
213
-
214
- # Get a list of items representing the different paragraphs
215
- def get_paragraphs_from_html(html, base_data)
216
- doc = Nokogiri::HTML(html)
217
- paragraphs = doc.css('p').map.with_index do |p, index|
218
- next unless p.text.size > 0
219
- new_item = base_data.clone
220
- new_item.merge!(get_previous_hx(p))
221
- new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
222
- new_item[:css_selector] = get_css_selector(p)
223
- new_item[:raw_html] = p.to_s
224
- new_item[:text] = p.content
225
- new_item[:title_weight] = get_title_weight(p.text, new_item)
226
- new_item
227
- end
228
- paragraphs.compact
166
+ index.set_settings(settings)
229
167
  end
230
168
 
231
169
  def push(items)
232
- api_key = AlgoliaSearchJekyll.api_key
233
- application_id = @config['algolia']['application_id']
234
- index_name = @config['algolia']['index_name']
235
- check_credentials(api_key, application_id, index_name)
170
+ check_credentials
236
171
 
237
- Algolia.init(application_id: application_id, api_key: api_key)
172
+ index_name = @config['algolia']['index_name']
173
+ Algolia.init(
174
+ application_id: @config['algolia']['application_id'],
175
+ api_key: api_key
176
+ )
238
177
  index = Algolia::Index.new(index_name)
239
178
  configure_index(index)
240
179
  index.clear_index
metadata CHANGED
@@ -1,15 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algoliasearch-jekyll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-30 00:00:00.000000000 Z
11
+ date: 2015-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jekyll
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: guard-rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '4.6'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '4.6'
13
55
  - !ruby/object:Gem::Dependency
14
56
  name: nokogiri
15
57
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +109,7 @@ dependencies:
67
109
  - !ruby/object:Gem::Version
68
110
  version: '1.4'
69
111
  description: Index all your pages and posts to an Algolia index with `jekyll algolia
70
- index`
112
+ push`
71
113
  email: tim@pixelastic.com
72
114
  executables: []
73
115
  extensions: []