algoliasearch-jekyll 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/push.rb +59 -32
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 33c6e72889f35db789517b3a9a8b86e97355dac4
4
- data.tar.gz: b10d1a46e90b2fe938aebc8f801d3a1b034f029e
3
+ metadata.gz: e1b192525e42f3f0b61d0587ab11fdef936acc10
4
+ data.tar.gz: b8557b3ae3e1df1d6e3561c3614a8c4e5e3dc23e
5
5
  SHA512:
6
- metadata.gz: 1687ec9dcfe975c964ca5d27c3a6f553d746399b1d7db71462bdad3463e40f0cecf89db9624d1eab80b7b07fe9880ecf0ead27aa68db7c74cdf6608ed83607c6
7
- data.tar.gz: df50afe1129386a620c1df879b3c90645d49e783e2a6e33fa86c1078ebfb717968f9a1576a99fc59022a612101dd605f1c5cadebc1f5d47640abb45cf7f5d987
6
+ metadata.gz: b52e88d691848055afbe673401adbef36faf32ad3de1c62985577e0790fd6f97eb4ad8dd0e4f717c6c43b38b5c41a194652e254ed91fdc397931f413fcab3670
7
+ data.tar.gz: 356ea6edf1d3fdc9ff1b1792294afaa966661ab3c50e3a6eee0d7567e381f742df423b3f6349eb7761393385635c9599144e1e9a0ba2c4c219580790e2eea7f5
data/lib/push.rb CHANGED
@@ -42,6 +42,10 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
42
42
  true
43
43
  end
44
44
 
45
+ def excluded_file?(file)
46
+ @config['algolia']['excluded_files'].include?(file.name)
47
+ end
48
+
45
49
  def check_credentials(api_key, application_id, index_name)
46
50
  unless api_key
47
51
  Jekyll.logger.error 'Algolia Error: No API key defined'
@@ -84,43 +88,34 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
84
88
  end
85
89
 
86
90
  def configure_index(index)
87
- index.set_settings(
91
+ default_settings = {
88
92
  attributeForDistinct: 'parent_id',
89
93
  attributesForFaceting: %w(tags type),
90
- attributesToHighlight: %w(title content),
91
- attributesToIndex: %w(title h1 h2 h3 h4 h5 h6 content tags),
92
- attributesToRetrieve: %w(title posted_at content url css_selector),
94
+ attributesToIndex: %w(
95
+ title h1 h2 h3 h4 h5 h6
96
+ unordered(text)
97
+ unordered(tags)
98
+ ),
99
+ attributesToRetrieve: %w(
100
+ title h1 h2 h3 h4 h5 h6
101
+ posted_at
102
+ content
103
+ text
104
+ url
105
+ css_selector
106
+ ),
93
107
  customRanking: ['desc(posted_at)', 'desc(title_weight)'],
94
108
  distinct: true,
95
109
  highlightPreTag: '<span class="algolia__result-highlight">',
96
110
  highlightPostTag: '</span>'
97
- )
98
- end
99
-
100
- def push(items)
101
- api_key = AlgoliaSearchJekyll.api_key
102
- application_id = @config['algolia']['application_id']
103
- index_name = @config['algolia']['index_name']
104
- check_credentials(api_key, application_id, index_name)
105
-
106
- Algolia.init(application_id: application_id, api_key: api_key)
107
- index = Algolia::Index.new(index_name)
108
- configure_index(index)
109
- index.clear_index
110
-
111
- items.each_slice(1000) do |batch|
112
- Jekyll.logger.info "Indexing #{batch.size} items"
113
- begin
114
- index.add_objects(batch)
115
- rescue StandardError => error
116
- Jekyll.logger.error 'Algolia Error: HTTP Error'
117
- Jekyll.logger.warn error.message
118
- exit 1
119
- end
111
+ }
112
+ custom_settings = {}
113
+ @config['algolia']['settings'].each do |key, value|
114
+ custom_settings[key.to_sym] = value
120
115
  end
116
+ settings = default_settings.merge(custom_settings)
121
117
 
122
- Jekyll.logger.info "Indexing of #{items.size} items " \
123
- "in #{index_name} done."
118
+ index.set_settings(settings)
124
119
  end
125
120
 
126
121
  def get_items_from_file(file)
@@ -130,6 +125,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
130
125
  # We only index posts, and markdown pages
131
126
  return nil unless is_page || is_post
132
127
  return nil if is_page && !parseable?(file)
128
+ return nil if excluded_file?(file)
133
129
 
134
130
  html = file.content.gsub("\n", ' ')
135
131
 
@@ -165,6 +161,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
165
161
  tags.map! { |tag| tag.to_s.gsub(',', '') }
166
162
  end
167
163
 
164
+ # Get the list of headings (h1, h2, etc) above the specified node
168
165
  def get_previous_hx(node, memo = { level: 7 })
169
166
  previous = node.previous_sibling
170
167
  # Stop if no previous element
@@ -189,7 +186,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
189
186
  memo[:level] = title_level
190
187
 
191
188
  # Add to the memo and continue
192
- memo[tag_name.to_sym] = previous.text
189
+ memo[tag_name.to_sym] = previous.content
193
190
  get_previous_hx(previous, memo)
194
191
  end
195
192
 
@@ -214,17 +211,47 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
214
211
  node.css_path.gsub('html > body > ', '')
215
212
  end
216
213
 
214
+ # Get a list of items representing the different paragraphs
217
215
  def get_paragraphs_from_html(html, base_data)
218
216
  doc = Nokogiri::HTML(html)
219
- doc.css('p').map.with_index do |p, index|
217
+ paragraphs = doc.css('p').map.with_index do |p, index|
218
+ next unless p.text.size > 0
220
219
  new_item = base_data.clone
221
220
  new_item.merge!(get_previous_hx(p))
222
221
  new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
223
222
  new_item[:css_selector] = get_css_selector(p)
224
- new_item[:content] = p.to_s
223
+ new_item[:raw_html] = p.to_s
224
+ new_item[:text] = p.content
225
225
  new_item[:title_weight] = get_title_weight(p.text, new_item)
226
226
  new_item
227
227
  end
228
+ paragraphs.compact
229
+ end
230
+
231
+ def push(items)
232
+ api_key = AlgoliaSearchJekyll.api_key
233
+ application_id = @config['algolia']['application_id']
234
+ index_name = @config['algolia']['index_name']
235
+ check_credentials(api_key, application_id, index_name)
236
+
237
+ Algolia.init(application_id: application_id, api_key: api_key)
238
+ index = Algolia::Index.new(index_name)
239
+ configure_index(index)
240
+ index.clear_index
241
+
242
+ items.each_slice(1000) do |batch|
243
+ Jekyll.logger.info "Indexing #{batch.size} items"
244
+ begin
245
+ index.add_objects(batch)
246
+ rescue StandardError => error
247
+ Jekyll.logger.error 'Algolia Error: HTTP Error'
248
+ Jekyll.logger.warn error.message
249
+ exit 1
250
+ end
251
+ end
252
+
253
+ Jekyll.logger.info "Indexing of #{items.size} items " \
254
+ "in #{index_name} done."
228
255
  end
229
256
  end
230
257
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: algoliasearch-jekyll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Carry
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri