algoliasearch-jekyll 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/algoliasearch-jekyll.rb +53 -0
  3. data/lib/push.rb +226 -0
  4. metadata +102 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 300e5c91ad0901840959d34b5cbb65d85f8508a8
4
+ data.tar.gz: fb8ec8f5a6a9afb49c81adf20c0b09ddbc4da15f
5
+ SHA512:
6
+ metadata.gz: cee39f180e347887d52eaf144530aa146e057be93f395291d7f74e4e4ee608de2d8a804b7a6334b5d38247d5e4b1e5641fc192c7ae5b55a6f3dd85eb9852fde4
7
+ data.tar.gz: 2864b693fa1409dda9edfe64724c2e771ab5d9b6c778f8d1034dcd931471e5d02ce5c8bff2708db4711a5898bb266d3c6190428c2fab26d6e0576007973a997b
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'awesome_print'
5
+
6
+ require_relative './push.rb'
7
+
8
+ # `jekyll algolia` main entry
9
+ class AlgoliaSearchJekyll < Jekyll::Command
10
+ class << self
11
# Registers the `algolia` command, and its `push` subcommand, on the given
# program.
#
# The `push` subcommand accepts the options declared by `add_build_options`.
# When run, it builds the configuration from the received options and hands
# everything over to AlgoliaSearchJekyllPush.process.
def init_with_program(prog)
  prog.command(:algolia) do |algolia|
    algolia.syntax('algolia <command> [options]')
    algolia.description('Keep your content in sync with your Algolia index')

    algolia.command(:push) do |push_command|
      push_command.syntax('push [options]')
      push_command.description('Push your content to your index')
      add_build_options(push_command)

      push_command.action do |args, options|
        # The configuration is kept so that `api_key` can read it later on
        @config = configuration_from_options(options)
        AlgoliaSearchJekyllPush.process(args, options, @config)
      end
    end
  end
end
29
+
30
# Declares, on the given command, the subset of the default `jekyll build`
# options that are supported. Each entry below is passed as-is to
# `command.option`, in order; the result of the last call is returned.
def add_build_options(command)
  supported_options = [
    ['config', '--config CONFIG_FILE[,CONFIG_FILE2,...]', Array,
     'Custom configuration file'],
    ['future', '--future', 'Index posts with a future date'],
    ['limit_posts', '--limit_posts MAX_POSTS', Integer,
     'Limits the number of posts to parse and index'],
    ['show_drafts', '-D', '--drafts', 'Index posts in the _drafts folder'],
    ['unpublished', '--unpublished',
     'Index posts that were marked as unpublished']
  ]

  supported_options.map { |definition| command.option(*definition) }.last
end
42
+
43
# Returns the Algolia API key, or nil when none can be found.
#
# The ALGOLIA_API_KEY environment variable is used when it is set. Otherwise
# the key is read from the `_algolia_api_key` file located in the configured
# source directory, with surrounding whitespace removed.
def api_key
  return ENV['ALGOLIA_API_KEY'] if ENV['ALGOLIA_API_KEY']

  key_file = File.join(@config['source'], '_algolia_api_key')
  # Only a regular file can hold the key; anything else means "no key"
  return nil unless File.file?(key_file)

  # File.read closes the file once done, unlike a bare File.open
  key = File.read(key_file).strip
  # A file holding only whitespace holds no key: report it as missing so the
  # caller can tell the user, instead of sending an empty key to the API
  key.empty? ? nil : key
end
52
+ end
53
+ end
data/lib/push.rb ADDED
@@ -0,0 +1,226 @@
1
+ require 'algoliasearch'
2
+ require 'nokogiri'
3
+ require 'json'
4
+
5
+ # `jekyll algolia push` command
6
+ class AlgoliaSearchJekyllPush < Jekyll::Command
7
+ class << self
8
# Intentionally does nothing with the program it receives.
def init_with_program(_prog); end
10
+
11
# Entry point of the push: processes the site and sends its content to the
# index instead of writing it to disk.
#
# args    - command-line arguments; the first one, when given, is the name
#           of the index to push to and overrides the configured one
# options - command-line options, kept as-is
# config  - site configuration, handed to Jekyll::Site.new
def process(args = [], options = {}, config = {})
  @args = args
  @options = options
  @config = config

  index_name = args[0]
  if index_name
    # The `algolia` section may be absent from the configuration; create it
    # rather than failing on nil when the index name comes from the
    # command line
    @config['algolia'] ||= {}
    @config['algolia']['index_name'] = index_name
  end

  site = Jekyll::Site.new(@config)

  # Instead of writing generated website to disk, we will push it to the
  # index
  def site.write
    items = []
    each_site_file do |file|
      new_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
      next if new_items.nil?
      # Append in place: `+=` would copy the whole list for every file
      items.concat(new_items)
    end
    AlgoliaSearchJekyllPush.push(items)
  end

  site.process
end
35
+
36
# Tells whether the extension of the given filename is one of the
# comma-separated extensions listed in the `markdown_ext` config entry.
def markdown?(filename)
  extension = File.extname(filename).delete('.')
  known_extensions = @config['markdown_ext'].split(',')
  known_extensions.any? { |candidate| candidate == extension }
end
40
+
41
# Makes sure everything needed to reach the index is defined.
#
# Returns true when the API key, the application id and the index name are
# all set. They are checked in that order; at the first one that is missing,
# an error and some help are displayed and the process exits with status 1.
def check_credentials(api_key, application_id, index_name)
  if !api_key
    explain_and_exit(
      'Algolia Error: No API key defined',
      [
        ' You have two ways to configure your API key:',
        ' - The ALGOLIA_API_KEY environment variable',
        ' - A file named ./_algolia_api_key in your source folder'
      ]
    )
  end

  if !application_id
    explain_and_exit(
      'Algolia Error: No application ID defined',
      [
        ' Please set your application id in the _config.yml file, like so:',
        :blank,
        # The spaces are needed otherwise the text is centered
        ' algolia: ',
        " application_id: '{your_application_id}'",
        :blank,
        ' Your application ID can be found in your algolia dashboard',
        ' https://www.algolia.com/licensing'
      ]
    )
  end

  if !index_name
    explain_and_exit(
      'Algolia Error: No index name defined',
      [
        ' Please set your index name in the _config.yml file, like so:',
        :blank,
        # The spaces are needed otherwise the text is centered
        ' algolia: ',
        " index_name: '{your_index_name}'",
        :blank,
        ' You can edit your indices in your dashboard',
        ' https://www.algolia.com/explorer'
      ]
    )
  end

  true
end

# Logs the error, then every help line as a warning (a :blank entry prints an
# empty line on standard output), and finally exits with status 1.
def explain_and_exit(error, help)
  Jekyll.logger.error error
  help.each do |line|
    if line == :blank
      puts ''
    else
      Jekyll.logger.warn line
    end
  end
  exit 1
end
81
+
82
# Applies the settings below to the given index through `set_settings`, and
# returns whatever that call returns.
def configure_index(index)
  settings = {
    attributeForDistinct: 'parent_id',
    attributesForFaceting: %w[tags type],
    attributesToHighlight: %w[title content],
    attributesToIndex: %w[title h1 h2 h3 h4 h5 h6 content tags],
    attributesToRetrieve: %w[title posted_at content url css_selector],
    customRanking: ['desc(posted_at)', 'desc(title_weight)'],
    distinct: true,
    highlightPreTag: '<span class="algolia__result-highlight">',
    highlightPostTag: '</span>'
  }

  index.set_settings(**settings)
end
95
+
96
# Replaces the content of the configured index with the given items.
#
# The credentials are checked first (see `check_credentials`, which exits
# when one is missing). The index is then configured, cleared, and filled
# with the items in batches of 1000. Any error raised while adding a batch
# is logged and the process exits with status 1.
def push(items)
  # The `algolia` section may be absent from the configuration. Fall back to
  # an empty one so that the missing values are reported by
  # `check_credentials` instead of failing on nil.
  algolia_config = @config['algolia'] || {}

  api_key = AlgoliaSearchJekyll.api_key
  application_id = algolia_config['application_id']
  index_name = algolia_config['index_name']
  check_credentials(api_key, application_id, index_name)

  Algolia.init(application_id: application_id, api_key: api_key)
  index = Algolia::Index.new(index_name)
  configure_index(index)
  # NOTE(review): the index is emptied before the new items are added, so it
  # is presumably incomplete while the push is running — confirm this is
  # acceptable
  index.clear_index

  items.each_slice(1000) do |batch|
    Jekyll.logger.info "Indexing #{batch.size} items"
    begin
      index.add_objects(batch)
    rescue StandardError => error
      Jekyll.logger.error 'Algolia Error: HTTP Error'
      Jekyll.logger.warn error.message
      exit 1
    end
  end

  Jekyll.logger.info "Indexing of #{items.size} items " \
                     "in #{index_name} done."
end
121
+
122
+ def get_items_from_file(file)
123
+ is_page = file.is_a?(Jekyll::Page)
124
+ is_post = file.is_a?(Jekyll::Post)
125
+
126
+ # We only index posts, and markdown pages
127
+ return nil unless is_page || is_post
128
+ return nil if is_page && !markdown?(file.path)
129
+
130
+ html = file.content.gsub("\n", ' ')
131
+
132
+ if is_post
133
+ tags = get_tags_from_post(file)
134
+ base_data = {
135
+ type: 'post',
136
+ parent_id: file.id,
137
+ url: file.url,
138
+ title: file.title,
139
+ tags: tags,
140
+ slug: file.slug,
141
+ posted_at: file.date.to_time.to_i
142
+ }
143
+ else
144
+ base_data = {
145
+ type: 'page',
146
+ parent_id: file.basename,
147
+ url: file.url,
148
+ title: file['title'],
149
+ slug: file.basename
150
+ }
151
+ end
152
+
153
+ get_paragraphs_from_html(html, base_data)
154
+ end
155
+
156
# Get the list of tags of a post, as strings with their commas removed.
# Handles both classic string tags and extended object tags (converted
# through `to_s`). Returns an empty list when the post has no tags.
#
# The list held by the post is left untouched: a new array is returned.
def get_tags_from_post(post)
  tags = post.tags
  return [] if tags.nil?

  # A lone tag is treated as a list of one
  tags = [tags] unless tags.is_a?(Array)
  tags.map { |tag| tag.to_s.delete(',') }
end
163
+
164
# Collects the headings (h1 to h6) found among the previous siblings of the
# given node, walking backwards from the closest one.
#
# A heading is kept only when its level is strictly higher in the hierarchy
# (lower number) than every heading already kept, so the result holds at
# most one text per level. The memo is returned as a hash such as
# { h3: 'text', h1: 'text' }, with its internal :level key removed.
def get_previous_hx(node, memo = { level: 7 })
  heading_tags = %w[h1 h2 h3 h4 h5 h6]
  sibling = node.previous_sibling

  while sibling
    # Non-html nodes and non-title elements are skipped
    if sibling.element? && heading_tags.include?(sibling.name)
      heading = sibling.name
      level = heading.delete('h').to_i
      # Skip if a title of the same or a higher level was already found
      if level < memo[:level]
        memo[:level] = level
        memo[heading.to_sym] = sibling.text
      end
    end
    sibling = sibling.previous_sibling
  end

  memo.delete(:level)
  memo
end
191
+
192
# Get a custom value representing the number of words from the titles
# (`title` and `h1` to `h6` entries of the item) that occur in the content.
#
# Each distinct word is counted once, and matched as a case-sensitive
# substring of the content. Missing or nil titles are ignored, and non-string
# titles are converted through `to_s`.
def get_title_weight(content, item)
  words = %i[title h1 h2 h3 h4 h5 h6]
          .flat_map { |key| item[key].to_s.scan(/\w+/) }
          .uniq
  # Scanning for words, rather than splitting on separators, never yields an
  # empty string, which every content would otherwise "include"
  words.count { |word| content.include?(word) }
end
207
+
208
# Returns the css path of the node, without its 'html > body > ' part
def get_css_selector(node)
  full_path = node.css_path
  full_path.gsub('html > body > ', '')
end
212
+
213
# Builds one item per `p` element found in the given html.
#
# Each item is a copy of base_data, completed with the headings preceding
# the paragraph (see `get_previous_hx`) and the following keys:
#   :objectID     - the :parent_id of the item and the position of the
#                   paragraph among the `p` elements, joined by `_`
#   :css_selector - see `get_css_selector`
#   :content      - the paragraph, markup included
#   :title_weight - see `get_title_weight`
def get_paragraphs_from_html(html, base_data)
  paragraphs = Nokogiri::HTML(html).css('p')

  paragraphs.each_with_index.map do |paragraph, position|
    item = base_data.clone.update(get_previous_hx(paragraph))
    item[:objectID] = "#{item[:parent_id]}_#{position}"
    item[:css_selector] = get_css_selector(paragraph)
    item[:content] = paragraph.to_s
    item[:title_weight] = get_title_weight(paragraph.text, item)
    item
  end
end
225
+ end
226
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: algoliasearch-jekyll
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Tim Carry
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: algoliasearch
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ description: Index all your pages and posts to an Algolia index with `jekyll algolia
70
+ push`
71
+ email: tim@pixelastic.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/algoliasearch-jekyll.rb
77
+ - lib/push.rb
78
+ homepage: https://github.com/algolia/algoliasearch-jekyll
79
+ licenses:
80
+ - MIT
81
+ metadata: {}
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.4.6
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: AlgoliaSearch for Jekyll
102
+ test_files: []