algoliasearch-jekyll 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/algoliasearch-jekyll.rb +53 -0
  3. data/lib/push.rb +226 -0
  4. metadata +102 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 300e5c91ad0901840959d34b5cbb65d85f8508a8
4
+ data.tar.gz: fb8ec8f5a6a9afb49c81adf20c0b09ddbc4da15f
5
+ SHA512:
6
+ metadata.gz: cee39f180e347887d52eaf144530aa146e057be93f395291d7f74e4e4ee608de2d8a804b7a6334b5d38247d5e4b1e5641fc192c7ae5b55a6f3dd85eb9852fde4
7
+ data.tar.gz: 2864b693fa1409dda9edfe64724c2e771ab5d9b6c778f8d1034dcd931471e5d02ce5c8bff2708db4711a5898bb266d3c6190428c2fab26d6e0576007973a997b
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ require 'awesome_print'
5
+
6
+ require_relative './push.rb'
7
+
8
+ # `jekyll algolia` main entry
9
+ class AlgoliaSearchJekyll < Jekyll::Command
10
+ class << self
11
# Register the `algolia` command and its `push` subcommand on the given
# program object.
#
# prog - the command-line program object handed over by Jekyll; only its
#        `command` method is used here.
#
# The `push` action stores the configuration built from the command-line
# options in @config, then hands over to AlgoliaSearchJekyllPush.process.
def init_with_program(prog)
  prog.command(:algolia) do |algolia|
    algolia.syntax('algolia <command> [options]')
    algolia.description('Keep your content in sync with your Algolia index')

    algolia.command(:push) do |push|
      push.syntax('push [options]')
      push.description('Push your content to your index')

      add_build_options(push)

      push.action do |args, options|
        @config = configuration_from_options(options)
        AlgoliaSearchJekyllPush.process(args, options, @config)
      end
    end
  end
end
29
+
30
+ # Allow a subset of the default `jekyll build` options
31
# Declare, on the given command, the options this plugin accepts.
#
# command - object responding to `option`; each entry below is passed to it
#           as one `option` call, in order.
#
# Returns whatever the last `option` call returns.
def add_build_options(command)
  option_specs = [
    ['config', '--config CONFIG_FILE[,CONFIG_FILE2,...]', Array,
     'Custom configuration file'],
    ['future', '--future', 'Index posts with a future date'],
    ['limit_posts', '--limit_posts MAX_POSTS', Integer,
     'Limits the number of posts to parse and index'],
    ['show_drafts', '-D', '--drafts', 'Index posts in the _drafts folder'],
    ['unpublished', '--unpublished',
     'Index posts that were marked as unpublished']
  ]

  option_specs.map { |spec| command.option(*spec) }.last
end
42
+
43
# Look up the Algolia API key.
#
# Sources, in order of priority:
#   1. the ALGOLIA_API_KEY environment variable
#   2. a `_algolia_api_key` file inside the configured `source` directory
#
# An empty environment variable, or a key file holding only whitespace, is
# treated as "no key" so callers get nil instead of a blank string.
#
# Returns the key as a String, or nil when none is configured.
def api_key
  env_key = ENV['ALGOLIA_API_KEY']
  return env_key if env_key && !env_key.empty?

  key_file = File.join(@config['source'], '_algolia_api_key')
  return nil unless File.file?(key_file)

  # File.read closes the handle itself (File.open(...).read left it open).
  key = File.read(key_file).strip
  key.empty? ? nil : key
end
52
+ end
53
+ end
data/lib/push.rb ADDED
@@ -0,0 +1,226 @@
1
+ require 'algoliasearch'
2
+ require 'nokogiri'
3
+ require 'json'
4
+
5
+ # `jekyll algolia push` command
6
+ class AlgoliaSearchJekyllPush < Jekyll::Command
7
+ class << self
8
# Intentionally a no-op: the `push` subcommand is registered by
# AlgoliaSearchJekyll.init_with_program, so nothing is added here.
def init_with_program(_prog)
  nil
end
10
+
11
# Entry point of `jekyll algolia push`.
#
# args    - positional command-line arguments; args[0], when present,
#           overrides the configured index name.
# options - command-line options (stored in @options).
# config  - site configuration hash (stored in @config, may be mutated to
#           hold the overridden index name).
#
# Builds a Jekyll::Site from the configuration and processes it, but with
# `write` replaced so that records are pushed to the index instead of the
# generated site being written to disk.
def process(args = [], options = {}, config = {})
  @args = args
  @options = options
  @config = config

  index_name = args[0]
  # The `algolia` section may be missing from the configuration entirely;
  # create it on demand rather than failing on nil.
  (@config['algolia'] ||= {})['index_name'] = index_name if index_name

  site = Jekyll::Site.new(@config)

  # Instead of writing generated website to disk, we will push it to the
  # index
  def site.write
    items = []
    each_site_file do |file|
      file_items = AlgoliaSearchJekyllPush.get_items_from_file(file)
      items.concat(file_items) unless file_items.nil?
    end
    AlgoliaSearchJekyllPush.push(items)
  end

  site.process
end
35
+
36
# Tell whether a file name carries one of the configured markdown
# extensions.
#
# filename - path or file name to test.
#
# The list comes from @config['markdown_ext'], a comma-separated string.
# Matching ignores case and surrounding spaces, so `README.MD` is accepted
# like `README.md` (Jekyll's own Markdown converter also matches extensions
# case-insensitively). A missing setting means nothing is markdown.
#
# Returns true or false.
def markdown?(filename)
  ext = File.extname(filename).delete('.').downcase
  return false if ext.empty?

  @config['markdown_ext']
    .to_s
    .split(',')
    .map { |candidate| candidate.strip.downcase }
    .include?(ext)
end
40
+
41
# Make sure every credential needed to talk to Algolia is present.
#
# The values are checked in order: API key, application ID, index name. For
# the first one that is nil/false, an explanation is sent to Jekyll.logger
# and the process exits with status 1.
#
# Returns true when all three values are present.
def check_credentials(api_key, application_id, index_name)
  # Each entry is [logger level, message]; :blank stands for a bare
  # `puts ''`. The leading spaces in messages are needed, otherwise the
  # text is centered.
  report =
    if !api_key
      [
        [:error, 'Algolia Error: No API key defined'],
        [:warn, ' You have two ways to configure your API key:'],
        [:warn, ' - The ALGOLIA_API_KEY environment variable'],
        [:warn, ' - A file named ./_algolia_api_key in your source folder']
      ]
    elsif !application_id
      [
        [:error, 'Algolia Error: No application ID defined'],
        [:warn, ' Please set your application id in the _config.yml file, ' \
                'like so:'],
        [:blank, nil],
        [:warn, ' algolia: '],
        [:warn, " application_id: '{your_application_id}'"],
        [:blank, nil],
        [:warn, ' Your application ID can be found in your algolia dashboard'],
        [:warn, ' https://www.algolia.com/licensing']
      ]
    elsif !index_name
      [
        [:error, 'Algolia Error: No index name defined'],
        [:warn, ' Please set your index name in the _config.yml file, ' \
                'like so:'],
        [:blank, nil],
        [:warn, ' algolia: '],
        [:warn, " index_name: '{your_index_name}'"],
        [:blank, nil],
        [:warn, ' You can edit your indices in your dashboard'],
        [:warn, ' https://www.algolia.com/explorer']
      ]
    end

  return true if report.nil?

  report.each do |level, message|
    if level == :blank
      puts ''
    else
      Jekyll.logger.public_send(level, message)
    end
  end
  exit 1
end
81
+
82
# Apply the plugin's fixed set of settings to an index.
#
# index - object responding to `set_settings`.
#
# Returns whatever `set_settings` returns.
def configure_index(index)
  settings = {
    attributeForDistinct: 'parent_id',
    attributesForFaceting: %w[tags type],
    attributesToHighlight: %w[title content],
    attributesToIndex: %w[title h1 h2 h3 h4 h5 h6 content tags],
    attributesToRetrieve: %w[title posted_at content url css_selector],
    customRanking: ['desc(posted_at)', 'desc(title_weight)'],
    distinct: true,
    highlightPreTag: '<span class="algolia__result-highlight">',
    highlightPostTag: '</span>'
  }

  # Splatted so the call has the same shape as a brace-less hash argument.
  index.set_settings(**settings)
end
95
+
96
# Replace the content of the configured Algolia index with the given
# records.
#
# items - Array of records to index.
#
# Credentials are validated first through check_credentials. The index is
# then configured, cleared, and filled in batches of 1000 records. If a
# batch fails, the error is logged and the process exits with status 1.
def push(items)
  # The `algolia` section may be absent from the configuration; fall back
  # to an empty hash so check_credentials can print its explanation
  # instead of this method failing on nil.
  algolia_config = (@config || {})['algolia'] || {}

  api_key = AlgoliaSearchJekyll.api_key
  application_id = algolia_config['application_id']
  index_name = algolia_config['index_name']
  check_credentials(api_key, application_id, index_name)

  Algolia.init(application_id: application_id, api_key: api_key)
  index = Algolia::Index.new(index_name)
  configure_index(index)
  index.clear_index

  items.each_slice(1000) do |batch|
    Jekyll.logger.info "Indexing #{batch.size} items"
    begin
      index.add_objects(batch)
    rescue StandardError => error
      Jekyll.logger.error 'Algolia Error: HTTP Error'
      Jekyll.logger.warn error.message
      exit 1
    end
  end

  Jekyll.logger.info "Indexing of #{items.size} items " \
                     "in #{index_name} done."
end
121
+
122
# Turn one site file into a list of index records.
#
# file - a site file; only Jekyll::Post instances and markdown
#        Jekyll::Page instances are indexed.
#
# Returns nil for any other file, otherwise the result of
# get_paragraphs_from_html for the file content (newlines replaced by
# spaces) and the attributes shared by all its records.
def get_items_from_file(file)
  page = file.is_a?(Jekyll::Page)
  post = file.is_a?(Jekyll::Post)

  # We only index posts, and markdown pages
  return nil if !page && !post
  return nil if page && !markdown?(file.path)

  flattened = file.content.gsub("\n", ' ')

  shared =
    if post
      post_tags = get_tags_from_post(file)
      {
        type: 'post',
        parent_id: file.id,
        url: file.url,
        title: file.title,
        tags: post_tags,
        slug: file.slug,
        posted_at: file.date.to_time.to_i
      }
    else
      {
        type: 'page',
        parent_id: file.basename,
        url: file.url,
        title: file['title'],
        slug: file.basename
      }
    end

  get_paragraphs_from_html(flattened, shared)
end
155
+
156
+ # Get a list of tags from a post. Handle both classic string tags or
157
+ # extended object tags
158
# Extract the tags of a post as plain strings.
#
# post - object responding to `tags`.
#
# Every tag is converted with `to_s` (so tag objects are supported as well
# as plain strings) and stripped of commas. A nil tag list yields an empty
# array and a single non-array value is treated as a one-element list. The
# post's own tag list is left untouched.
#
# Returns an Array of Strings.
def get_tags_from_post(post)
  # The previous guard returned [] whenever tags *was* an Array, which
  # discarded every real tag list.
  Array(post.tags).map { |tag| tag.to_s.delete(',') }
end
163
+
164
# Collect the headings that precede a node among its previous siblings.
#
# node - object responding to `previous_sibling`; siblings must respond to
#        `element?`, `name`, `text` and `previous_sibling`.
# memo - Hash the headings are written into. Its :level entry is the lowest
#        heading level recorded so far (7 means "none yet"); it is removed
#        before the hash is returned.
#
# Walking backwards, an h1..h6 sibling is recorded only when its level is
# strictly lower than every level recorded before it, so the result holds
# the closest heading plus its successive higher-level headings.
#
# Returns memo, with entries such as { h2: 'text', h1: 'text' }.
def get_previous_hx(node, memo = { level: 7 })
  heading_tags = %w[h1 h2 h3 h4 h5 h6]
  sibling = node.previous_sibling

  while sibling
    # Only real elements that are headings are of interest
    if sibling.element? && heading_tags.include?(sibling.name)
      tag = sibling.name
      depth = tag.gsub('h', '').to_i
      # Ignore headings that are not above the ones already recorded
      if depth < memo[:level]
        memo[:level] = depth
        memo[tag.to_sym] = sibling.text
      end
    end
    sibling = sibling.previous_sibling
  end

  memo.delete(:level)
  memo
end
191
+
192
+ # Get a custom value representing how many distinct words from the titles
193
+ # occur in the content
194
# Count how many distinct title words appear in a piece of content.
#
# content - String to search in.
# item    - Hash that may hold :title and :h1..:h6 entries. Entries that
#           are missing or nil are ignored (a page without a title has a
#           :title key holding nil); other values are converted with to_s.
#
# Words are split on non-word characters. Empty tokens are dropped because
# `include?('')` is always true and would inflate the weight.
#
# Returns an Integer.
def get_title_weight(content, item)
  words = %i[title h1 h2 h3 h4 h5 h6]
          .flat_map { |key| item[key].to_s.split(/\W+/) }
          .reject(&:empty?)
          .uniq

  words.count { |word| content.include?(word) }
end
207
+
208
+ # Will get a unique css selector for the node
209
# Build a CSS selector for a node from its `css_path`, with every
# 'html > body > ' occurrence removed.
#
# node - object responding to `css_path`.
#
# Returns a String.
def get_css_selector(node)
  full_path = node.css_path
  full_path.gsub('html > body > ', '')
end
212
+
213
# Build one index record per <p> element found in an HTML string.
#
# html      - String of HTML, parsed with Nokogiri::HTML.
# base_data - Hash of attributes shared by every record; it is cloned for
#             each record and not modified.
#
# Each record receives the preceding headings (get_previous_hx), an
# objectID made of the parent id and the paragraph position, a CSS
# selector, the paragraph markup as content, and a title weight.
#
# Returns an Array of Hashes.
def get_paragraphs_from_html(html, base_data)
  paragraphs = Nokogiri::HTML(html).css('p')

  paragraphs.each_with_index.map do |paragraph, position|
    record = base_data.clone
    record.merge!(get_previous_hx(paragraph))
    record[:objectID] = "#{record[:parent_id]}_#{position}"
    record[:css_selector] = get_css_selector(paragraph)
    record[:content] = paragraph.to_s
    # Computed last: the weight is derived from the record's title/headings
    record[:title_weight] = get_title_weight(paragraph.text, record)
    record
  end
end
225
+ end
226
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: algoliasearch-jekyll
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Tim Carry
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-06-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: algoliasearch
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ description: Index all your pages and posts to an Algolia index with `jekyll algolia
70
+ push`
71
+ email: tim@pixelastic.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - lib/algoliasearch-jekyll.rb
77
+ - lib/push.rb
78
+ homepage: https://github.com/algolia/algoliasearch-jekyll
79
+ licenses:
80
+ - MIT
81
+ metadata: {}
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.4.6
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: AlgoliaSearch for Jekyll
102
+ test_files: []