searchyll 0.10.0 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +231 -0
- data/CHANGELOG.md +62 -0
- data/lib/searchyll.rb +2 -2
- data/lib/searchyll/configuration.rb +1 -2
- data/lib/searchyll/generator.rb +6 -0
- data/lib/searchyll/indexer.rb +126 -61
- data/lib/searchyll/version.rb +1 -1
- data/searchyll.gemspec +0 -1
- metadata +6 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3adc41e5707f84efa1fcadb911c1fefb96849a23
|
4
|
+
data.tar.gz: f311750a8fbf77c24e6e32946da3540550f80a1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fc8750c7e1768fcdd80aa658edfcb2371d13750989db37f43dccd9456b2567354bc954b267d327be8e2b3e54ad20be36dfd90314d024a46582ab32df2bb585f
|
7
|
+
data.tar.gz: 844f042d27f019131836c72672a2452555140dbfc821ed4385961b6043c6572c95b88820515e3106adf0d62e0de81340017a17e6c270df6baff3069e3922311c
|
data/.rubocop.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,231 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2018-06-30 11:54:09 -0500 using RuboCop version 0.56.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 2
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
# Configuration parameters: Include, TreatCommentsAsGroupSeparators.
|
12
|
+
# Include: **/*.gemspec
|
13
|
+
Gemspec/OrderedDependencies:
|
14
|
+
Exclude:
|
15
|
+
- 'searchyll.gemspec'
|
16
|
+
|
17
|
+
# Offense count: 1
|
18
|
+
# Cop supports --auto-correct.
|
19
|
+
Layout/EmptyLineAfterMagicComment:
|
20
|
+
Exclude:
|
21
|
+
- 'searchyll.gemspec'
|
22
|
+
|
23
|
+
# Offense count: 8
|
24
|
+
# Cop supports --auto-correct.
|
25
|
+
# Configuration parameters: EnforcedStyle.
|
26
|
+
# SupportedStyles: empty_lines, no_empty_lines
|
27
|
+
Layout/EmptyLinesAroundBlockBody:
|
28
|
+
Exclude:
|
29
|
+
- 'Guardfile'
|
30
|
+
- 'searchyll.gemspec'
|
31
|
+
- 'spec/searchyll/generator_spec.rb'
|
32
|
+
- 'spec/searchyll/indexer_spec.rb'
|
33
|
+
- 'spec/searchyll_spec.rb'
|
34
|
+
|
35
|
+
# Offense count: 2
|
36
|
+
# Cop supports --auto-correct.
|
37
|
+
# Configuration parameters: EnforcedStyle.
|
38
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines, beginning_only, ending_only
|
39
|
+
Layout/EmptyLinesAroundClassBody:
|
40
|
+
Exclude:
|
41
|
+
- 'lib/searchyll/generator.rb'
|
42
|
+
|
43
|
+
# Offense count: 1
|
44
|
+
# Cop supports --auto-correct.
|
45
|
+
Layout/EmptyLinesAroundExceptionHandlingKeywords:
|
46
|
+
Exclude:
|
47
|
+
- 'lib/searchyll.rb'
|
48
|
+
|
49
|
+
# Offense count: 1
|
50
|
+
# Cop supports --auto-correct.
|
51
|
+
Layout/EmptyLinesAroundMethodBody:
|
52
|
+
Exclude:
|
53
|
+
- 'lib/searchyll/generator.rb'
|
54
|
+
|
55
|
+
# Offense count: 2
|
56
|
+
# Cop supports --auto-correct.
|
57
|
+
# Configuration parameters: EnforcedStyle.
|
58
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
|
59
|
+
Layout/EmptyLinesAroundModuleBody:
|
60
|
+
Exclude:
|
61
|
+
- 'lib/searchyll/generator.rb'
|
62
|
+
|
63
|
+
# Offense count: 4
|
64
|
+
# Cop supports --auto-correct.
|
65
|
+
# Configuration parameters: IndentationWidth.
|
66
|
+
# SupportedStyles: special_inside_parentheses, consistent, align_braces
|
67
|
+
Layout/IndentHash:
|
68
|
+
EnforcedStyle: consistent
|
69
|
+
|
70
|
+
# Offense count: 1
|
71
|
+
# Cop supports --auto-correct.
|
72
|
+
Layout/SpaceAfterSemicolon:
|
73
|
+
Exclude:
|
74
|
+
- 'lib/searchyll/generator.rb'
|
75
|
+
|
76
|
+
# Offense count: 2
|
77
|
+
# Cop supports --auto-correct.
|
78
|
+
# Configuration parameters: AllowForAlignment.
|
79
|
+
Layout/SpaceAroundOperators:
|
80
|
+
Exclude:
|
81
|
+
- 'lib/searchyll/configuration.rb'
|
82
|
+
|
83
|
+
# Offense count: 1
|
84
|
+
# Cop supports --auto-correct.
|
85
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
|
86
|
+
# SupportedStyles: space, no_space
|
87
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
88
|
+
Layout/SpaceBeforeBlockBraces:
|
89
|
+
Exclude:
|
90
|
+
- 'lib/searchyll/generator.rb'
|
91
|
+
|
92
|
+
# Offense count: 3
|
93
|
+
# Cop supports --auto-correct.
|
94
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBrackets.
|
95
|
+
# SupportedStyles: space, no_space, compact
|
96
|
+
# SupportedStylesForEmptyBrackets: space, no_space
|
97
|
+
Layout/SpaceInsideArrayLiteralBrackets:
|
98
|
+
Exclude:
|
99
|
+
- 'searchyll.gemspec'
|
100
|
+
|
101
|
+
# Offense count: 2
|
102
|
+
# Cop supports --auto-correct.
|
103
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
104
|
+
# SupportedStyles: space, no_space
|
105
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
106
|
+
Layout/SpaceInsideBlockBraces:
|
107
|
+
Exclude:
|
108
|
+
- 'lib/searchyll/generator.rb'
|
109
|
+
|
110
|
+
# Offense count: 1
|
111
|
+
# Cop supports --auto-correct.
|
112
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
|
113
|
+
# SupportedStyles: space, no_space, compact
|
114
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
115
|
+
Layout/SpaceInsideHashLiteralBraces:
|
116
|
+
Exclude:
|
117
|
+
- 'spec/searchyll/generator_spec.rb'
|
118
|
+
|
119
|
+
# Offense count: 1
|
120
|
+
# Cop supports --auto-correct.
|
121
|
+
Lint/LiteralInInterpolation:
|
122
|
+
Exclude:
|
123
|
+
- 'lib/searchyll/generator.rb'
|
124
|
+
|
125
|
+
# Offense count: 1
|
126
|
+
Metrics/AbcSize:
|
127
|
+
Max: 20
|
128
|
+
|
129
|
+
# Offense count: 1
|
130
|
+
# Configuration parameters: CountComments.
|
131
|
+
Metrics/MethodLength:
|
132
|
+
Max: 15
|
133
|
+
|
134
|
+
# Offense count: 2
|
135
|
+
# Cop supports --auto-correct.
|
136
|
+
# Configuration parameters: EnforcedStyle.
|
137
|
+
# SupportedStyles: braces, no_braces, context_dependent
|
138
|
+
Style/BracesAroundHashParameters:
|
139
|
+
Exclude:
|
140
|
+
- 'lib/searchyll.rb'
|
141
|
+
|
142
|
+
# Offense count: 2
|
143
|
+
Style/Documentation:
|
144
|
+
Exclude:
|
145
|
+
- 'spec/**/*'
|
146
|
+
- 'test/**/*'
|
147
|
+
- 'lib/searchyll/configuration.rb'
|
148
|
+
- 'lib/searchyll/generator.rb'
|
149
|
+
|
150
|
+
# Offense count: 1
|
151
|
+
# Cop supports --auto-correct.
|
152
|
+
Style/Encoding:
|
153
|
+
Exclude:
|
154
|
+
- 'searchyll.gemspec'
|
155
|
+
|
156
|
+
# Offense count: 2
|
157
|
+
# Cop supports --auto-correct.
|
158
|
+
Style/ExpandPathArguments:
|
159
|
+
Exclude:
|
160
|
+
- 'searchyll.gemspec'
|
161
|
+
- 'spec/spec_helper.rb'
|
162
|
+
|
163
|
+
# Offense count: 1
|
164
|
+
# Cop supports --auto-correct.
|
165
|
+
# Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
|
166
|
+
# SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
|
167
|
+
Style/HashSyntax:
|
168
|
+
Exclude:
|
169
|
+
- 'Rakefile'
|
170
|
+
|
171
|
+
# Offense count: 1
|
172
|
+
# Cop supports --auto-correct.
|
173
|
+
Style/MutableConstant:
|
174
|
+
Exclude:
|
175
|
+
- 'lib/searchyll/version.rb'
|
176
|
+
|
177
|
+
# Offense count: 1
|
178
|
+
# Cop supports --auto-correct.
|
179
|
+
# Configuration parameters: PreferredDelimiters.
|
180
|
+
Style/PercentLiteralDelimiters:
|
181
|
+
Exclude:
|
182
|
+
- 'searchyll.gemspec'
|
183
|
+
|
184
|
+
# Offense count: 2
|
185
|
+
# Cop supports --auto-correct.
|
186
|
+
# Configuration parameters: EnforcedStyle.
|
187
|
+
# SupportedStyles: implicit, explicit
|
188
|
+
Style/RescueStandardError:
|
189
|
+
Exclude:
|
190
|
+
- 'lib/searchyll.rb'
|
191
|
+
- 'lib/searchyll/generator.rb'
|
192
|
+
|
193
|
+
# Offense count: 1
|
194
|
+
# Cop supports --auto-correct.
|
195
|
+
# Configuration parameters: AllowAsExpressionSeparator.
|
196
|
+
Style/Semicolon:
|
197
|
+
Exclude:
|
198
|
+
- 'lib/searchyll/generator.rb'
|
199
|
+
|
200
|
+
# Offense count: 2
|
201
|
+
# Cop supports --auto-correct.
|
202
|
+
Style/StderrPuts:
|
203
|
+
Exclude:
|
204
|
+
- 'lib/searchyll/generator.rb'
|
205
|
+
|
206
|
+
# Offense count: 42
|
207
|
+
# Cop supports --auto-correct.
|
208
|
+
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
|
209
|
+
# SupportedStyles: single_quotes, double_quotes
|
210
|
+
Style/StringLiterals:
|
211
|
+
Exclude:
|
212
|
+
- 'Guardfile'
|
213
|
+
- 'Rakefile'
|
214
|
+
- 'bin/console'
|
215
|
+
- 'lib/searchyll.rb'
|
216
|
+
- 'lib/searchyll/configuration.rb'
|
217
|
+
- 'lib/searchyll/generator.rb'
|
218
|
+
- 'lib/searchyll/version.rb'
|
219
|
+
- 'searchyll.gemspec'
|
220
|
+
|
221
|
+
# Offense count: 1
|
222
|
+
# Cop supports --auto-correct.
|
223
|
+
Style/UnneededPercentQ:
|
224
|
+
Exclude:
|
225
|
+
- 'searchyll.gemspec'
|
226
|
+
|
227
|
+
# Offense count: 2
|
228
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
229
|
+
# URISchemes: http, https
|
230
|
+
Metrics/LineLength:
|
231
|
+
Max: 120
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
|
4
|
+
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
|
+
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
## v0.10.2 - 2018-06-30
|
10
|
+
### Added
|
11
|
+
- Version bump: v0.10.2., by Allison Zadrozny <allison@zadrozny.com>, [7930a3e](https://github.com/omc/searchyll/commit/7930a3e)
|
12
|
+
- Add & backfill the Changelog., by Allison Zadrozny <allison@zadrozny.com>, [cafe45c](https://github.com/omc/searchyll/commit/cafe45c)
|
13
|
+
- Merge pull request #30 from omc/allizad/remove-elasticsearch-ruby, by Allison Zadrozny <allison@zadrozny.com>, [e05d874](https://github.com/omc/searchyll/commit/e05d874)
|
14
|
+
- Remove unnecessary elasticsearch-ruby dependency., by Allison Zadrozny <allison@zadrozny.com>, [354b58b](https://github.com/omc/searchyll/commit/354b58b)
|
15
|
+
- Merge pull request #29 from omc/nz/reindex-cadence, by Allison Zadrozny <allison@zadrozny.com>, [9de8214](https://github.com/omc/searchyll/commit/9de8214)
|
16
|
+
- reindex with a cadence and a dynamic batch size, by Nick Zadrozny <nick@beyondthepath.com>, [a4aa544](https://github.com/omc/searchyll/commit/a4aa544)
|
17
|
+
- Rubocop and some light refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [150ea0d](https://github.com/omc/searchyll/commit/150ea0d)
|
18
|
+
- Merge pull request #28 from omc/robsears-patch-2, by Allison Zadrozny <allison@zadrozny.com>, [ffd360c](https://github.com/omc/searchyll/commit/ffd360c)
|
19
|
+
- Increment the version, by Rob Sears <secure@robsears.com>, [c4ee100](https://github.com/omc/searchyll/commit/c4ee100)
|
20
|
+
- Don't break site generation if the Elasticsearch URL is missing, by Rob Sears <secure@robsears.com>, [9ddd016](https://github.com/omc/searchyll/commit/9ddd016)
|
21
|
+
- Merge pull request #26 from omc/robsears-patch-1, by Allison Zadrozny <allison@zadrozny.com>, [bce8974](https://github.com/omc/searchyll/commit/bce8974)
|
22
|
+
- Fix a silly spelling error., by Rob <rc.sears@gmail.com>, [b280436](https://github.com/omc/searchyll/commit/b280436)
|
23
|
+
- Auth should be optional, by Rob <rc.sears@gmail.com>, [d7d694e](https://github.com/omc/searchyll/commit/d7d694e)
|
24
|
+
|
25
|
+
## v0.10.0 - 2018-06-07
|
26
|
+
### Added
|
27
|
+
- version bump to 0.10.0, by Nick Zadrozny <nick@beyondthepath.com>, [097ddb9](https://github.com/omc/searchyll/commit/097ddb9)
|
28
|
+
- Merge pull request #24 from omc/collections, by Dru Sellers <dru@drusellers.com>, [7d1f044](https://github.com/omc/searchyll/commit/7d1f044)
|
29
|
+
- Index collections as well as posts, by Dru Sellers <dru@drusellers.com>, [2cdaaa8](https://github.com/omc/searchyll/commit/2cdaaa8)
|
30
|
+
- Merge pull request #18 from omc/v5-accept-json, by Allison Zadrozny <allison@zadrozny.com>, [ee7e309](https://github.com/omc/searchyll/commit/ee7e309)
|
31
|
+
- Merge pull request #19 from matthewdu/fix-index-creation, by Allison Zadrozny <allison@zadrozny.com>, [0d34bb9](https://github.com/omc/searchyll/commit/0d34bb9)
|
32
|
+
- Use PUT instead of POST, by Matthew Du <du.matthew@gmail.com>, [b905d73](https://github.com/omc/searchyll/commit/b905d73)
|
33
|
+
- provide an Accept header with all requests, for v5 compat, by Nick Zadrozny <nick@beyondthepath.com>, [2f28fae](https://github.com/omc/searchyll/commit/2f28fae)
|
34
|
+
- Merge pull request #16 from omc/add-print-statement, by Allison Zadrozny <allison@zadrozny.com>, [e236c54](https://github.com/omc/searchyll/commit/e236c54)
|
35
|
+
- Skip index deletion if there are no old indices, by Allison Zadrozny <allison@zadrozny.com>, [b33546e](https://github.com/omc/searchyll/commit/b33546e)
|
36
|
+
- Add print statement to indexer, by Allison Zadrozny <allison@zadrozny.com>, [9af6f13](https://github.com/omc/searchyll/commit/9af6f13)
|
37
|
+
|
38
|
+
## v0.9.0 - 2018-05-07
|
39
|
+
### Added
|
40
|
+
- license, by Nick Zadrozny <nick@beyondthepath.com>, [b221803](https://github.com/omc/searchyll/commit/b221803)
|
41
|
+
- Ignore the packaged gem, by Nick Zadrozny <nick@beyondthepath.com>, [04a9764](https://github.com/omc/searchyll/commit/04a9764)
|
42
|
+
- GPLv3 license, by Nick Zadrozny <nick@beyondthepath.com>, [150f6ed](https://github.com/omc/searchyll/commit/150f6ed)
|
43
|
+
- Update gemspec, readme and version for first publish of the gem, by Nick Zadrozny <nick@beyondthepath.com>, [e30ba44](https://github.com/omc/searchyll/commit/e30ba44)
|
44
|
+
- Change version number, by Allison Zadrozny <allison@zadrozny.com>, [9cf886e](https://github.com/omc/searchyll/commit/9cf886e)
|
45
|
+
- Make searchyll into a hook, by Allison Zadrozny <allison@zadrozny.com>, [46d0232](https://github.com/omc/searchyll/commit/46d0232)
|
46
|
+
- Merge pull request #12 from nz/rename-searchyll, by Allison Zadrozny <allison@zadrozny.com>, [d6270d4](https://github.com/omc/searchyll/commit/d6270d4)
|
47
|
+
- Change Searchyou to Searchyll, by Rob Sears <secure@robsears.com>, [72466b9](https://github.com/omc/searchyll/commit/72466b9)
|
48
|
+
- Merge pull request #11 from nz/fresh-configs, by Nick Zadrozny <nick@onemorecloud.com>, [26d9cf5](https://github.com/omc/searchyll/commit/26d9cf5)
|
49
|
+
- Move configuration into its own file, by Rob Sears <secure@robsears.com>, [a008f11](https://github.com/omc/searchyll/commit/a008f11)
|
50
|
+
- Move the config options around, by Rob Sears <secure@robsears.com>, [485495c](https://github.com/omc/searchyll/commit/485495c)
|
51
|
+
- Implement notes from Nick, by Rob Sears <secure@robsears.com>, [0951991](https://github.com/omc/searchyll/commit/0951991)
|
52
|
+
- New configuration settings are in a Configuration class, additional documentation included, by Rob Sears <secure@robsears.com>, [4c00cc3](https://github.com/omc/searchyll/commit/4c00cc3)
|
53
|
+
- better index cleanup with more precise enumeration of old indices, by Nick Zadrozny <nick@beyondthepath.com>, [1d6d807](https://github.com/omc/searchyll/commit/1d6d807)
|
54
|
+
- Merge pull request #2 from allizad/edit-indexer0-content, by Nick Zadrozny <nick@onemorecloud.com>, [9149cdf](https://github.com/omc/searchyll/commit/9149cdf)
|
55
|
+
- Update generator.rb, by Allison Zadrozny <allison@zadrozny.com>, [76b650d](https://github.com/omc/searchyll/commit/76b650d)
|
56
|
+
- Edit indexer content, by Allison Zadrozny <allison@zadrozny.com>, [157f981](https://github.com/omc/searchyll/commit/157f981)
|
57
|
+
- more helper methods and cleanup, by Nick Zadrozny <nick@beyondthepath.com>, [e0913ad](https://github.com/omc/searchyll/commit/e0913ad)
|
58
|
+
- get the code pretty much to working, by Nick Zadrozny <nick@beyondthepath.com>, [5550a28](https://github.com/omc/searchyll/commit/5550a28)
|
59
|
+
- add some comments, by Nick Zadrozny <nick@beyondthepath.com>, [0be5381](https://github.com/omc/searchyll/commit/0be5381)
|
60
|
+
- make it run, by Nick Zadrozny <nick@beyondthepath.com>, [afa05a7](https://github.com/omc/searchyll/commit/afa05a7)
|
61
|
+
- start testing! with some refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [8e63e9f](https://github.com/omc/searchyll/commit/8e63e9f)
|
62
|
+
- sketching a jekyll indexer for ES, by Nick Zadrozny <nick@beyondthepath.com>, [9f4c3be](https://github.com/omc/searchyll/commit/9f4c3be)
|
data/lib/searchyll.rb
CHANGED
@@ -24,7 +24,7 @@ begin
|
|
24
24
|
# strip html
|
25
25
|
nokogiri_doc = Nokogiri::HTML(page.output)
|
26
26
|
|
27
|
-
puts %( indexing page #{page.url})
|
27
|
+
# puts %( indexing page #{page.url})
|
28
28
|
|
29
29
|
indexer = indexers[page.site]
|
30
30
|
indexer << page.data.merge({
|
@@ -39,7 +39,7 @@ begin
|
|
39
39
|
# strip html
|
40
40
|
nokogiri_doc = Nokogiri::HTML(document.output)
|
41
41
|
|
42
|
-
puts %( indexing document #{document.url})
|
42
|
+
# puts %( indexing document #{document.url})
|
43
43
|
|
44
44
|
indexer = indexers[document.site]
|
45
45
|
indexer << document.data.merge({
|
data/lib/searchyll/configuration.rb
CHANGED
@@ -8,8 +8,7 @@ module Searchyll
|
|
8
8
|
# Determine a URL for the cluster, or fail with error
|
9
9
|
def elasticsearch_url
|
10
10
|
ENV['BONSAI_URL'] || ENV['ELASTICSEARCH_URL'] ||
|
11
|
-
((site.config||{})['elasticsearch']||{})['url']
|
12
|
-
raise(ArgumentError, "No Elasticsearch URL present, skipping indexing")
|
11
|
+
((site.config||{})['elasticsearch']||{})['url'].to_s
|
13
12
|
end
|
14
13
|
|
15
14
|
# Getter for the number of primary shards
|
data/lib/searchyll/generator.rb
CHANGED
@@ -14,6 +14,12 @@ module Searchyll
|
|
14
14
|
# Gather the configuration options
|
15
15
|
configuration = Configuration.new(site)
|
16
16
|
|
17
|
+
# Don't do anything if the Elasticsearch URL is missing
|
18
|
+
if configuration.elasticsearch_url.empty?
|
19
|
+
puts "No Elasticsearch URL present, skipping indexing"
|
20
|
+
return
|
21
|
+
end
|
22
|
+
|
17
23
|
# Prepare the indexer
|
18
24
|
indexer = Searchyll::Indexer.new(configuration)
|
19
25
|
indexer.start
|
data/lib/searchyll/indexer.rb
CHANGED
@@ -3,33 +3,95 @@ require 'net/http'
|
|
3
3
|
|
4
4
|
module Searchyll
|
5
5
|
class Indexer
|
6
|
-
|
6
|
+
# Initial size of document batches to send to ES _bulk API
|
7
7
|
BATCH_SIZE = 50
|
8
8
|
|
9
|
+
# Grow and shrink the batch size based on how long our bulk calls take
|
10
|
+
# relative to the tempo
|
11
|
+
BATCH_RESIZE_FACTOR = 1.2
|
12
|
+
|
13
|
+
# Requests per minute for updates to ES
|
14
|
+
TEMPO = 94
|
15
|
+
|
16
|
+
attr_accessor :batch_size
|
9
17
|
attr_accessor :configuration
|
10
18
|
attr_accessor :indexer_thread
|
11
|
-
attr_accessor :old_indices
|
12
19
|
attr_accessor :queue
|
13
20
|
attr_accessor :timestamp
|
14
21
|
attr_accessor :uri
|
15
22
|
attr_accessor :working
|
16
23
|
|
24
|
+
# Initialize a basic indexer, with a Jekyll site configuration, waiting
|
25
|
+
# to be supplied with documents for indexing.
|
17
26
|
def initialize(configuration)
|
18
27
|
self.configuration = configuration
|
19
|
-
self.uri
|
20
|
-
self.queue
|
21
|
-
self.working
|
22
|
-
self.timestamp
|
28
|
+
self.uri = URI(configuration.elasticsearch_url)
|
29
|
+
self.queue = Queue.new
|
30
|
+
self.working = true
|
31
|
+
self.timestamp = Time.now
|
32
|
+
self.batch_size = BATCH_SIZE
|
23
33
|
end
|
24
34
|
|
25
35
|
# Public: Add new documents for batch indexing.
|
26
36
|
def <<(doc)
|
27
|
-
|
37
|
+
queue << doc
|
38
|
+
end
|
39
|
+
|
40
|
+
# Public: start the indexer and wait for documents to index.
|
41
|
+
def start
|
42
|
+
prepare_index
|
43
|
+
|
44
|
+
self.indexer_thread = Thread.new do
|
45
|
+
http_start do |http|
|
46
|
+
indexer_loop(http)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Public: Indicate to the indexer that no new documents are being added.
|
52
|
+
def finish
|
53
|
+
self.working = false
|
54
|
+
indexer_thread.join
|
55
|
+
finalize!
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
def indexer_loop(http)
|
61
|
+
tempo_loop do
|
62
|
+
break unless working?
|
63
|
+
es_bulk_insert!(http, current_batch)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Run a loop in the tempo specified by TEMPO.
|
68
|
+
def tempo_loop
|
69
|
+
loop do
|
70
|
+
t = Time.now
|
71
|
+
|
72
|
+
# Perform the work required
|
73
|
+
yield
|
74
|
+
|
75
|
+
# Adjust the batch size
|
76
|
+
if (Time.now - t) / (60.0 / TEMPO) < 0.5
|
77
|
+
self.batch_size = (batch_size * BATCH_RESIZE_FACTOR).round
|
78
|
+
puts "Increased batch to #{batch_size}"
|
79
|
+
elsif (Time.now - t) / (60.0 / TEMPO) > 0.9
|
80
|
+
self.batch_size = (batch_size / BATCH_RESIZE_FACTOR).round
|
81
|
+
puts "Decreased batch to #{batch_size}"
|
82
|
+
end
|
83
|
+
|
84
|
+
# Tight loop to sleep through any remaining time in the tempo
|
85
|
+
while (60.0 / TEMPO) - (Time.now - t) > 0
|
86
|
+
sleep [0.1, (60.0 / TEMPO) - (Time.now - t)].min
|
87
|
+
break unless working?
|
88
|
+
end
|
89
|
+
end
|
28
90
|
end
|
29
91
|
|
30
92
|
# Signal a stop condition for our batch indexing thread.
|
31
93
|
def working?
|
32
|
-
working || queue.
|
94
|
+
working || !queue.empty?
|
33
95
|
end
|
34
96
|
|
35
97
|
# A versioned index name, based on the time of the indexing run.
|
@@ -39,10 +101,10 @@ module Searchyll
|
|
39
101
|
end
|
40
102
|
|
41
103
|
# Prepare an HTTP connection
|
42
|
-
def http_start
|
104
|
+
def http_start
|
43
105
|
http = Net::HTTP.start(
|
44
106
|
uri.hostname, uri.port,
|
45
|
-
:
|
107
|
+
use_ssl: (uri.scheme == 'https')
|
46
108
|
)
|
47
109
|
yield(http)
|
48
110
|
end
|
@@ -59,24 +121,10 @@ module Searchyll
|
|
59
121
|
}.to_json # TODO: index settings
|
60
122
|
|
61
123
|
http_start do |http|
|
62
|
-
|
124
|
+
http.request(create_index)
|
63
125
|
end
|
64
126
|
|
65
|
-
#
|
66
|
-
end
|
67
|
-
|
68
|
-
# Public: start the indexer and wait for documents to index.
|
69
|
-
def start
|
70
|
-
prepare_index
|
71
|
-
|
72
|
-
self.indexer_thread = Thread.new do
|
73
|
-
http_start do |http|
|
74
|
-
loop do
|
75
|
-
break unless working?
|
76
|
-
es_bulk_insert!(http, current_batch)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
127
|
+
# TODO: mapping?
|
80
128
|
end
|
81
129
|
|
82
130
|
def http_put(path)
|
@@ -99,7 +147,8 @@ module Searchyll
|
|
99
147
|
req = klass.new(path)
|
100
148
|
req.content_type = 'application/json'
|
101
149
|
req['Accept'] = 'application/json'
|
102
|
-
|
150
|
+
# Append auth credentials if they exist
|
151
|
+
req.basic_auth(uri.user, uri.password) if uri.user && uri.password
|
103
152
|
req
|
104
153
|
end
|
105
154
|
|
@@ -109,7 +158,7 @@ module Searchyll
|
|
109
158
|
def es_bulk_insert!(http, batch)
|
110
159
|
bulk_insert = http_post("/#{elasticsearch_index_name}/#{configuration.elasticsearch_default_type}/_bulk")
|
111
160
|
bulk_insert.body = batch.map do |doc|
|
112
|
-
[
|
161
|
+
[{ index: {} }.to_json, doc.to_json].join("\n")
|
113
162
|
end.join("\n") + "\n"
|
114
163
|
http.request(bulk_insert)
|
115
164
|
end
|
@@ -119,61 +168,77 @@ module Searchyll
|
|
119
168
|
def current_batch
|
120
169
|
count = 0
|
121
170
|
batch = []
|
122
|
-
while count <
|
171
|
+
while count < batch_size && !queue.empty?
|
123
172
|
batch << queue.pop
|
124
173
|
count += 1
|
125
174
|
end
|
126
175
|
batch
|
127
176
|
end
|
128
177
|
|
129
|
-
#
|
130
|
-
def finish
|
131
|
-
self.working = false
|
132
|
-
indexer_thread.join
|
133
|
-
finalize!
|
134
|
-
end
|
135
|
-
|
178
|
+
# List this site's old indices currently in the cluster (ivar caching is currently disabled)
|
136
179
|
def old_indices
|
137
|
-
|
138
|
-
|
139
|
-
indices =
|
140
|
-
indices = indices
|
141
|
-
|
180
|
+
# return if defined?(@old_indices)
|
181
|
+
resp = http_start { |h| h.request(http_get('/_cat/indices?h=index')) }
|
182
|
+
indices = JSON.parse(resp.body).map { |i| i['index'] }
|
183
|
+
indices = indices.select { |i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
|
184
|
+
indices -= [elasticsearch_index_name]
|
185
|
+
# @old_indices = indices
|
186
|
+
indices
|
142
187
|
end
|
143
188
|
|
144
189
|
# Once documents are done being indexed, finalize the process by adding
|
145
190
|
# the new index into an alias for searching.
|
146
191
|
def finalize!
|
147
|
-
#
|
192
|
+
# run the prepared requests
|
193
|
+
http_start do |http|
|
194
|
+
finalize_refresh(http)
|
195
|
+
finalize_replication(http)
|
196
|
+
finalize_aliases(http)
|
197
|
+
finalize_cleanup(http)
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
# refresh the index to make it searchable
|
202
|
+
def finalize_refresh(http)
|
148
203
|
refresh = http_post("/#{elasticsearch_index_name}/_refresh")
|
204
|
+
http.request(refresh)
|
205
|
+
end
|
149
206
|
|
150
|
-
|
207
|
+
# add replication to the new index
|
208
|
+
def finalize_replication(http)
|
151
209
|
add_replication = http_put("/#{elasticsearch_index_name}/_settings")
|
152
|
-
add_replication.body = {
|
210
|
+
add_replication.body = {
|
211
|
+
index: {
|
212
|
+
number_of_replicas: configuration.elasticsearch_number_of_replicas
|
213
|
+
}
|
214
|
+
}.to_json
|
215
|
+
http.request(add_replication)
|
216
|
+
end
|
153
217
|
|
154
|
-
|
155
|
-
|
218
|
+
# hot swap the index into the canonical alias
|
219
|
+
def finalize_aliases(http)
|
220
|
+
update_aliases = http_post('/_aliases')
|
156
221
|
update_aliases.body = {
|
157
|
-
|
158
|
-
{
|
159
|
-
|
222
|
+
actions: [
|
223
|
+
{ remove: {
|
224
|
+
index: old_indices.join(','),
|
225
|
+
alias: configuration.elasticsearch_index_base_name
|
226
|
+
} },
|
227
|
+
{ add: {
|
228
|
+
index: elasticsearch_index_name,
|
229
|
+
alias: configuration.elasticsearch_index_base_name
|
230
|
+
} }
|
160
231
|
]
|
161
232
|
}.to_json
|
233
|
+
http.request(update_aliases)
|
234
|
+
end
|
162
235
|
|
163
|
-
|
236
|
+
# delete old indices after a successful reindexing run
|
237
|
+
def finalize_cleanup(http)
|
238
|
+
return if old_indices.nil? || old_indices.empty?
|
164
239
|
cleanup_indices = http_delete("/#{old_indices.join(',')}")
|
165
240
|
puts %( Old indices: #{old_indices.join(', ')})
|
166
|
-
|
167
|
-
# run the prepared requests
|
168
|
-
http_start do |http|
|
169
|
-
http.request(refresh)
|
170
|
-
http.request(add_replication)
|
171
|
-
http.request(update_aliases)
|
172
|
-
if !old_indices.empty?
|
173
|
-
http.request(cleanup_indices)
|
174
|
-
end
|
175
|
-
end
|
241
|
+
http.request(cleanup_indices)
|
176
242
|
end
|
177
|
-
|
178
243
|
end
|
179
244
|
end
|
data/lib/searchyll/version.rb
CHANGED
data/searchyll.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchyll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Zadrozny
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2018-06-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|
@@ -82,20 +82,6 @@ dependencies:
|
|
82
82
|
- - ">="
|
83
83
|
- !ruby/object:Gem::Version
|
84
84
|
version: '3.0'
|
85
|
-
- !ruby/object:Gem::Dependency
|
86
|
-
name: elasticsearch-ruby
|
87
|
-
requirement: !ruby/object:Gem::Requirement
|
88
|
-
requirements:
|
89
|
-
- - ">="
|
90
|
-
- !ruby/object:Gem::Version
|
91
|
-
version: '0'
|
92
|
-
type: :runtime
|
93
|
-
prerelease: false
|
94
|
-
version_requirements: !ruby/object:Gem::Requirement
|
95
|
-
requirements:
|
96
|
-
- - ">="
|
97
|
-
- !ruby/object:Gem::Version
|
98
|
-
version: '0'
|
99
85
|
- !ruby/object:Gem::Dependency
|
100
86
|
name: nokogiri
|
101
87
|
requirement: !ruby/object:Gem::Requirement
|
@@ -121,7 +107,10 @@ extra_rdoc_files: []
|
|
121
107
|
files:
|
122
108
|
- ".gitignore"
|
123
109
|
- ".rspec"
|
110
|
+
- ".rubocop.yml"
|
111
|
+
- ".rubocop_todo.yml"
|
124
112
|
- ".travis.yml"
|
113
|
+
- CHANGELOG.md
|
125
114
|
- Gemfile
|
126
115
|
- Guardfile
|
127
116
|
- LICENSE
|
@@ -155,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
144
|
version: '0'
|
156
145
|
requirements: []
|
157
146
|
rubyforge_project:
|
158
|
-
rubygems_version: 2.
|
147
|
+
rubygems_version: 2.6.14
|
159
148
|
signing_key:
|
160
149
|
specification_version: 4
|
161
150
|
summary: A gem to index your Jekyll pages into Elasticsearch.
|