searchyll 0.10.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +231 -0
- data/CHANGELOG.md +62 -0
- data/lib/searchyll.rb +2 -2
- data/lib/searchyll/configuration.rb +1 -2
- data/lib/searchyll/generator.rb +6 -0
- data/lib/searchyll/indexer.rb +126 -61
- data/lib/searchyll/version.rb +1 -1
- data/searchyll.gemspec +0 -1
- metadata +6 -17
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3adc41e5707f84efa1fcadb911c1fefb96849a23
|
|
4
|
+
data.tar.gz: f311750a8fbf77c24e6e32946da3540550f80a1a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9fc8750c7e1768fcdd80aa658edfcb2371d13750989db37f43dccd9456b2567354bc954b267d327be8e2b3e54ad20be36dfd90314d024a46582ab32df2bb585f
|
|
7
|
+
data.tar.gz: 844f042d27f019131836c72672a2452555140dbfc821ed4385961b6043c6572c95b88820515e3106adf0d62e0de81340017a17e6c270df6baff3069e3922311c
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
data/.rubocop_todo.yml
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# This configuration was generated by
|
|
2
|
+
# `rubocop --auto-gen-config`
|
|
3
|
+
# on 2018-06-30 11:54:09 -0500 using RuboCop version 0.56.0.
|
|
4
|
+
# The point is for the user to remove these configuration records
|
|
5
|
+
# one by one as the offenses are removed from the code base.
|
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
|
8
|
+
|
|
9
|
+
# Offense count: 2
|
|
10
|
+
# Cop supports --auto-correct.
|
|
11
|
+
# Configuration parameters: Include, TreatCommentsAsGroupSeparators.
|
|
12
|
+
# Include: **/*.gemspec
|
|
13
|
+
Gemspec/OrderedDependencies:
|
|
14
|
+
Exclude:
|
|
15
|
+
- 'searchyll.gemspec'
|
|
16
|
+
|
|
17
|
+
# Offense count: 1
|
|
18
|
+
# Cop supports --auto-correct.
|
|
19
|
+
Layout/EmptyLineAfterMagicComment:
|
|
20
|
+
Exclude:
|
|
21
|
+
- 'searchyll.gemspec'
|
|
22
|
+
|
|
23
|
+
# Offense count: 8
|
|
24
|
+
# Cop supports --auto-correct.
|
|
25
|
+
# Configuration parameters: EnforcedStyle.
|
|
26
|
+
# SupportedStyles: empty_lines, no_empty_lines
|
|
27
|
+
Layout/EmptyLinesAroundBlockBody:
|
|
28
|
+
Exclude:
|
|
29
|
+
- 'Guardfile'
|
|
30
|
+
- 'searchyll.gemspec'
|
|
31
|
+
- 'spec/searchyll/generator_spec.rb'
|
|
32
|
+
- 'spec/searchyll/indexer_spec.rb'
|
|
33
|
+
- 'spec/searchyll_spec.rb'
|
|
34
|
+
|
|
35
|
+
# Offense count: 2
|
|
36
|
+
# Cop supports --auto-correct.
|
|
37
|
+
# Configuration parameters: EnforcedStyle.
|
|
38
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines, beginning_only, ending_only
|
|
39
|
+
Layout/EmptyLinesAroundClassBody:
|
|
40
|
+
Exclude:
|
|
41
|
+
- 'lib/searchyll/generator.rb'
|
|
42
|
+
|
|
43
|
+
# Offense count: 1
|
|
44
|
+
# Cop supports --auto-correct.
|
|
45
|
+
Layout/EmptyLinesAroundExceptionHandlingKeywords:
|
|
46
|
+
Exclude:
|
|
47
|
+
- 'lib/searchyll.rb'
|
|
48
|
+
|
|
49
|
+
# Offense count: 1
|
|
50
|
+
# Cop supports --auto-correct.
|
|
51
|
+
Layout/EmptyLinesAroundMethodBody:
|
|
52
|
+
Exclude:
|
|
53
|
+
- 'lib/searchyll/generator.rb'
|
|
54
|
+
|
|
55
|
+
# Offense count: 2
|
|
56
|
+
# Cop supports --auto-correct.
|
|
57
|
+
# Configuration parameters: EnforcedStyle.
|
|
58
|
+
# SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
|
|
59
|
+
Layout/EmptyLinesAroundModuleBody:
|
|
60
|
+
Exclude:
|
|
61
|
+
- 'lib/searchyll/generator.rb'
|
|
62
|
+
|
|
63
|
+
# Offense count: 4
|
|
64
|
+
# Cop supports --auto-correct.
|
|
65
|
+
# Configuration parameters: IndentationWidth.
|
|
66
|
+
# SupportedStyles: special_inside_parentheses, consistent, align_braces
|
|
67
|
+
Layout/IndentHash:
|
|
68
|
+
EnforcedStyle: consistent
|
|
69
|
+
|
|
70
|
+
# Offense count: 1
|
|
71
|
+
# Cop supports --auto-correct.
|
|
72
|
+
Layout/SpaceAfterSemicolon:
|
|
73
|
+
Exclude:
|
|
74
|
+
- 'lib/searchyll/generator.rb'
|
|
75
|
+
|
|
76
|
+
# Offense count: 2
|
|
77
|
+
# Cop supports --auto-correct.
|
|
78
|
+
# Configuration parameters: AllowForAlignment.
|
|
79
|
+
Layout/SpaceAroundOperators:
|
|
80
|
+
Exclude:
|
|
81
|
+
- 'lib/searchyll/configuration.rb'
|
|
82
|
+
|
|
83
|
+
# Offense count: 1
|
|
84
|
+
# Cop supports --auto-correct.
|
|
85
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
|
|
86
|
+
# SupportedStyles: space, no_space
|
|
87
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
|
88
|
+
Layout/SpaceBeforeBlockBraces:
|
|
89
|
+
Exclude:
|
|
90
|
+
- 'lib/searchyll/generator.rb'
|
|
91
|
+
|
|
92
|
+
# Offense count: 3
|
|
93
|
+
# Cop supports --auto-correct.
|
|
94
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBrackets.
|
|
95
|
+
# SupportedStyles: space, no_space, compact
|
|
96
|
+
# SupportedStylesForEmptyBrackets: space, no_space
|
|
97
|
+
Layout/SpaceInsideArrayLiteralBrackets:
|
|
98
|
+
Exclude:
|
|
99
|
+
- 'searchyll.gemspec'
|
|
100
|
+
|
|
101
|
+
# Offense count: 2
|
|
102
|
+
# Cop supports --auto-correct.
|
|
103
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
|
|
104
|
+
# SupportedStyles: space, no_space
|
|
105
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
|
106
|
+
Layout/SpaceInsideBlockBraces:
|
|
107
|
+
Exclude:
|
|
108
|
+
- 'lib/searchyll/generator.rb'
|
|
109
|
+
|
|
110
|
+
# Offense count: 1
|
|
111
|
+
# Cop supports --auto-correct.
|
|
112
|
+
# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
|
|
113
|
+
# SupportedStyles: space, no_space, compact
|
|
114
|
+
# SupportedStylesForEmptyBraces: space, no_space
|
|
115
|
+
Layout/SpaceInsideHashLiteralBraces:
|
|
116
|
+
Exclude:
|
|
117
|
+
- 'spec/searchyll/generator_spec.rb'
|
|
118
|
+
|
|
119
|
+
# Offense count: 1
|
|
120
|
+
# Cop supports --auto-correct.
|
|
121
|
+
Lint/LiteralInInterpolation:
|
|
122
|
+
Exclude:
|
|
123
|
+
- 'lib/searchyll/generator.rb'
|
|
124
|
+
|
|
125
|
+
# Offense count: 1
|
|
126
|
+
Metrics/AbcSize:
|
|
127
|
+
Max: 20
|
|
128
|
+
|
|
129
|
+
# Offense count: 1
|
|
130
|
+
# Configuration parameters: CountComments.
|
|
131
|
+
Metrics/MethodLength:
|
|
132
|
+
Max: 15
|
|
133
|
+
|
|
134
|
+
# Offense count: 2
|
|
135
|
+
# Cop supports --auto-correct.
|
|
136
|
+
# Configuration parameters: EnforcedStyle.
|
|
137
|
+
# SupportedStyles: braces, no_braces, context_dependent
|
|
138
|
+
Style/BracesAroundHashParameters:
|
|
139
|
+
Exclude:
|
|
140
|
+
- 'lib/searchyll.rb'
|
|
141
|
+
|
|
142
|
+
# Offense count: 2
|
|
143
|
+
Style/Documentation:
|
|
144
|
+
Exclude:
|
|
145
|
+
- 'spec/**/*'
|
|
146
|
+
- 'test/**/*'
|
|
147
|
+
- 'lib/searchyll/configuration.rb'
|
|
148
|
+
- 'lib/searchyll/generator.rb'
|
|
149
|
+
|
|
150
|
+
# Offense count: 1
|
|
151
|
+
# Cop supports --auto-correct.
|
|
152
|
+
Style/Encoding:
|
|
153
|
+
Exclude:
|
|
154
|
+
- 'searchyll.gemspec'
|
|
155
|
+
|
|
156
|
+
# Offense count: 2
|
|
157
|
+
# Cop supports --auto-correct.
|
|
158
|
+
Style/ExpandPathArguments:
|
|
159
|
+
Exclude:
|
|
160
|
+
- 'searchyll.gemspec'
|
|
161
|
+
- 'spec/spec_helper.rb'
|
|
162
|
+
|
|
163
|
+
# Offense count: 1
|
|
164
|
+
# Cop supports --auto-correct.
|
|
165
|
+
# Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
|
|
166
|
+
# SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
|
|
167
|
+
Style/HashSyntax:
|
|
168
|
+
Exclude:
|
|
169
|
+
- 'Rakefile'
|
|
170
|
+
|
|
171
|
+
# Offense count: 1
|
|
172
|
+
# Cop supports --auto-correct.
|
|
173
|
+
Style/MutableConstant:
|
|
174
|
+
Exclude:
|
|
175
|
+
- 'lib/searchyll/version.rb'
|
|
176
|
+
|
|
177
|
+
# Offense count: 1
|
|
178
|
+
# Cop supports --auto-correct.
|
|
179
|
+
# Configuration parameters: PreferredDelimiters.
|
|
180
|
+
Style/PercentLiteralDelimiters:
|
|
181
|
+
Exclude:
|
|
182
|
+
- 'searchyll.gemspec'
|
|
183
|
+
|
|
184
|
+
# Offense count: 2
|
|
185
|
+
# Cop supports --auto-correct.
|
|
186
|
+
# Configuration parameters: EnforcedStyle.
|
|
187
|
+
# SupportedStyles: implicit, explicit
|
|
188
|
+
Style/RescueStandardError:
|
|
189
|
+
Exclude:
|
|
190
|
+
- 'lib/searchyll.rb'
|
|
191
|
+
- 'lib/searchyll/generator.rb'
|
|
192
|
+
|
|
193
|
+
# Offense count: 1
|
|
194
|
+
# Cop supports --auto-correct.
|
|
195
|
+
# Configuration parameters: AllowAsExpressionSeparator.
|
|
196
|
+
Style/Semicolon:
|
|
197
|
+
Exclude:
|
|
198
|
+
- 'lib/searchyll/generator.rb'
|
|
199
|
+
|
|
200
|
+
# Offense count: 2
|
|
201
|
+
# Cop supports --auto-correct.
|
|
202
|
+
Style/StderrPuts:
|
|
203
|
+
Exclude:
|
|
204
|
+
- 'lib/searchyll/generator.rb'
|
|
205
|
+
|
|
206
|
+
# Offense count: 42
|
|
207
|
+
# Cop supports --auto-correct.
|
|
208
|
+
# Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
|
|
209
|
+
# SupportedStyles: single_quotes, double_quotes
|
|
210
|
+
Style/StringLiterals:
|
|
211
|
+
Exclude:
|
|
212
|
+
- 'Guardfile'
|
|
213
|
+
- 'Rakefile'
|
|
214
|
+
- 'bin/console'
|
|
215
|
+
- 'lib/searchyll.rb'
|
|
216
|
+
- 'lib/searchyll/configuration.rb'
|
|
217
|
+
- 'lib/searchyll/generator.rb'
|
|
218
|
+
- 'lib/searchyll/version.rb'
|
|
219
|
+
- 'searchyll.gemspec'
|
|
220
|
+
|
|
221
|
+
# Offense count: 1
|
|
222
|
+
# Cop supports --auto-correct.
|
|
223
|
+
Style/UnneededPercentQ:
|
|
224
|
+
Exclude:
|
|
225
|
+
- 'searchyll.gemspec'
|
|
226
|
+
|
|
227
|
+
# Offense count: 2
|
|
228
|
+
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
|
229
|
+
# URISchemes: http, https
|
|
230
|
+
Metrics/LineLength:
|
|
231
|
+
Max: 120
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
All notable changes to this project will be documented in this file.
|
|
3
|
+
|
|
4
|
+
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
5
|
+
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## v0.10.2 - 2018-06-30
|
|
10
|
+
### Added
|
|
11
|
+
- Version bump: v0.10.2., by Allison Zadrozny <allison@zadrozny.com>, [7930a3e](https://github.com/omc/searchyll/commit/7930a3e)
|
|
12
|
+
- Add & backfill the Changelog., by Allison Zadrozny <allison@zadrozny.com>, [cafe45c](https://github.com/omc/searchyll/commit/cafe45c)
|
|
13
|
+
- Merge pull request #30 from omc/allizad/remove-elasticsearch-ruby, by Allison Zadrozny <allison@zadrozny.com>, [e05d874](https://github.com/omc/searchyll/commit/e05d874)
|
|
14
|
+
- Remove unnecessary elasticsearch-ruby dependency., by Allison Zadrozny <allison@zadrozny.com>, [354b58b](https://github.com/omc/searchyll/commit/354b58b)
|
|
15
|
+
- Merge pull request #29 from omc/nz/reindex-cadence, by Allison Zadrozny <allison@zadrozny.com>, [9de8214](https://github.com/omc/searchyll/commit/9de8214)
|
|
16
|
+
- reindex with a cadence and a dynamic batch size, by Nick Zadrozny <nick@beyondthepath.com>, [a4aa544](https://github.com/omc/searchyll/commit/a4aa544)
|
|
17
|
+
- Rubocop and some light refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [150ea0d](https://github.com/omc/searchyll/commit/150ea0d)
|
|
18
|
+
- Merge pull request #28 from omc/robsears-patch-2, by Allison Zadrozny <allison@zadrozny.com>, [ffd360c](https://github.com/omc/searchyll/commit/ffd360c)
|
|
19
|
+
- Increment the version, by Rob Sears <secure@robsears.com>, [c4ee100](https://github.com/omc/searchyll/commit/c4ee100)
|
|
20
|
+
- Don't break site generation if the Elasticsearch URL is missing, by Rob Sears <secure@robsears.com>, [9ddd016](https://github.com/omc/searchyll/commit/9ddd016)
|
|
21
|
+
- Merge pull request #26 from omc/robsears-patch-1, by Allison Zadrozny <allison@zadrozny.com>, [bce8974](https://github.com/omc/searchyll/commit/bce8974)
|
|
22
|
+
- Fix a silly spelling error., by Rob <rc.sears@gmail.com>, [b280436](https://github.com/omc/searchyll/commit/b280436)
|
|
23
|
+
- Auth should be optional, by Rob <rc.sears@gmail.com>, [d7d694e](https://github.com/omc/searchyll/commit/d7d694e)
|
|
24
|
+
|
|
25
|
+
## v0.10.0 - 2018-06-07
|
|
26
|
+
### Added
|
|
27
|
+
- version bump to 0.10.0, by Nick Zadrozny <nick@beyondthepath.com>, [097ddb9](https://github.com/omc/searchyll/commit/097ddb9)
|
|
28
|
+
- Merge pull request #24 from omc/collections, by Dru Sellers <dru@drusellers.com>, [7d1f044](https://github.com/omc/searchyll/commit/7d1f044)
|
|
29
|
+
- Index collections as well as posts, by Dru Sellers <dru@drusellers.com>, [2cdaaa8](https://github.com/omc/searchyll/commit/2cdaaa8)
|
|
30
|
+
- Merge pull request #18 from omc/v5-accept-json, by Allison Zadrozny <allison@zadrozny.com>, [ee7e309](https://github.com/omc/searchyll/commit/ee7e309)
|
|
31
|
+
- Merge pull request #19 from matthewdu/fix-index-creation, by Allison Zadrozny <allison@zadrozny.com>, [0d34bb9](https://github.com/omc/searchyll/commit/0d34bb9)
|
|
32
|
+
- Use PUT instead of POST, by Matthew Du <du.matthew@gmail.com>, [b905d73](https://github.com/omc/searchyll/commit/b905d73)
|
|
33
|
+
- provide an Accept header with all requests, for v5 compat, by Nick Zadrozny <nick@beyondthepath.com>, [2f28fae](https://github.com/omc/searchyll/commit/2f28fae)
|
|
34
|
+
- Merge pull request #16 from omc/add-print-statement, by Allison Zadrozny <allison@zadrozny.com>, [e236c54](https://github.com/omc/searchyll/commit/e236c54)
|
|
35
|
+
- Skip index deletion if there are no old indices, by Allison Zadrozny <allison@zadrozny.com>, [b33546e](https://github.com/omc/searchyll/commit/b33546e)
|
|
36
|
+
- Add print statement to indexer, by Allison Zadrozny <allison@zadrozny.com>, [9af6f13](https://github.com/omc/searchyll/commit/9af6f13)
|
|
37
|
+
|
|
38
|
+
## v0.9.0 - 2018-05-07
|
|
39
|
+
### Added
|
|
40
|
+
- license, by Nick Zadrozny <nick@beyondthepath.com>, [b221803](https://github.com/omc/searchyll/commit/b221803)
|
|
41
|
+
- Ignore the packaged gem, by Nick Zadrozny <nick@beyondthepath.com>, [04a9764](https://github.com/omc/searchyll/commit/04a9764)
|
|
42
|
+
- GPLv3 license, by Nick Zadrozny <nick@beyondthepath.com>, [150f6ed](https://github.com/omc/searchyll/commit/150f6ed)
|
|
43
|
+
- Update gemspec, readme and version for first publish of the gem, by Nick Zadrozny <nick@beyondthepath.com>, [e30ba44](https://github.com/omc/searchyll/commit/e30ba44)
|
|
44
|
+
- Change version number, by Allison Zadrozny <allison@zadrozny.com>, [9cf886e](https://github.com/omc/searchyll/commit/9cf886e)
|
|
45
|
+
- Make searchyll into a hook, by Allison Zadrozny <allison@zadrozny.com>, [46d0232](https://github.com/omc/searchyll/commit/46d0232)
|
|
46
|
+
- Merge pull request #12 from nz/rename-searchyll, by Allison Zadrozny <allison@zadrozny.com>, [d6270d4](https://github.com/omc/searchyll/commit/d6270d4)
|
|
47
|
+
- Change Searchyou to Searchyll, by Rob Sears <secure@robsears.com>, [72466b9](https://github.com/omc/searchyll/commit/72466b9)
|
|
48
|
+
- Merge pull request #11 from nz/fresh-configs, by Nick Zadrozny <nick@onemorecloud.com>, [26d9cf5](https://github.com/omc/searchyll/commit/26d9cf5)
|
|
49
|
+
- Move configuration into its own file, by Rob Sears <secure@robsears.com>, [a008f11](https://github.com/omc/searchyll/commit/a008f11)
|
|
50
|
+
- Move the config options around, by Rob Sears <secure@robsears.com>, [485495c](https://github.com/omc/searchyll/commit/485495c)
|
|
51
|
+
- Implement notes from Nick, by Rob Sears <secure@robsears.com>, [0951991](https://github.com/omc/searchyll/commit/0951991)
|
|
52
|
+
- New configuration settings are in a Configuration class, additional documentation included, by Rob Sears <secure@robsears.com>, [4c00cc3](https://github.com/omc/searchyll/commit/4c00cc3)
|
|
53
|
+
- better index cleanup with more precise enumeration of old indices, by Nick Zadrozny <nick@beyondthepath.com>, [1d6d807](https://github.com/omc/searchyll/commit/1d6d807)
|
|
54
|
+
- Merge pull request #2 from allizad/edit-indexer0-content, by Nick Zadrozny <nick@onemorecloud.com>, [9149cdf](https://github.com/omc/searchyll/commit/9149cdf)
|
|
55
|
+
- Update generator.rb, by Allison Zadrozny <allison@zadrozny.com>, [76b650d](https://github.com/omc/searchyll/commit/76b650d)
|
|
56
|
+
- Edit indexer content, by Allison Zadrozny <allison@zadrozny.com>, [157f981](https://github.com/omc/searchyll/commit/157f981)
|
|
57
|
+
- more helper methods and cleanup, by Nick Zadrozny <nick@beyondthepath.com>, [e0913ad](https://github.com/omc/searchyll/commit/e0913ad)
|
|
58
|
+
- get the code pretty much to working, by Nick Zadrozny <nick@beyondthepath.com>, [5550a28](https://github.com/omc/searchyll/commit/5550a28)
|
|
59
|
+
- add some comments, by Nick Zadrozny <nick@beyondthepath.com>, [0be5381](https://github.com/omc/searchyll/commit/0be5381)
|
|
60
|
+
- make it run, by Nick Zadrozny <nick@beyondthepath.com>, [afa05a7](https://github.com/omc/searchyll/commit/afa05a7)
|
|
61
|
+
- start testing! with some refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [8e63e9f](https://github.com/omc/searchyll/commit/8e63e9f)
|
|
62
|
+
- sketching a jekyll indexer for ES, by Nick Zadrozny <nick@beyondthepath.com>, [9f4c3be](https://github.com/omc/searchyll/commit/9f4c3be)
|
data/lib/searchyll.rb
CHANGED
|
@@ -24,7 +24,7 @@ begin
|
|
|
24
24
|
# strip html
|
|
25
25
|
nokogiri_doc = Nokogiri::HTML(page.output)
|
|
26
26
|
|
|
27
|
-
puts %( indexing page #{page.url})
|
|
27
|
+
# puts %( indexing page #{page.url})
|
|
28
28
|
|
|
29
29
|
indexer = indexers[page.site]
|
|
30
30
|
indexer << page.data.merge({
|
|
@@ -39,7 +39,7 @@ begin
|
|
|
39
39
|
# strip html
|
|
40
40
|
nokogiri_doc = Nokogiri::HTML(document.output)
|
|
41
41
|
|
|
42
|
-
puts %( indexing document #{document.url})
|
|
42
|
+
# puts %( indexing document #{document.url})
|
|
43
43
|
|
|
44
44
|
indexer = indexers[document.site]
|
|
45
45
|
indexer << document.data.merge({
|
|
data/lib/searchyll/configuration.rb
CHANGED
|
@@ -8,8 +8,7 @@ module Searchyll
|
|
|
8
8
|
# Determine a URL for the cluster, or fail with error
|
|
9
9
|
def elasticsearch_url
|
|
10
10
|
ENV['BONSAI_URL'] || ENV['ELASTICSEARCH_URL'] ||
|
|
11
|
-
((site.config||{})['elasticsearch']||{})['url']
|
|
12
|
-
raise(ArgumentError, "No Elasticsearch URL present, skipping indexing")
|
|
11
|
+
((site.config||{})['elasticsearch']||{})['url'].to_s
|
|
13
12
|
end
|
|
14
13
|
|
|
15
14
|
# Getter for the number of primary shards
|
data/lib/searchyll/generator.rb
CHANGED
|
@@ -14,6 +14,12 @@ module Searchyll
|
|
|
14
14
|
# Gather the configuration options
|
|
15
15
|
configuration = Configuration.new(site)
|
|
16
16
|
|
|
17
|
+
# Don't do anything if the Elasticsearch URL is missing
|
|
18
|
+
if configuration.elasticsearch_url.empty?
|
|
19
|
+
puts "No Elasticsearch URL present, skipping indexing"
|
|
20
|
+
return
|
|
21
|
+
end
|
|
22
|
+
|
|
17
23
|
# Prepare the indexer
|
|
18
24
|
indexer = Searchyll::Indexer.new(configuration)
|
|
19
25
|
indexer.start
|
data/lib/searchyll/indexer.rb
CHANGED
|
@@ -3,33 +3,95 @@ require 'net/http'
|
|
|
3
3
|
|
|
4
4
|
module Searchyll
|
|
5
5
|
class Indexer
|
|
6
|
-
|
|
6
|
+
# Initial size of document batches to send to ES _bulk API
|
|
7
7
|
BATCH_SIZE = 50
|
|
8
8
|
|
|
9
|
+
# Grow and shrink the batch size based on how long our bulk calls take
|
|
10
|
+
# relative to the tempo
|
|
11
|
+
BATCH_RESIZE_FACTOR = 1.2
|
|
12
|
+
|
|
13
|
+
# Requests per minute for updates to ES
|
|
14
|
+
TEMPO = 94
|
|
15
|
+
|
|
16
|
+
attr_accessor :batch_size
|
|
9
17
|
attr_accessor :configuration
|
|
10
18
|
attr_accessor :indexer_thread
|
|
11
|
-
attr_accessor :old_indices
|
|
12
19
|
attr_accessor :queue
|
|
13
20
|
attr_accessor :timestamp
|
|
14
21
|
attr_accessor :uri
|
|
15
22
|
attr_accessor :working
|
|
16
23
|
|
|
24
|
+
# Initialize a basic indexer, with a Jekyll site configuration, waiting
|
|
25
|
+
# to be supplied with documents for indexing.
|
|
17
26
|
def initialize(configuration)
|
|
18
27
|
self.configuration = configuration
|
|
19
|
-
self.uri
|
|
20
|
-
self.queue
|
|
21
|
-
self.working
|
|
22
|
-
self.timestamp
|
|
28
|
+
self.uri = URI(configuration.elasticsearch_url)
|
|
29
|
+
self.queue = Queue.new
|
|
30
|
+
self.working = true
|
|
31
|
+
self.timestamp = Time.now
|
|
32
|
+
self.batch_size = BATCH_SIZE
|
|
23
33
|
end
|
|
24
34
|
|
|
25
35
|
# Public: Add new documents for batch indexing.
|
|
26
36
|
def <<(doc)
|
|
27
|
-
|
|
37
|
+
queue << doc
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Public: start the indexer and wait for documents to index.
|
|
41
|
+
def start
|
|
42
|
+
prepare_index
|
|
43
|
+
|
|
44
|
+
self.indexer_thread = Thread.new do
|
|
45
|
+
http_start do |http|
|
|
46
|
+
indexer_loop(http)
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Public: Indicate to the indexer that no new documents are being added.
|
|
52
|
+
def finish
|
|
53
|
+
self.working = false
|
|
54
|
+
indexer_thread.join
|
|
55
|
+
finalize!
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
private
|
|
59
|
+
|
|
60
|
+
def indexer_loop(http)
|
|
61
|
+
tempo_loop do
|
|
62
|
+
break unless working?
|
|
63
|
+
es_bulk_insert!(http, current_batch)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Run a loop in the tempo specified by TEMPO.
|
|
68
|
+
def tempo_loop
|
|
69
|
+
loop do
|
|
70
|
+
t = Time.now
|
|
71
|
+
|
|
72
|
+
# Perform the work required
|
|
73
|
+
yield
|
|
74
|
+
|
|
75
|
+
# Adjust the batch size
|
|
76
|
+
if (Time.now - t) / (60.0 / TEMPO) < 0.5
|
|
77
|
+
self.batch_size = (batch_size * BATCH_RESIZE_FACTOR).round
|
|
78
|
+
puts "Increased batch to #{batch_size}"
|
|
79
|
+
elsif (Time.now - t) / (60.0 / TEMPO) > 0.9
|
|
80
|
+
self.batch_size = (batch_size / BATCH_RESIZE_FACTOR).round
|
|
81
|
+
puts "Decreased batch to #{batch_size}"
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Tight loop to sleep through any remaining time in the tempo
|
|
85
|
+
while (60.0 / TEMPO) - (Time.now - t) > 0
|
|
86
|
+
sleep [0.1, (60.0 / TEMPO) - (Time.now - t)].min
|
|
87
|
+
break unless working?
|
|
88
|
+
end
|
|
89
|
+
end
|
|
28
90
|
end
|
|
29
91
|
|
|
30
92
|
# Signal a stop condition for our batch indexing thread.
|
|
31
93
|
def working?
|
|
32
|
-
working || queue.
|
|
94
|
+
working || !queue.empty?
|
|
33
95
|
end
|
|
34
96
|
|
|
35
97
|
# A versioned index name, based on the time of the indexing run.
|
|
@@ -39,10 +101,10 @@ module Searchyll
|
|
|
39
101
|
end
|
|
40
102
|
|
|
41
103
|
# Prepare an HTTP connection
|
|
42
|
-
def http_start
|
|
104
|
+
def http_start
|
|
43
105
|
http = Net::HTTP.start(
|
|
44
106
|
uri.hostname, uri.port,
|
|
45
|
-
:
|
|
107
|
+
use_ssl: (uri.scheme == 'https')
|
|
46
108
|
)
|
|
47
109
|
yield(http)
|
|
48
110
|
end
|
|
@@ -59,24 +121,10 @@ module Searchyll
|
|
|
59
121
|
}.to_json # TODO: index settings
|
|
60
122
|
|
|
61
123
|
http_start do |http|
|
|
62
|
-
|
|
124
|
+
http.request(create_index)
|
|
63
125
|
end
|
|
64
126
|
|
|
65
|
-
#
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# Public: start the indexer and wait for documents to index.
|
|
69
|
-
def start
|
|
70
|
-
prepare_index
|
|
71
|
-
|
|
72
|
-
self.indexer_thread = Thread.new do
|
|
73
|
-
http_start do |http|
|
|
74
|
-
loop do
|
|
75
|
-
break unless working?
|
|
76
|
-
es_bulk_insert!(http, current_batch)
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
end
|
|
127
|
+
# TODO: mapping?
|
|
80
128
|
end
|
|
81
129
|
|
|
82
130
|
def http_put(path)
|
|
@@ -99,7 +147,8 @@ module Searchyll
|
|
|
99
147
|
req = klass.new(path)
|
|
100
148
|
req.content_type = 'application/json'
|
|
101
149
|
req['Accept'] = 'application/json'
|
|
102
|
-
|
|
150
|
+
# Append auth credentials if they exist
|
|
151
|
+
req.basic_auth(uri.user, uri.password) if uri.user && uri.password
|
|
103
152
|
req
|
|
104
153
|
end
|
|
105
154
|
|
|
@@ -109,7 +158,7 @@ module Searchyll
|
|
|
109
158
|
def es_bulk_insert!(http, batch)
|
|
110
159
|
bulk_insert = http_post("/#{elasticsearch_index_name}/#{configuration.elasticsearch_default_type}/_bulk")
|
|
111
160
|
bulk_insert.body = batch.map do |doc|
|
|
112
|
-
[
|
|
161
|
+
[{ index: {} }.to_json, doc.to_json].join("\n")
|
|
113
162
|
end.join("\n") + "\n"
|
|
114
163
|
http.request(bulk_insert)
|
|
115
164
|
end
|
|
@@ -119,61 +168,77 @@ module Searchyll
|
|
|
119
168
|
def current_batch
|
|
120
169
|
count = 0
|
|
121
170
|
batch = []
|
|
122
|
-
while count <
|
|
171
|
+
while count < batch_size && !queue.empty?
|
|
123
172
|
batch << queue.pop
|
|
124
173
|
count += 1
|
|
125
174
|
end
|
|
126
175
|
batch
|
|
127
176
|
end
|
|
128
177
|
|
|
129
|
-
#
|
|
130
|
-
def finish
|
|
131
|
-
self.working = false
|
|
132
|
-
indexer_thread.join
|
|
133
|
-
finalize!
|
|
134
|
-
end
|
|
135
|
-
|
|
178
|
+
# List the indices currently in the cluster, caching the call in an ivar
|
|
136
179
|
def old_indices
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
indices =
|
|
140
|
-
indices = indices
|
|
141
|
-
|
|
180
|
+
# return if defined?(@old_indices)
|
|
181
|
+
resp = http_start { |h| h.request(http_get('/_cat/indices?h=index')) }
|
|
182
|
+
indices = JSON.parse(resp.body).map { |i| i['index'] }
|
|
183
|
+
indices = indices.select { |i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
|
|
184
|
+
indices -= [elasticsearch_index_name]
|
|
185
|
+
# @old_indices = indices
|
|
186
|
+
indices
|
|
142
187
|
end
|
|
143
188
|
|
|
144
189
|
# Once documents are done being indexed, finalize the process by adding
|
|
145
190
|
# the new index into an alias for searching.
|
|
146
191
|
def finalize!
|
|
147
|
-
#
|
|
192
|
+
# run the prepared requests
|
|
193
|
+
http_start do |http|
|
|
194
|
+
finalize_refresh(http)
|
|
195
|
+
finalize_replication(http)
|
|
196
|
+
finalize_aliases(http)
|
|
197
|
+
finalize_cleanup(http)
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# refresh the index to make it searchable
|
|
202
|
+
def finalize_refresh(http)
|
|
148
203
|
refresh = http_post("/#{elasticsearch_index_name}/_refresh")
|
|
204
|
+
http.request(refresh)
|
|
205
|
+
end
|
|
149
206
|
|
|
150
|
-
|
|
207
|
+
# add replication to the new index
|
|
208
|
+
def finalize_replication(http)
|
|
151
209
|
add_replication = http_put("/#{elasticsearch_index_name}/_settings")
|
|
152
|
-
add_replication.body = {
|
|
210
|
+
add_replication.body = {
|
|
211
|
+
index: {
|
|
212
|
+
number_of_replicas: configuration.elasticsearch_number_of_replicas
|
|
213
|
+
}
|
|
214
|
+
}.to_json
|
|
215
|
+
http.request(add_replication)
|
|
216
|
+
end
|
|
153
217
|
|
|
154
|
-
|
|
155
|
-
|
|
218
|
+
# hot swap the index into the canonical alias
|
|
219
|
+
def finalize_aliases(http)
|
|
220
|
+
update_aliases = http_post('/_aliases')
|
|
156
221
|
update_aliases.body = {
|
|
157
|
-
|
|
158
|
-
{
|
|
159
|
-
|
|
222
|
+
actions: [
|
|
223
|
+
{ remove: {
|
|
224
|
+
index: old_indices.join(','),
|
|
225
|
+
alias: configuration.elasticsearch_index_base_name
|
|
226
|
+
} },
|
|
227
|
+
{ add: {
|
|
228
|
+
index: elasticsearch_index_name,
|
|
229
|
+
alias: configuration.elasticsearch_index_base_name
|
|
230
|
+
} }
|
|
160
231
|
]
|
|
161
232
|
}.to_json
|
|
233
|
+
http.request(update_aliases)
|
|
234
|
+
end
|
|
162
235
|
|
|
163
|
-
|
|
236
|
+
# delete old indices after a successful reindexing run
|
|
237
|
+
def finalize_cleanup(http)
|
|
238
|
+
return if old_indices.nil? || old_indices.empty?
|
|
164
239
|
cleanup_indices = http_delete("/#{old_indices.join(',')}")
|
|
165
240
|
puts %( Old indices: #{old_indices.join(', ')})
|
|
166
|
-
|
|
167
|
-
# run the prepared requests
|
|
168
|
-
http_start do |http|
|
|
169
|
-
http.request(refresh)
|
|
170
|
-
http.request(add_replication)
|
|
171
|
-
http.request(update_aliases)
|
|
172
|
-
if !old_indices.empty?
|
|
173
|
-
http.request(cleanup_indices)
|
|
174
|
-
end
|
|
175
|
-
end
|
|
241
|
+
http.request(cleanup_indices)
|
|
176
242
|
end
|
|
177
|
-
|
|
178
243
|
end
|
|
179
244
|
end
|
data/lib/searchyll/version.rb
CHANGED
data/searchyll.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: searchyll
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.0
|
|
4
|
+
version: 0.10.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nick Zadrozny
|
|
@@ -10,7 +10,7 @@ authors:
|
|
|
10
10
|
autorequire:
|
|
11
11
|
bindir: exe
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date:
|
|
13
|
+
date: 2018-06-30 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: bundler
|
|
@@ -82,20 +82,6 @@ dependencies:
|
|
|
82
82
|
- - ">="
|
|
83
83
|
- !ruby/object:Gem::Version
|
|
84
84
|
version: '3.0'
|
|
85
|
-
- !ruby/object:Gem::Dependency
|
|
86
|
-
name: elasticsearch-ruby
|
|
87
|
-
requirement: !ruby/object:Gem::Requirement
|
|
88
|
-
requirements:
|
|
89
|
-
- - ">="
|
|
90
|
-
- !ruby/object:Gem::Version
|
|
91
|
-
version: '0'
|
|
92
|
-
type: :runtime
|
|
93
|
-
prerelease: false
|
|
94
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
95
|
-
requirements:
|
|
96
|
-
- - ">="
|
|
97
|
-
- !ruby/object:Gem::Version
|
|
98
|
-
version: '0'
|
|
99
85
|
- !ruby/object:Gem::Dependency
|
|
100
86
|
name: nokogiri
|
|
101
87
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -121,7 +107,10 @@ extra_rdoc_files: []
|
|
|
121
107
|
files:
|
|
122
108
|
- ".gitignore"
|
|
123
109
|
- ".rspec"
|
|
110
|
+
- ".rubocop.yml"
|
|
111
|
+
- ".rubocop_todo.yml"
|
|
124
112
|
- ".travis.yml"
|
|
113
|
+
- CHANGELOG.md
|
|
125
114
|
- Gemfile
|
|
126
115
|
- Guardfile
|
|
127
116
|
- LICENSE
|
|
@@ -155,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
155
144
|
version: '0'
|
|
156
145
|
requirements: []
|
|
157
146
|
rubyforge_project:
|
|
158
|
-
rubygems_version: 2.
|
|
147
|
+
rubygems_version: 2.6.14
|
|
159
148
|
signing_key:
|
|
160
149
|
specification_version: 4
|
|
161
150
|
summary: A gem to index your Jekyll pages into Elasticsearch.
|