searchyll 0.10.0 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d90b594ea9e60eb7814951eb5dbd3c5cb4940c1
4
- data.tar.gz: 9aefa9b3395bf9164930749a55a86131f4ae4cf0
3
+ metadata.gz: 3adc41e5707f84efa1fcadb911c1fefb96849a23
4
+ data.tar.gz: f311750a8fbf77c24e6e32946da3540550f80a1a
5
5
  SHA512:
6
- metadata.gz: 4411dd6cba9f869a4dd0f485fdafd18f6fca7ca55e3d93c472a93acc903f9fdd10e3dc58dc187d8760372cf7682d2efaa495ac172081d5c1311db2e2e5c60a66
7
- data.tar.gz: eef95afba94a4177bfe5c19c9c37adb97cca12d755ebf83f6a9b742c5ade8f9353a1e200c589383ead6d7862d6da293e157465f478362a9b576e5e0ae8e3e377
6
+ metadata.gz: 9fc8750c7e1768fcdd80aa658edfcb2371d13750989db37f43dccd9456b2567354bc954b267d327be8e2b3e54ad20be36dfd90314d024a46582ab32df2bb585f
7
+ data.tar.gz: 844f042d27f019131836c72672a2452555140dbfc821ed4385961b6043c6572c95b88820515e3106adf0d62e0de81340017a17e6c270df6baff3069e3922311c
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
@@ -0,0 +1,231 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2018-06-30 11:54:09 -0500 using RuboCop version 0.56.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 2
10
+ # Cop supports --auto-correct.
11
+ # Configuration parameters: Include, TreatCommentsAsGroupSeparators.
12
+ # Include: **/*.gemspec
13
+ Gemspec/OrderedDependencies:
14
+ Exclude:
15
+ - 'searchyll.gemspec'
16
+
17
+ # Offense count: 1
18
+ # Cop supports --auto-correct.
19
+ Layout/EmptyLineAfterMagicComment:
20
+ Exclude:
21
+ - 'searchyll.gemspec'
22
+
23
+ # Offense count: 8
24
+ # Cop supports --auto-correct.
25
+ # Configuration parameters: EnforcedStyle.
26
+ # SupportedStyles: empty_lines, no_empty_lines
27
+ Layout/EmptyLinesAroundBlockBody:
28
+ Exclude:
29
+ - 'Guardfile'
30
+ - 'searchyll.gemspec'
31
+ - 'spec/searchyll/generator_spec.rb'
32
+ - 'spec/searchyll/indexer_spec.rb'
33
+ - 'spec/searchyll_spec.rb'
34
+
35
+ # Offense count: 2
36
+ # Cop supports --auto-correct.
37
+ # Configuration parameters: EnforcedStyle.
38
+ # SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines, beginning_only, ending_only
39
+ Layout/EmptyLinesAroundClassBody:
40
+ Exclude:
41
+ - 'lib/searchyll/generator.rb'
42
+
43
+ # Offense count: 1
44
+ # Cop supports --auto-correct.
45
+ Layout/EmptyLinesAroundExceptionHandlingKeywords:
46
+ Exclude:
47
+ - 'lib/searchyll.rb'
48
+
49
+ # Offense count: 1
50
+ # Cop supports --auto-correct.
51
+ Layout/EmptyLinesAroundMethodBody:
52
+ Exclude:
53
+ - 'lib/searchyll/generator.rb'
54
+
55
+ # Offense count: 2
56
+ # Cop supports --auto-correct.
57
+ # Configuration parameters: EnforcedStyle.
58
+ # SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
59
+ Layout/EmptyLinesAroundModuleBody:
60
+ Exclude:
61
+ - 'lib/searchyll/generator.rb'
62
+
63
+ # Offense count: 4
64
+ # Cop supports --auto-correct.
65
+ # Configuration parameters: IndentationWidth.
66
+ # SupportedStyles: special_inside_parentheses, consistent, align_braces
67
+ Layout/IndentHash:
68
+ EnforcedStyle: consistent
69
+
70
+ # Offense count: 1
71
+ # Cop supports --auto-correct.
72
+ Layout/SpaceAfterSemicolon:
73
+ Exclude:
74
+ - 'lib/searchyll/generator.rb'
75
+
76
+ # Offense count: 2
77
+ # Cop supports --auto-correct.
78
+ # Configuration parameters: AllowForAlignment.
79
+ Layout/SpaceAroundOperators:
80
+ Exclude:
81
+ - 'lib/searchyll/configuration.rb'
82
+
83
+ # Offense count: 1
84
+ # Cop supports --auto-correct.
85
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
86
+ # SupportedStyles: space, no_space
87
+ # SupportedStylesForEmptyBraces: space, no_space
88
+ Layout/SpaceBeforeBlockBraces:
89
+ Exclude:
90
+ - 'lib/searchyll/generator.rb'
91
+
92
+ # Offense count: 3
93
+ # Cop supports --auto-correct.
94
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBrackets.
95
+ # SupportedStyles: space, no_space, compact
96
+ # SupportedStylesForEmptyBrackets: space, no_space
97
+ Layout/SpaceInsideArrayLiteralBrackets:
98
+ Exclude:
99
+ - 'searchyll.gemspec'
100
+
101
+ # Offense count: 2
102
+ # Cop supports --auto-correct.
103
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
104
+ # SupportedStyles: space, no_space
105
+ # SupportedStylesForEmptyBraces: space, no_space
106
+ Layout/SpaceInsideBlockBraces:
107
+ Exclude:
108
+ - 'lib/searchyll/generator.rb'
109
+
110
+ # Offense count: 1
111
+ # Cop supports --auto-correct.
112
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
113
+ # SupportedStyles: space, no_space, compact
114
+ # SupportedStylesForEmptyBraces: space, no_space
115
+ Layout/SpaceInsideHashLiteralBraces:
116
+ Exclude:
117
+ - 'spec/searchyll/generator_spec.rb'
118
+
119
+ # Offense count: 1
120
+ # Cop supports --auto-correct.
121
+ Lint/LiteralInInterpolation:
122
+ Exclude:
123
+ - 'lib/searchyll/generator.rb'
124
+
125
+ # Offense count: 1
126
+ Metrics/AbcSize:
127
+ Max: 20
128
+
129
+ # Offense count: 1
130
+ # Configuration parameters: CountComments.
131
+ Metrics/MethodLength:
132
+ Max: 15
133
+
134
+ # Offense count: 2
135
+ # Cop supports --auto-correct.
136
+ # Configuration parameters: EnforcedStyle.
137
+ # SupportedStyles: braces, no_braces, context_dependent
138
+ Style/BracesAroundHashParameters:
139
+ Exclude:
140
+ - 'lib/searchyll.rb'
141
+
142
+ # Offense count: 2
143
+ Style/Documentation:
144
+ Exclude:
145
+ - 'spec/**/*'
146
+ - 'test/**/*'
147
+ - 'lib/searchyll/configuration.rb'
148
+ - 'lib/searchyll/generator.rb'
149
+
150
+ # Offense count: 1
151
+ # Cop supports --auto-correct.
152
+ Style/Encoding:
153
+ Exclude:
154
+ - 'searchyll.gemspec'
155
+
156
+ # Offense count: 2
157
+ # Cop supports --auto-correct.
158
+ Style/ExpandPathArguments:
159
+ Exclude:
160
+ - 'searchyll.gemspec'
161
+ - 'spec/spec_helper.rb'
162
+
163
+ # Offense count: 1
164
+ # Cop supports --auto-correct.
165
+ # Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
166
+ # SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
167
+ Style/HashSyntax:
168
+ Exclude:
169
+ - 'Rakefile'
170
+
171
+ # Offense count: 1
172
+ # Cop supports --auto-correct.
173
+ Style/MutableConstant:
174
+ Exclude:
175
+ - 'lib/searchyll/version.rb'
176
+
177
+ # Offense count: 1
178
+ # Cop supports --auto-correct.
179
+ # Configuration parameters: PreferredDelimiters.
180
+ Style/PercentLiteralDelimiters:
181
+ Exclude:
182
+ - 'searchyll.gemspec'
183
+
184
+ # Offense count: 2
185
+ # Cop supports --auto-correct.
186
+ # Configuration parameters: EnforcedStyle.
187
+ # SupportedStyles: implicit, explicit
188
+ Style/RescueStandardError:
189
+ Exclude:
190
+ - 'lib/searchyll.rb'
191
+ - 'lib/searchyll/generator.rb'
192
+
193
+ # Offense count: 1
194
+ # Cop supports --auto-correct.
195
+ # Configuration parameters: AllowAsExpressionSeparator.
196
+ Style/Semicolon:
197
+ Exclude:
198
+ - 'lib/searchyll/generator.rb'
199
+
200
+ # Offense count: 2
201
+ # Cop supports --auto-correct.
202
+ Style/StderrPuts:
203
+ Exclude:
204
+ - 'lib/searchyll/generator.rb'
205
+
206
+ # Offense count: 42
207
+ # Cop supports --auto-correct.
208
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
209
+ # SupportedStyles: single_quotes, double_quotes
210
+ Style/StringLiterals:
211
+ Exclude:
212
+ - 'Guardfile'
213
+ - 'Rakefile'
214
+ - 'bin/console'
215
+ - 'lib/searchyll.rb'
216
+ - 'lib/searchyll/configuration.rb'
217
+ - 'lib/searchyll/generator.rb'
218
+ - 'lib/searchyll/version.rb'
219
+ - 'searchyll.gemspec'
220
+
221
+ # Offense count: 1
222
+ # Cop supports --auto-correct.
223
+ Style/UnneededPercentQ:
224
+ Exclude:
225
+ - 'searchyll.gemspec'
226
+
227
+ # Offense count: 2
228
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
229
+ # URISchemes: http, https
230
+ Metrics/LineLength:
231
+ Max: 120
@@ -0,0 +1,62 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
+ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
+
7
+
8
+
9
+ ## v0.10.2 - 2018-06-30
10
+ ### Added
11
+ - Version bump: v0.10.2., by Allison Zadrozny <allison@zadrozny.com>, [7930a3e](https://github.com/omc/searchyll/commit/7930a3e)
12
+ - Add & backfill the Changelog., by Allison Zadrozny <allison@zadrozny.com>, [cafe45c](https://github.com/omc/searchyll/commit/cafe45c)
13
+ - Merge pull request #30 from omc/allizad/remove-elasticsearch-ruby, by Allison Zadrozny <allison@zadrozny.com>, [e05d874](https://github.com/omc/searchyll/commit/e05d874)
14
+ - Remove unnecessary elasticsearch-ruby dependency., by Allison Zadrozny <allison@zadrozny.com>, [354b58b](https://github.com/omc/searchyll/commit/354b58b)
15
+ - Merge pull request #29 from omc/nz/reindex-cadence, by Allison Zadrozny <allison@zadrozny.com>, [9de8214](https://github.com/omc/searchyll/commit/9de8214)
16
+ - reindex with a cadence and a dynamic batch size, by Nick Zadrozny <nick@beyondthepath.com>, [a4aa544](https://github.com/omc/searchyll/commit/a4aa544)
17
+ - Rubocop and some light refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [150ea0d](https://github.com/omc/searchyll/commit/150ea0d)
18
+ - Merge pull request #28 from omc/robsears-patch-2, by Allison Zadrozny <allison@zadrozny.com>, [ffd360c](https://github.com/omc/searchyll/commit/ffd360c)
19
+ - Increment the version, by Rob Sears <secure@robsears.com>, [c4ee100](https://github.com/omc/searchyll/commit/c4ee100)
20
+ - Don't break site generation if the Elasticsearch URL is missing, by Rob Sears <secure@robsears.com>, [9ddd016](https://github.com/omc/searchyll/commit/9ddd016)
21
+ - Merge pull request #26 from omc/robsears-patch-1, by Allison Zadrozny <allison@zadrozny.com>, [bce8974](https://github.com/omc/searchyll/commit/bce8974)
22
+ - Fix a silly spelling error., by Rob <rc.sears@gmail.com>, [b280436](https://github.com/omc/searchyll/commit/b280436)
23
+ - Auth should be optional, by Rob <rc.sears@gmail.com>, [d7d694e](https://github.com/omc/searchyll/commit/d7d694e)
24
+
25
+ ## v0.10.0 - 2018-06-07
26
+ ### Added
27
+ - version bump to 0.10.0, by Nick Zadrozny <nick@beyondthepath.com>, [097ddb9](https://github.com/omc/searchyll/commit/097ddb9)
28
+ - Merge pull request #24 from omc/collections, by Dru Sellers <dru@drusellers.com>, [7d1f044](https://github.com/omc/searchyll/commit/7d1f044)
29
+ - Index collections as well as posts, by Dru Sellers <dru@drusellers.com>, [2cdaaa8](https://github.com/omc/searchyll/commit/2cdaaa8)
30
+ - Merge pull request #18 from omc/v5-accept-json, by Allison Zadrozny <allison@zadrozny.com>, [ee7e309](https://github.com/omc/searchyll/commit/ee7e309)
31
+ - Merge pull request #19 from matthewdu/fix-index-creation, by Allison Zadrozny <allison@zadrozny.com>, [0d34bb9](https://github.com/omc/searchyll/commit/0d34bb9)
32
+ - Use PUT instead of POST, by Matthew Du <du.matthew@gmail.com>, [b905d73](https://github.com/omc/searchyll/commit/b905d73)
33
+ - provide an Accept header with all requests, for v5 compat, by Nick Zadrozny <nick@beyondthepath.com>, [2f28fae](https://github.com/omc/searchyll/commit/2f28fae)
34
+ - Merge pull request #16 from omc/add-print-statement, by Allison Zadrozny <allison@zadrozny.com>, [e236c54](https://github.com/omc/searchyll/commit/e236c54)
35
+ - Skip index deletion if there are no old indices, by Allison Zadrozny <allison@zadrozny.com>, [b33546e](https://github.com/omc/searchyll/commit/b33546e)
36
+ - Add print statement to indexer, by Allison Zadrozny <allison@zadrozny.com>, [9af6f13](https://github.com/omc/searchyll/commit/9af6f13)
37
+
38
+ ## v0.9.0 - 2018-05-07
39
+ ### Added
40
+ - license, by Nick Zadrozny <nick@beyondthepath.com>, [b221803](https://github.com/omc/searchyll/commit/b221803)
41
+ - Ignore the packaged gem, by Nick Zadrozny <nick@beyondthepath.com>, [04a9764](https://github.com/omc/searchyll/commit/04a9764)
42
+ - GPLv3 license, by Nick Zadrozny <nick@beyondthepath.com>, [150f6ed](https://github.com/omc/searchyll/commit/150f6ed)
43
+ - Update gemspec, readme and version for first publish of the gem, by Nick Zadrozny <nick@beyondthepath.com>, [e30ba44](https://github.com/omc/searchyll/commit/e30ba44)
44
+ - Change version number, by Allison Zadrozny <allison@zadrozny.com>, [9cf886e](https://github.com/omc/searchyll/commit/9cf886e)
45
+ - Make searchyll into a hook, by Allison Zadrozny <allison@zadrozny.com>, [46d0232](https://github.com/omc/searchyll/commit/46d0232)
46
+ - Merge pull request #12 from nz/rename-searchyll, by Allison Zadrozny <allison@zadrozny.com>, [d6270d4](https://github.com/omc/searchyll/commit/d6270d4)
47
+ - Change Searchyou to Searchyll, by Rob Sears <secure@robsears.com>, [72466b9](https://github.com/omc/searchyll/commit/72466b9)
48
+ - Merge pull request #11 from nz/fresh-configs, by Nick Zadrozny <nick@onemorecloud.com>, [26d9cf5](https://github.com/omc/searchyll/commit/26d9cf5)
49
+ - Move configuration into its own file, by Rob Sears <secure@robsears.com>, [a008f11](https://github.com/omc/searchyll/commit/a008f11)
50
+ - Move the config options around, by Rob Sears <secure@robsears.com>, [485495c](https://github.com/omc/searchyll/commit/485495c)
51
+ - Implement notes from Nick, by Rob Sears <secure@robsears.com>, [0951991](https://github.com/omc/searchyll/commit/0951991)
52
+ - New configuration settings are in a Configuration class, additional documentation included, by Rob Sears <secure@robsears.com>, [4c00cc3](https://github.com/omc/searchyll/commit/4c00cc3)
53
+ - better index cleanup with more precise enumeration of old indices, by Nick Zadrozny <nick@beyondthepath.com>, [1d6d807](https://github.com/omc/searchyll/commit/1d6d807)
54
+ - Merge pull request #2 from allizad/edit-indexer0-content, by Nick Zadrozny <nick@onemorecloud.com>, [9149cdf](https://github.com/omc/searchyll/commit/9149cdf)
55
+ - Update generator.rb, by Allison Zadrozny <allison@zadrozny.com>, [76b650d](https://github.com/omc/searchyll/commit/76b650d)
56
+ - Edit indexer content, by Allison Zadrozny <allison@zadrozny.com>, [157f981](https://github.com/omc/searchyll/commit/157f981)
57
+ - more helper methods and cleanup, by Nick Zadrozny <nick@beyondthepath.com>, [e0913ad](https://github.com/omc/searchyll/commit/e0913ad)
58
+ - get the code pretty much to working, by Nick Zadrozny <nick@beyondthepath.com>, [5550a28](https://github.com/omc/searchyll/commit/5550a28)
59
+ - add some comments, by Nick Zadrozny <nick@beyondthepath.com>, [0be5381](https://github.com/omc/searchyll/commit/0be5381)
60
+ - make it run, by Nick Zadrozny <nick@beyondthepath.com>, [afa05a7](https://github.com/omc/searchyll/commit/afa05a7)
61
+ - start testing! with some refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [8e63e9f](https://github.com/omc/searchyll/commit/8e63e9f)
62
+ - sketching a jekyll indexer for ES, by Nick Zadrozny <nick@beyondthepath.com>, [9f4c3be](https://github.com/omc/searchyll/commit/9f4c3be)
@@ -24,7 +24,7 @@ begin
24
24
  # strip html
25
25
  nokogiri_doc = Nokogiri::HTML(page.output)
26
26
 
27
- puts %( indexing page #{page.url})
27
+ # puts %( indexing page #{page.url})
28
28
 
29
29
  indexer = indexers[page.site]
30
30
  indexer << page.data.merge({
@@ -39,7 +39,7 @@ begin
39
39
  # strip html
40
40
  nokogiri_doc = Nokogiri::HTML(document.output)
41
41
 
42
- puts %( indexing document #{document.url})
42
+ # puts %( indexing document #{document.url})
43
43
 
44
44
  indexer = indexers[document.site]
45
45
  indexer << document.data.merge({
@@ -8,8 +8,7 @@ module Searchyll
8
8
  # Determine a URL for the cluster, or fail with error
9
9
  def elasticsearch_url
10
10
  ENV['BONSAI_URL'] || ENV['ELASTICSEARCH_URL'] ||
11
- ((site.config||{})['elasticsearch']||{})['url'] ||
12
- raise(ArgumentError, "No Elasticsearch URL present, skipping indexing")
11
+ ((site.config||{})['elasticsearch']||{})['url'].to_s
13
12
  end
14
13
 
15
14
  # Getter for the number of primary shards
@@ -14,6 +14,12 @@ module Searchyll
14
14
  # Gather the configuration options
15
15
  configuration = Configuration.new(site)
16
16
 
17
+ # Don't do anything if the Elasticsearch URL is missing
18
+ if configuration.elasticsearch_url.empty?
19
+ puts "No Elasticsearch URL present, skipping indexing"
20
+ return
21
+ end
22
+
17
23
  # Prepare the indexer
18
24
  indexer = Searchyll::Indexer.new(configuration)
19
25
  indexer.start
@@ -3,33 +3,95 @@ require 'net/http'
3
3
 
4
4
  module Searchyll
5
5
  class Indexer
6
-
6
+ # Initial size of document batches to send to ES _bulk API
7
7
  BATCH_SIZE = 50
8
8
 
9
+ # Grow and shrink the batch size based on how long our bulk calls take
10
+ # relative to the tempo
11
+ BATCH_RESIZE_FACTOR = 1.2
12
+
13
+ # Requests per minute for updates to ES
14
+ TEMPO = 94
15
+
16
+ attr_accessor :batch_size
9
17
  attr_accessor :configuration
10
18
  attr_accessor :indexer_thread
11
- attr_accessor :old_indices
12
19
  attr_accessor :queue
13
20
  attr_accessor :timestamp
14
21
  attr_accessor :uri
15
22
  attr_accessor :working
16
23
 
24
+ # Initialize a basic indexer, with a Jekyll site configuration, waiting
25
+ # to be supplied with documents for indexing.
17
26
  def initialize(configuration)
18
27
  self.configuration = configuration
19
- self.uri = URI(configuration.elasticsearch_url)
20
- self.queue = Queue.new
21
- self.working = true
22
- self.timestamp = Time.now
28
+ self.uri = URI(configuration.elasticsearch_url)
29
+ self.queue = Queue.new
30
+ self.working = true
31
+ self.timestamp = Time.now
32
+ self.batch_size = BATCH_SIZE
23
33
  end
24
34
 
25
35
  # Public: Add new documents for batch indexing.
26
36
  def <<(doc)
27
- self.queue << doc
37
+ queue << doc
38
+ end
39
+
40
+ # Public: start the indexer and wait for documents to index.
41
+ def start
42
+ prepare_index
43
+
44
+ self.indexer_thread = Thread.new do
45
+ http_start do |http|
46
+ indexer_loop(http)
47
+ end
48
+ end
49
+ end
50
+
51
+ # Public: Indicate to the indexer that no new documents are being added.
52
+ def finish
53
+ self.working = false
54
+ indexer_thread.join
55
+ finalize!
56
+ end
57
+
58
+ private
59
+
60
+ def indexer_loop(http)
61
+ tempo_loop do
62
+ break unless working?
63
+ es_bulk_insert!(http, current_batch)
64
+ end
65
+ end
66
+
67
+ # Run a loop in the tempo specified by TEMPO.
68
+ def tempo_loop
69
+ loop do
70
+ t = Time.now
71
+
72
+ # Perform the work required
73
+ yield
74
+
75
+ # Adjust the batch size
76
+ if (Time.now - t) / (60.0 / TEMPO) < 0.5
77
+ self.batch_size = (batch_size * BATCH_RESIZE_FACTOR).round
78
+ puts "Increased batch to #{batch_size}"
79
+ elsif (Time.now - t) / (60.0 / TEMPO) > 0.9
80
+ self.batch_size = (batch_size / BATCH_RESIZE_FACTOR).round
81
+ puts "Decreased batch to #{batch_size}"
82
+ end
83
+
84
+ # Tight loop to sleep through any remaining time in the tempo
85
+ while (60.0 / TEMPO) - (Time.now - t) > 0
86
+ sleep [0.1, (60.0 / TEMPO) - (Time.now - t)].min
87
+ break unless working?
88
+ end
89
+ end
28
90
  end
29
91
 
30
92
  # Signal a stop condition for our batch indexing thread.
31
93
  def working?
32
- working || queue.length > 0
94
+ working || !queue.empty?
33
95
  end
34
96
 
35
97
  # A versioned index name, based on the time of the indexing run.
@@ -39,10 +101,10 @@ module Searchyll
39
101
  end
40
102
 
41
103
  # Prepare an HTTP connection
42
- def http_start(&block)
104
+ def http_start
43
105
  http = Net::HTTP.start(
44
106
  uri.hostname, uri.port,
45
- :use_ssl => (uri.scheme == 'https')
107
+ use_ssl: (uri.scheme == 'https')
46
108
  )
47
109
  yield(http)
48
110
  end
@@ -59,24 +121,10 @@ module Searchyll
59
121
  }.to_json # TODO: index settings
60
122
 
61
123
  http_start do |http|
62
- resp = http.request(create_index)
124
+ http.request(create_index)
63
125
  end
64
126
 
65
- # todo: mapping?
66
- end
67
-
68
- # Public: start the indexer and wait for documents to index.
69
- def start
70
- prepare_index
71
-
72
- self.indexer_thread = Thread.new do
73
- http_start do |http|
74
- loop do
75
- break unless working?
76
- es_bulk_insert!(http, current_batch)
77
- end
78
- end
79
- end
127
+ # TODO: mapping?
80
128
  end
81
129
 
82
130
  def http_put(path)
@@ -99,7 +147,8 @@ module Searchyll
99
147
  req = klass.new(path)
100
148
  req.content_type = 'application/json'
101
149
  req['Accept'] = 'application/json'
102
- req.basic_auth(uri.user, uri.password)
150
+ # Append auth credentials if they exist
151
+ req.basic_auth(uri.user, uri.password) if uri.user && uri.password
103
152
  req
104
153
  end
105
154
 
@@ -109,7 +158,7 @@ module Searchyll
109
158
  def es_bulk_insert!(http, batch)
110
159
  bulk_insert = http_post("/#{elasticsearch_index_name}/#{configuration.elasticsearch_default_type}/_bulk")
111
160
  bulk_insert.body = batch.map do |doc|
112
- [ { :index => {} }.to_json, doc.to_json ].join("\n")
161
+ [{ index: {} }.to_json, doc.to_json].join("\n")
113
162
  end.join("\n") + "\n"
114
163
  http.request(bulk_insert)
115
164
  end
@@ -119,61 +168,77 @@ module Searchyll
119
168
  def current_batch
120
169
  count = 0
121
170
  batch = []
122
- while count < BATCH_SIZE && queue.length > 0
171
+ while count < batch_size && !queue.empty?
123
172
  batch << queue.pop
124
173
  count += 1
125
174
  end
126
175
  batch
127
176
  end
128
177
 
129
- # Public: Indicate to the indexer that no new documents are being added.
130
- def finish
131
- self.working = false
132
- indexer_thread.join
133
- finalize!
134
- end
135
-
178
+ # List the old indices in the cluster: those matching the index base name, excluding the current index (ivar caching is currently disabled)
136
179
  def old_indices
137
- resp = http_start { |h| h.request(http_get("/_cat/indices?h=index")) }
138
- indices = JSON.parse(resp.body).map{|i|i['index']}
139
- indices = indices.select{|i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
140
- indices = indices - [ elasticsearch_index_name ]
141
- self.old_indices = indices
180
+ # return if defined?(@old_indices)
181
+ resp = http_start { |h| h.request(http_get('/_cat/indices?h=index')) }
182
+ indices = JSON.parse(resp.body).map { |i| i['index'] }
183
+ indices = indices.select { |i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
184
+ indices -= [elasticsearch_index_name]
185
+ # @old_indices = indices
186
+ indices
142
187
  end
143
188
 
144
189
  # Once documents are done being indexed, finalize the process by adding
145
190
  # the new index into an alias for searching.
146
191
  def finalize!
147
- # refresh the index to make it searchable
192
+ # run each finalization step in order over a single HTTP connection
193
+ http_start do |http|
194
+ finalize_refresh(http)
195
+ finalize_replication(http)
196
+ finalize_aliases(http)
197
+ finalize_cleanup(http)
198
+ end
199
+ end
200
+
201
+ # refresh the index to make it searchable
202
+ def finalize_refresh(http)
148
203
  refresh = http_post("/#{elasticsearch_index_name}/_refresh")
204
+ http.request(refresh)
205
+ end
149
206
 
150
- # add replication to the new index
207
+ # add replication to the new index
208
+ def finalize_replication(http)
151
209
  add_replication = http_put("/#{elasticsearch_index_name}/_settings")
152
- add_replication.body = { index: { number_of_replicas: configuration.elasticsearch_number_of_replicas }}.to_json
210
+ add_replication.body = {
211
+ index: {
212
+ number_of_replicas: configuration.elasticsearch_number_of_replicas
213
+ }
214
+ }.to_json
215
+ http.request(add_replication)
216
+ end
153
217
 
154
- # hot swap the index into the canonical alias
155
- update_aliases = http_post("/_aliases")
218
+ # hot swap the index into the canonical alias
219
+ def finalize_aliases(http)
220
+ update_aliases = http_post('/_aliases')
156
221
  update_aliases.body = {
157
- "actions": [
158
- { "remove": { "index": old_indices.join(','), "alias": configuration.elasticsearch_index_base_name }},
159
- { "add": { "index": elasticsearch_index_name, "alias": configuration.elasticsearch_index_base_name }}
222
+ actions: [
223
+ { remove: {
224
+ index: old_indices.join(','),
225
+ alias: configuration.elasticsearch_index_base_name
226
+ } },
227
+ { add: {
228
+ index: elasticsearch_index_name,
229
+ alias: configuration.elasticsearch_index_base_name
230
+ } }
160
231
  ]
161
232
  }.to_json
233
+ http.request(update_aliases)
234
+ end
162
235
 
163
- # delete old indices
236
+ # delete old indices after a successful reindexing run
237
+ def finalize_cleanup(http)
238
+ return if old_indices.nil? || old_indices.empty?
164
239
  cleanup_indices = http_delete("/#{old_indices.join(',')}")
165
240
  puts %( Old indices: #{old_indices.join(', ')})
166
-
167
- # run the prepared requests
168
- http_start do |http|
169
- http.request(refresh)
170
- http.request(add_replication)
171
- http.request(update_aliases)
172
- if !old_indices.empty?
173
- http.request(cleanup_indices)
174
- end
175
- end
241
+ http.request(cleanup_indices)
176
242
  end
177
-
178
243
  end
179
244
  end
@@ -1,3 +1,3 @@
1
1
  module Searchyll
2
- VERSION = "0.10.0"
2
+ VERSION = "0.10.2"
3
3
  end
@@ -24,7 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "guard-rspec"
25
25
 
26
26
  spec.add_dependency "jekyll", ">= 3.0"
27
- spec.add_dependency "elasticsearch-ruby"
28
27
  spec.add_dependency "nokogiri"
29
28
 
30
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: searchyll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Zadrozny
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2017-06-07 00:00:00.000000000 Z
13
+ date: 2018-06-30 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -82,20 +82,6 @@ dependencies:
82
82
  - - ">="
83
83
  - !ruby/object:Gem::Version
84
84
  version: '3.0'
85
- - !ruby/object:Gem::Dependency
86
- name: elasticsearch-ruby
87
- requirement: !ruby/object:Gem::Requirement
88
- requirements:
89
- - - ">="
90
- - !ruby/object:Gem::Version
91
- version: '0'
92
- type: :runtime
93
- prerelease: false
94
- version_requirements: !ruby/object:Gem::Requirement
95
- requirements:
96
- - - ">="
97
- - !ruby/object:Gem::Version
98
- version: '0'
99
85
  - !ruby/object:Gem::Dependency
100
86
  name: nokogiri
101
87
  requirement: !ruby/object:Gem::Requirement
@@ -121,7 +107,10 @@ extra_rdoc_files: []
121
107
  files:
122
108
  - ".gitignore"
123
109
  - ".rspec"
110
+ - ".rubocop.yml"
111
+ - ".rubocop_todo.yml"
124
112
  - ".travis.yml"
113
+ - CHANGELOG.md
125
114
  - Gemfile
126
115
  - Guardfile
127
116
  - LICENSE
@@ -155,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
144
  version: '0'
156
145
  requirements: []
157
146
  rubyforge_project:
158
- rubygems_version: 2.5.1
147
+ rubygems_version: 2.6.14
159
148
  signing_key:
160
149
  specification_version: 4
161
150
  summary: A gem to index your Jekyll pages into Elasticsearch.