searchyll 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d90b594ea9e60eb7814951eb5dbd3c5cb4940c1
4
- data.tar.gz: 9aefa9b3395bf9164930749a55a86131f4ae4cf0
3
+ metadata.gz: 3adc41e5707f84efa1fcadb911c1fefb96849a23
4
+ data.tar.gz: f311750a8fbf77c24e6e32946da3540550f80a1a
5
5
  SHA512:
6
- metadata.gz: 4411dd6cba9f869a4dd0f485fdafd18f6fca7ca55e3d93c472a93acc903f9fdd10e3dc58dc187d8760372cf7682d2efaa495ac172081d5c1311db2e2e5c60a66
7
- data.tar.gz: eef95afba94a4177bfe5c19c9c37adb97cca12d755ebf83f6a9b742c5ade8f9353a1e200c589383ead6d7862d6da293e157465f478362a9b576e5e0ae8e3e377
6
+ metadata.gz: 9fc8750c7e1768fcdd80aa658edfcb2371d13750989db37f43dccd9456b2567354bc954b267d327be8e2b3e54ad20be36dfd90314d024a46582ab32df2bb585f
7
+ data.tar.gz: 844f042d27f019131836c72672a2452555140dbfc821ed4385961b6043c6572c95b88820515e3106adf0d62e0de81340017a17e6c270df6baff3069e3922311c
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
@@ -0,0 +1,231 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2018-06-30 11:54:09 -0500 using RuboCop version 0.56.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 2
10
+ # Cop supports --auto-correct.
11
+ # Configuration parameters: Include, TreatCommentsAsGroupSeparators.
12
+ # Include: **/*.gemspec
13
+ Gemspec/OrderedDependencies:
14
+ Exclude:
15
+ - 'searchyll.gemspec'
16
+
17
+ # Offense count: 1
18
+ # Cop supports --auto-correct.
19
+ Layout/EmptyLineAfterMagicComment:
20
+ Exclude:
21
+ - 'searchyll.gemspec'
22
+
23
+ # Offense count: 8
24
+ # Cop supports --auto-correct.
25
+ # Configuration parameters: EnforcedStyle.
26
+ # SupportedStyles: empty_lines, no_empty_lines
27
+ Layout/EmptyLinesAroundBlockBody:
28
+ Exclude:
29
+ - 'Guardfile'
30
+ - 'searchyll.gemspec'
31
+ - 'spec/searchyll/generator_spec.rb'
32
+ - 'spec/searchyll/indexer_spec.rb'
33
+ - 'spec/searchyll_spec.rb'
34
+
35
+ # Offense count: 2
36
+ # Cop supports --auto-correct.
37
+ # Configuration parameters: EnforcedStyle.
38
+ # SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines, beginning_only, ending_only
39
+ Layout/EmptyLinesAroundClassBody:
40
+ Exclude:
41
+ - 'lib/searchyll/generator.rb'
42
+
43
+ # Offense count: 1
44
+ # Cop supports --auto-correct.
45
+ Layout/EmptyLinesAroundExceptionHandlingKeywords:
46
+ Exclude:
47
+ - 'lib/searchyll.rb'
48
+
49
+ # Offense count: 1
50
+ # Cop supports --auto-correct.
51
+ Layout/EmptyLinesAroundMethodBody:
52
+ Exclude:
53
+ - 'lib/searchyll/generator.rb'
54
+
55
+ # Offense count: 2
56
+ # Cop supports --auto-correct.
57
+ # Configuration parameters: EnforcedStyle.
58
+ # SupportedStyles: empty_lines, empty_lines_except_namespace, empty_lines_special, no_empty_lines
59
+ Layout/EmptyLinesAroundModuleBody:
60
+ Exclude:
61
+ - 'lib/searchyll/generator.rb'
62
+
63
+ # Offense count: 4
64
+ # Cop supports --auto-correct.
65
+ # Configuration parameters: IndentationWidth.
66
+ # SupportedStyles: special_inside_parentheses, consistent, align_braces
67
+ Layout/IndentHash:
68
+ EnforcedStyle: consistent
69
+
70
+ # Offense count: 1
71
+ # Cop supports --auto-correct.
72
+ Layout/SpaceAfterSemicolon:
73
+ Exclude:
74
+ - 'lib/searchyll/generator.rb'
75
+
76
+ # Offense count: 2
77
+ # Cop supports --auto-correct.
78
+ # Configuration parameters: AllowForAlignment.
79
+ Layout/SpaceAroundOperators:
80
+ Exclude:
81
+ - 'lib/searchyll/configuration.rb'
82
+
83
+ # Offense count: 1
84
+ # Cop supports --auto-correct.
85
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
86
+ # SupportedStyles: space, no_space
87
+ # SupportedStylesForEmptyBraces: space, no_space
88
+ Layout/SpaceBeforeBlockBraces:
89
+ Exclude:
90
+ - 'lib/searchyll/generator.rb'
91
+
92
+ # Offense count: 3
93
+ # Cop supports --auto-correct.
94
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBrackets.
95
+ # SupportedStyles: space, no_space, compact
96
+ # SupportedStylesForEmptyBrackets: space, no_space
97
+ Layout/SpaceInsideArrayLiteralBrackets:
98
+ Exclude:
99
+ - 'searchyll.gemspec'
100
+
101
+ # Offense count: 2
102
+ # Cop supports --auto-correct.
103
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters.
104
+ # SupportedStyles: space, no_space
105
+ # SupportedStylesForEmptyBraces: space, no_space
106
+ Layout/SpaceInsideBlockBraces:
107
+ Exclude:
108
+ - 'lib/searchyll/generator.rb'
109
+
110
+ # Offense count: 1
111
+ # Cop supports --auto-correct.
112
+ # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces.
113
+ # SupportedStyles: space, no_space, compact
114
+ # SupportedStylesForEmptyBraces: space, no_space
115
+ Layout/SpaceInsideHashLiteralBraces:
116
+ Exclude:
117
+ - 'spec/searchyll/generator_spec.rb'
118
+
119
+ # Offense count: 1
120
+ # Cop supports --auto-correct.
121
+ Lint/LiteralInInterpolation:
122
+ Exclude:
123
+ - 'lib/searchyll/generator.rb'
124
+
125
+ # Offense count: 1
126
+ Metrics/AbcSize:
127
+ Max: 20
128
+
129
+ # Offense count: 1
130
+ # Configuration parameters: CountComments.
131
+ Metrics/MethodLength:
132
+ Max: 15
133
+
134
+ # Offense count: 2
135
+ # Cop supports --auto-correct.
136
+ # Configuration parameters: EnforcedStyle.
137
+ # SupportedStyles: braces, no_braces, context_dependent
138
+ Style/BracesAroundHashParameters:
139
+ Exclude:
140
+ - 'lib/searchyll.rb'
141
+
142
+ # Offense count: 2
143
+ Style/Documentation:
144
+ Exclude:
145
+ - 'spec/**/*'
146
+ - 'test/**/*'
147
+ - 'lib/searchyll/configuration.rb'
148
+ - 'lib/searchyll/generator.rb'
149
+
150
+ # Offense count: 1
151
+ # Cop supports --auto-correct.
152
+ Style/Encoding:
153
+ Exclude:
154
+ - 'searchyll.gemspec'
155
+
156
+ # Offense count: 2
157
+ # Cop supports --auto-correct.
158
+ Style/ExpandPathArguments:
159
+ Exclude:
160
+ - 'searchyll.gemspec'
161
+ - 'spec/spec_helper.rb'
162
+
163
+ # Offense count: 1
164
+ # Cop supports --auto-correct.
165
+ # Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
166
+ # SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
167
+ Style/HashSyntax:
168
+ Exclude:
169
+ - 'Rakefile'
170
+
171
+ # Offense count: 1
172
+ # Cop supports --auto-correct.
173
+ Style/MutableConstant:
174
+ Exclude:
175
+ - 'lib/searchyll/version.rb'
176
+
177
+ # Offense count: 1
178
+ # Cop supports --auto-correct.
179
+ # Configuration parameters: PreferredDelimiters.
180
+ Style/PercentLiteralDelimiters:
181
+ Exclude:
182
+ - 'searchyll.gemspec'
183
+
184
+ # Offense count: 2
185
+ # Cop supports --auto-correct.
186
+ # Configuration parameters: EnforcedStyle.
187
+ # SupportedStyles: implicit, explicit
188
+ Style/RescueStandardError:
189
+ Exclude:
190
+ - 'lib/searchyll.rb'
191
+ - 'lib/searchyll/generator.rb'
192
+
193
+ # Offense count: 1
194
+ # Cop supports --auto-correct.
195
+ # Configuration parameters: AllowAsExpressionSeparator.
196
+ Style/Semicolon:
197
+ Exclude:
198
+ - 'lib/searchyll/generator.rb'
199
+
200
+ # Offense count: 2
201
+ # Cop supports --auto-correct.
202
+ Style/StderrPuts:
203
+ Exclude:
204
+ - 'lib/searchyll/generator.rb'
205
+
206
+ # Offense count: 42
207
+ # Cop supports --auto-correct.
208
+ # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
209
+ # SupportedStyles: single_quotes, double_quotes
210
+ Style/StringLiterals:
211
+ Exclude:
212
+ - 'Guardfile'
213
+ - 'Rakefile'
214
+ - 'bin/console'
215
+ - 'lib/searchyll.rb'
216
+ - 'lib/searchyll/configuration.rb'
217
+ - 'lib/searchyll/generator.rb'
218
+ - 'lib/searchyll/version.rb'
219
+ - 'searchyll.gemspec'
220
+
221
+ # Offense count: 1
222
+ # Cop supports --auto-correct.
223
+ Style/UnneededPercentQ:
224
+ Exclude:
225
+ - 'searchyll.gemspec'
226
+
227
+ # Offense count: 2
228
+ # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
229
+ # URISchemes: http, https
230
+ Metrics/LineLength:
231
+ Max: 120
@@ -0,0 +1,62 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
+ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
+
7
+
8
+
9
+ ## v0.10.2 - 2018-06-30
10
+ ### Added
11
+ - Version bump: v0.10.2., by Allison Zadrozny <allison@zadrozny.com>, [7930a3e](https://github.com/omc/searchyll/commit/7930a3e)
12
+ - Add & backfill the Changelog., by Allison Zadrozny <allison@zadrozny.com>, [cafe45c](https://github.com/omc/searchyll/commit/cafe45c)
13
+ - Merge pull request #30 from omc/allizad/remove-elasticsearch-ruby, by Allison Zadrozny <allison@zadrozny.com>, [e05d874](https://github.com/omc/searchyll/commit/e05d874)
14
+ - Remove unnecessary elasticsearch-ruby dependency., by Allison Zadrozny <allison@zadrozny.com>, [354b58b](https://github.com/omc/searchyll/commit/354b58b)
15
+ - Merge pull request #29 from omc/nz/reindex-cadence, by Allison Zadrozny <allison@zadrozny.com>, [9de8214](https://github.com/omc/searchyll/commit/9de8214)
16
+ - reindex with a cadence and a dynamic batch size, by Nick Zadrozny <nick@beyondthepath.com>, [a4aa544](https://github.com/omc/searchyll/commit/a4aa544)
17
+ - Rubocop and some light refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [150ea0d](https://github.com/omc/searchyll/commit/150ea0d)
18
+ - Merge pull request #28 from omc/robsears-patch-2, by Allison Zadrozny <allison@zadrozny.com>, [ffd360c](https://github.com/omc/searchyll/commit/ffd360c)
19
+ - Increment the version, by Rob Sears <secure@robsears.com>, [c4ee100](https://github.com/omc/searchyll/commit/c4ee100)
20
+ - Don't break site generation if the Elasticsearch URL is missing, by Rob Sears <secure@robsears.com>, [9ddd016](https://github.com/omc/searchyll/commit/9ddd016)
21
+ - Merge pull request #26 from omc/robsears-patch-1, by Allison Zadrozny <allison@zadrozny.com>, [bce8974](https://github.com/omc/searchyll/commit/bce8974)
22
+ - Fix a silly spelling error., by Rob <rc.sears@gmail.com>, [b280436](https://github.com/omc/searchyll/commit/b280436)
23
+ - Auth should be optional, by Rob <rc.sears@gmail.com>, [d7d694e](https://github.com/omc/searchyll/commit/d7d694e)
24
+
25
+ ## v0.10.0 - 2018-06-07
26
+ ### Added
27
+ - version bump to 0.10.0, by Nick Zadrozny <nick@beyondthepath.com>, [097ddb9](https://github.com/omc/searchyll/commit/097ddb9)
28
+ - Merge pull request #24 from omc/collections, by Dru Sellers <dru@drusellers.com>, [7d1f044](https://github.com/omc/searchyll/commit/7d1f044)
29
+ - Index collections as well as posts, by Dru Sellers <dru@drusellers.com>, [2cdaaa8](https://github.com/omc/searchyll/commit/2cdaaa8)
30
+ - Merge pull request #18 from omc/v5-accept-json, by Allison Zadrozny <allison@zadrozny.com>, [ee7e309](https://github.com/omc/searchyll/commit/ee7e309)
31
+ - Merge pull request #19 from matthewdu/fix-index-creation, by Allison Zadrozny <allison@zadrozny.com>, [0d34bb9](https://github.com/omc/searchyll/commit/0d34bb9)
32
+ - Use PUT instead of POST, by Matthew Du <du.matthew@gmail.com>, [b905d73](https://github.com/omc/searchyll/commit/b905d73)
33
+ - provide an Accept header with all requests, for v5 compat, by Nick Zadrozny <nick@beyondthepath.com>, [2f28fae](https://github.com/omc/searchyll/commit/2f28fae)
34
+ - Merge pull request #16 from omc/add-print-statement, by Allison Zadrozny <allison@zadrozny.com>, [e236c54](https://github.com/omc/searchyll/commit/e236c54)
35
+ - Skip index deletion if there are no old indices, by Allison Zadrozny <allison@zadrozny.com>, [b33546e](https://github.com/omc/searchyll/commit/b33546e)
36
+ - Add print statement to indexer, by Allison Zadrozny <allison@zadrozny.com>, [9af6f13](https://github.com/omc/searchyll/commit/9af6f13)
37
+
38
+ ## v0.9.0 - 2018-05-07
39
+ ### Added
40
+ - license, by Nick Zadrozny <nick@beyondthepath.com>, [b221803](https://github.com/omc/searchyll/commit/b221803)
41
+ - Ignore the packaged gem, by Nick Zadrozny <nick@beyondthepath.com>, [04a9764](https://github.com/omc/searchyll/commit/04a9764)
42
+ - GPLv3 license, by Nick Zadrozny <nick@beyondthepath.com>, [150f6ed](https://github.com/omc/searchyll/commit/150f6ed)
43
+ - Update gemspec, readme and version for first publish of the gem, by Nick Zadrozny <nick@beyondthepath.com>, [e30ba44](https://github.com/omc/searchyll/commit/e30ba44)
44
+ - Change version number, by Allison Zadrozny <allison@zadrozny.com>, [9cf886e](https://github.com/omc/searchyll/commit/9cf886e)
45
+ - Make searchyll into a hook, by Allison Zadrozny <allison@zadrozny.com>, [46d0232](https://github.com/omc/searchyll/commit/46d0232)
46
+ - Merge pull request #12 from nz/rename-searchyll, by Allison Zadrozny <allison@zadrozny.com>, [d6270d4](https://github.com/omc/searchyll/commit/d6270d4)
47
+ - Change Searchyou to Searchyll, by Rob Sears <secure@robsears.com>, [72466b9](https://github.com/omc/searchyll/commit/72466b9)
48
+ - Merge pull request #11 from nz/fresh-configs, by Nick Zadrozny <nick@onemorecloud.com>, [26d9cf5](https://github.com/omc/searchyll/commit/26d9cf5)
49
+ - Move configuration into its own file, by Rob Sears <secure@robsears.com>, [a008f11](https://github.com/omc/searchyll/commit/a008f11)
50
+ - Move the config options around, by Rob Sears <secure@robsears.com>, [485495c](https://github.com/omc/searchyll/commit/485495c)
51
+ - Implement notes from Nick, by Rob Sears <secure@robsears.com>, [0951991](https://github.com/omc/searchyll/commit/0951991)
52
+ - New configuration settings are in a Configuration class, additional documentation included, by Rob Sears <secure@robsears.com>, [4c00cc3](https://github.com/omc/searchyll/commit/4c00cc3)
53
+ - better index cleanup with more precise enumeration of old indices, by Nick Zadrozny <nick@beyondthepath.com>, [1d6d807](https://github.com/omc/searchyll/commit/1d6d807)
54
+ - Merge pull request #2 from allizad/edit-indexer0-content, by Nick Zadrozny <nick@onemorecloud.com>, [9149cdf](https://github.com/omc/searchyll/commit/9149cdf)
55
+ - Update generator.rb, by Allison Zadrozny <allison@zadrozny.com>, [76b650d](https://github.com/omc/searchyll/commit/76b650d)
56
+ - Edit indexer content, by Allison Zadrozny <allison@zadrozny.com>, [157f981](https://github.com/omc/searchyll/commit/157f981)
57
+ - more helper methods and cleanup, by Nick Zadrozny <nick@beyondthepath.com>, [e0913ad](https://github.com/omc/searchyll/commit/e0913ad)
58
+ - get the code pretty much to working, by Nick Zadrozny <nick@beyondthepath.com>, [5550a28](https://github.com/omc/searchyll/commit/5550a28)
59
+ - add some comments, by Nick Zadrozny <nick@beyondthepath.com>, [0be5381](https://github.com/omc/searchyll/commit/0be5381)
60
+ - make it run, by Nick Zadrozny <nick@beyondthepath.com>, [afa05a7](https://github.com/omc/searchyll/commit/afa05a7)
61
+ - start testing! with some refactoring, by Nick Zadrozny <nick@beyondthepath.com>, [8e63e9f](https://github.com/omc/searchyll/commit/8e63e9f)
62
+ - sketching a jekyll indexer for ES, by Nick Zadrozny <nick@beyondthepath.com>, [9f4c3be](https://github.com/omc/searchyll/commit/9f4c3be)
@@ -24,7 +24,7 @@ begin
24
24
  # strip html
25
25
  nokogiri_doc = Nokogiri::HTML(page.output)
26
26
 
27
- puts %( indexing page #{page.url})
27
+ # puts %( indexing page #{page.url})
28
28
 
29
29
  indexer = indexers[page.site]
30
30
  indexer << page.data.merge({
@@ -39,7 +39,7 @@ begin
39
39
  # strip html
40
40
  nokogiri_doc = Nokogiri::HTML(document.output)
41
41
 
42
- puts %( indexing document #{document.url})
42
+ # puts %( indexing document #{document.url})
43
43
 
44
44
  indexer = indexers[document.site]
45
45
  indexer << document.data.merge({
@@ -8,8 +8,7 @@ module Searchyll
8
8
  # Determine a URL for the cluster, or fail with error
9
9
  def elasticsearch_url
10
10
  ENV['BONSAI_URL'] || ENV['ELASTICSEARCH_URL'] ||
11
- ((site.config||{})['elasticsearch']||{})['url'] ||
12
- raise(ArgumentError, "No Elasticsearch URL present, skipping indexing")
11
+ ((site.config||{})['elasticsearch']||{})['url'].to_s
13
12
  end
14
13
 
15
14
  # Getter for the number of primary shards
@@ -14,6 +14,12 @@ module Searchyll
14
14
  # Gather the configuration options
15
15
  configuration = Configuration.new(site)
16
16
 
17
+ # Don't do anything if the Elasticsearch URL is missing
18
+ if configuration.elasticsearch_url.empty?
19
+ puts "No Elasticsearch URL present, skipping indexing"
20
+ return
21
+ end
22
+
17
23
  # Prepare the indexer
18
24
  indexer = Searchyll::Indexer.new(configuration)
19
25
  indexer.start
@@ -3,33 +3,95 @@ require 'net/http'
3
3
 
4
4
  module Searchyll
5
5
  class Indexer
6
-
6
+ # Initial size of document batches to send to ES _bulk API
7
7
  BATCH_SIZE = 50
8
8
 
9
+ # Grow and shrink the batch size based on how long our bulk calls take
10
+ # relative to the tempo
11
+ BATCH_RESIZE_FACTOR = 1.2
12
+
13
+ # Requests per minute for updates to ES
14
+ TEMPO = 94
15
+
16
+ attr_accessor :batch_size
9
17
  attr_accessor :configuration
10
18
  attr_accessor :indexer_thread
11
- attr_accessor :old_indices
12
19
  attr_accessor :queue
13
20
  attr_accessor :timestamp
14
21
  attr_accessor :uri
15
22
  attr_accessor :working
16
23
 
24
+ # Initialize a basic indexer, with a Jekyll site configuration, waiting
25
+ # to be supplied with documents for indexing.
17
26
  def initialize(configuration)
18
27
  self.configuration = configuration
19
- self.uri = URI(configuration.elasticsearch_url)
20
- self.queue = Queue.new
21
- self.working = true
22
- self.timestamp = Time.now
28
+ self.uri = URI(configuration.elasticsearch_url)
29
+ self.queue = Queue.new
30
+ self.working = true
31
+ self.timestamp = Time.now
32
+ self.batch_size = BATCH_SIZE
23
33
  end
24
34
 
25
35
  # Public: Add new documents for batch indexing.
26
36
  def <<(doc)
27
- self.queue << doc
37
+ queue << doc
38
+ end
39
+
40
+ # Public: start the indexer and wait for documents to index.
41
+ def start
42
+ prepare_index
43
+
44
+ self.indexer_thread = Thread.new do
45
+ http_start do |http|
46
+ indexer_loop(http)
47
+ end
48
+ end
49
+ end
50
+
51
+ # Public: Indicate to the indexer that no new documents are being added.
52
+ def finish
53
+ self.working = false
54
+ indexer_thread.join
55
+ finalize!
56
+ end
57
+
58
+ private
59
+
60
+ def indexer_loop(http)
61
+ tempo_loop do
62
+ break unless working?
63
+ es_bulk_insert!(http, current_batch)
64
+ end
65
+ end
66
+
67
+ # Run a loop in the tempo specified by TEMPO.
68
+ def tempo_loop
69
+ loop do
70
+ t = Time.now
71
+
72
+ # Perform the work required
73
+ yield
74
+
75
+ # Adjust the batch size
76
+ if (Time.now - t) / (60.0 / TEMPO) < 0.5
77
+ self.batch_size = (batch_size * BATCH_RESIZE_FACTOR).round
78
+ puts "Increased batch to #{batch_size}"
79
+ elsif (Time.now - t) / (60.0 / TEMPO) > 0.9
80
+ self.batch_size = (batch_size / BATCH_RESIZE_FACTOR).round
81
+ puts "Decreased batch to #{batch_size}"
82
+ end
83
+
84
+ # Tight loop to sleep through any remaining time in the tempo
85
+ while (60.0 / TEMPO) - (Time.now - t) > 0
86
+ sleep [0.1, (60.0 / TEMPO) - (Time.now - t)].min
87
+ break unless working?
88
+ end
89
+ end
28
90
  end
29
91
 
30
92
  # Signal a stop condition for our batch indexing thread.
31
93
  def working?
32
- working || queue.length > 0
94
+ working || !queue.empty?
33
95
  end
34
96
 
35
97
  # A versioned index name, based on the time of the indexing run.
@@ -39,10 +101,10 @@ module Searchyll
39
101
  end
40
102
 
41
103
  # Prepare an HTTP connection
42
- def http_start(&block)
104
+ def http_start
43
105
  http = Net::HTTP.start(
44
106
  uri.hostname, uri.port,
45
- :use_ssl => (uri.scheme == 'https')
107
+ use_ssl: (uri.scheme == 'https')
46
108
  )
47
109
  yield(http)
48
110
  end
@@ -59,24 +121,10 @@ module Searchyll
59
121
  }.to_json # TODO: index settings
60
122
 
61
123
  http_start do |http|
62
- resp = http.request(create_index)
124
+ http.request(create_index)
63
125
  end
64
126
 
65
- # todo: mapping?
66
- end
67
-
68
- # Public: start the indexer and wait for documents to index.
69
- def start
70
- prepare_index
71
-
72
- self.indexer_thread = Thread.new do
73
- http_start do |http|
74
- loop do
75
- break unless working?
76
- es_bulk_insert!(http, current_batch)
77
- end
78
- end
79
- end
127
+ # TODO: mapping?
80
128
  end
81
129
 
82
130
  def http_put(path)
@@ -99,7 +147,8 @@ module Searchyll
99
147
  req = klass.new(path)
100
148
  req.content_type = 'application/json'
101
149
  req['Accept'] = 'application/json'
102
- req.basic_auth(uri.user, uri.password)
150
+ # Append auth credentials if they exist
151
+ req.basic_auth(uri.user, uri.password) if uri.user && uri.password
103
152
  req
104
153
  end
105
154
 
@@ -109,7 +158,7 @@ module Searchyll
109
158
  def es_bulk_insert!(http, batch)
110
159
  bulk_insert = http_post("/#{elasticsearch_index_name}/#{configuration.elasticsearch_default_type}/_bulk")
111
160
  bulk_insert.body = batch.map do |doc|
112
- [ { :index => {} }.to_json, doc.to_json ].join("\n")
161
+ [{ index: {} }.to_json, doc.to_json].join("\n")
113
162
  end.join("\n") + "\n"
114
163
  http.request(bulk_insert)
115
164
  end
@@ -119,61 +168,77 @@ module Searchyll
119
168
  def current_batch
120
169
  count = 0
121
170
  batch = []
122
- while count < BATCH_SIZE && queue.length > 0
171
+ while count < batch_size && !queue.empty?
123
172
  batch << queue.pop
124
173
  count += 1
125
174
  end
126
175
  batch
127
176
  end
128
177
 
129
- # Public: Indicate to the indexer that no new documents are being added.
130
- def finish
131
- self.working = false
132
- indexer_thread.join
133
- finalize!
134
- end
135
-
178
+ # List the indices currently in the cluster, caching the call in an ivar
136
179
  def old_indices
137
- resp = http_start { |h| h.request(http_get("/_cat/indices?h=index")) }
138
- indices = JSON.parse(resp.body).map{|i|i['index']}
139
- indices = indices.select{|i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
140
- indices = indices - [ elasticsearch_index_name ]
141
- self.old_indices = indices
180
+ # return if defined?(@old_indices)
181
+ resp = http_start { |h| h.request(http_get('/_cat/indices?h=index')) }
182
+ indices = JSON.parse(resp.body).map { |i| i['index'] }
183
+ indices = indices.select { |i| i =~ /\A#{configuration.elasticsearch_index_base_name}/ }
184
+ indices -= [elasticsearch_index_name]
185
+ # @old_indices = indices
186
+ indices
142
187
  end
143
188
 
144
189
  # Once documents are done being indexed, finalize the process by adding
145
190
  # the new index into an alias for searching.
146
191
  def finalize!
147
- # refresh the index to make it searchable
192
+ # run the prepared requests
193
+ http_start do |http|
194
+ finalize_refresh(http)
195
+ finalize_replication(http)
196
+ finalize_aliases(http)
197
+ finalize_cleanup(http)
198
+ end
199
+ end
200
+
201
+ # refresh the index to make it searchable
202
+ def finalize_refresh(http)
148
203
  refresh = http_post("/#{elasticsearch_index_name}/_refresh")
204
+ http.request(refresh)
205
+ end
149
206
 
150
- # add replication to the new index
207
+ # add replication to the new index
208
+ def finalize_replication(http)
151
209
  add_replication = http_put("/#{elasticsearch_index_name}/_settings")
152
- add_replication.body = { index: { number_of_replicas: configuration.elasticsearch_number_of_replicas }}.to_json
210
+ add_replication.body = {
211
+ index: {
212
+ number_of_replicas: configuration.elasticsearch_number_of_replicas
213
+ }
214
+ }.to_json
215
+ http.request(add_replication)
216
+ end
153
217
 
154
- # hot swap the index into the canonical alias
155
- update_aliases = http_post("/_aliases")
218
+ # hot swap the index into the canonical alias
219
+ def finalize_aliases(http)
220
+ update_aliases = http_post('/_aliases')
156
221
  update_aliases.body = {
157
- "actions": [
158
- { "remove": { "index": old_indices.join(','), "alias": configuration.elasticsearch_index_base_name }},
159
- { "add": { "index": elasticsearch_index_name, "alias": configuration.elasticsearch_index_base_name }}
222
+ actions: [
223
+ { remove: {
224
+ index: old_indices.join(','),
225
+ alias: configuration.elasticsearch_index_base_name
226
+ } },
227
+ { add: {
228
+ index: elasticsearch_index_name,
229
+ alias: configuration.elasticsearch_index_base_name
230
+ } }
160
231
  ]
161
232
  }.to_json
233
+ http.request(update_aliases)
234
+ end
162
235
 
163
- # delete old indices
236
+ # delete old indices after a successful reindexing run
237
+ def finalize_cleanup(http)
238
+ return if old_indices.nil? || old_indices.empty?
164
239
  cleanup_indices = http_delete("/#{old_indices.join(',')}")
165
240
  puts %( Old indices: #{old_indices.join(', ')})
166
-
167
- # run the prepared requests
168
- http_start do |http|
169
- http.request(refresh)
170
- http.request(add_replication)
171
- http.request(update_aliases)
172
- if !old_indices.empty?
173
- http.request(cleanup_indices)
174
- end
175
- end
241
+ http.request(cleanup_indices)
176
242
  end
177
-
178
243
  end
179
244
  end
@@ -1,3 +1,3 @@
1
1
  module Searchyll
2
- VERSION = "0.10.0"
2
+ VERSION = "0.10.2"
3
3
  end
@@ -24,7 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "guard-rspec"
25
25
 
26
26
  spec.add_dependency "jekyll", ">= 3.0"
27
- spec.add_dependency "elasticsearch-ruby"
28
27
  spec.add_dependency "nokogiri"
29
28
 
30
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: searchyll
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Zadrozny
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2017-06-07 00:00:00.000000000 Z
13
+ date: 2018-06-30 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -82,20 +82,6 @@ dependencies:
82
82
  - - ">="
83
83
  - !ruby/object:Gem::Version
84
84
  version: '3.0'
85
- - !ruby/object:Gem::Dependency
86
- name: elasticsearch-ruby
87
- requirement: !ruby/object:Gem::Requirement
88
- requirements:
89
- - - ">="
90
- - !ruby/object:Gem::Version
91
- version: '0'
92
- type: :runtime
93
- prerelease: false
94
- version_requirements: !ruby/object:Gem::Requirement
95
- requirements:
96
- - - ">="
97
- - !ruby/object:Gem::Version
98
- version: '0'
99
85
  - !ruby/object:Gem::Dependency
100
86
  name: nokogiri
101
87
  requirement: !ruby/object:Gem::Requirement
@@ -121,7 +107,10 @@ extra_rdoc_files: []
121
107
  files:
122
108
  - ".gitignore"
123
109
  - ".rspec"
110
+ - ".rubocop.yml"
111
+ - ".rubocop_todo.yml"
124
112
  - ".travis.yml"
113
+ - CHANGELOG.md
125
114
  - Gemfile
126
115
  - Guardfile
127
116
  - LICENSE
@@ -155,7 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
144
  version: '0'
156
145
  requirements: []
157
146
  rubyforge_project:
158
- rubygems_version: 2.5.1
147
+ rubygems_version: 2.6.14
159
148
  signing_key:
160
149
  specification_version: 4
161
150
  summary: A gem to index your Jekyll pages into Elasticsearch.