zelastic 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b45ad7dd15f213cc571c3da4f021ff80129dcccb375ae98aedde7b22b759b05
4
- data.tar.gz: '08328c62f6485a0b11300dc9c651ca3e8c99179c84ea2c3f5235a921ca676d9a'
3
+ metadata.gz: 57c75cdfd5b9131b9836f4fe62975e634b19e6fdaf203c43dfb04fa47cfab19e
4
+ data.tar.gz: a45bbd2a537e3363e3ab77690b5ad60a059f6e2526f7ce7410242e93c6b266bb
5
5
  SHA512:
6
- metadata.gz: a3e38047bf9f47bc8e7353f8e52387a7bd6722e291f96291d09a50257cd28a43cf7fba2abcd9681abb3797aa5acce67f38b09cabb6ecbaf8e4638b2a7e6e21b6
7
- data.tar.gz: 94572581d7fa5f6c6de6485cacd9232a310418379f981d12d0523183f6922f5bcbbeb290605e4452ccb51603f702aa5bf52a9540ed681002a1b2e389a2dff670
6
+ metadata.gz: b2090c1670134753ac7f3cab263c36d795f3b409b7e4a0d74cf9d27a57d1a80250ce7f2ec47dcacae564485a7645775010d57a55cd5ee61216af7584865b9b45
7
+ data.tar.gz: 05f156758b28a8c5198282567490ccfc93d3c499f785dd2eaf654b5243a3cada6e2f5362e7842bc9f32bda084f5d43a4fb775ecc91f90e3bbdbf8fab0680153b
@@ -0,0 +1,45 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ branches: [main, master]
8
+
9
+ jobs:
10
+ test:
11
+ name: ES ${{ matrix.elasticsearch }} / Gem ${{ matrix.elasticsearch_gem }}
12
+ runs-on: ubuntu-latest
13
+
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ elasticsearch: ["8.19.15"]
18
+ elasticsearch_gem: ["~> 8", "~> 7"]
19
+ include:
20
+ - elasticsearch: "7.17.28"
21
+ elasticsearch_gem: "~> 7"
22
+
23
+ services:
24
+ elasticsearch:
25
+ image: elasticsearch:${{ matrix.elasticsearch }}
26
+ env:
27
+ discovery.type: single-node
28
+ xpack.security.enabled: "false"
29
+ ports:
30
+ - 9200:9200
31
+ options: --health-cmd "curl -s http://localhost:9200" --health-interval 5s --health-timeout 3s --health-retries 20
32
+
33
+ env:
34
+ ELASTICSEARCH_GEM_VERSION: ${{ matrix.elasticsearch_gem }}
35
+
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+
39
+ - uses: ruby/setup-ruby@v1
40
+ with:
41
+ ruby-version: "3.4"
42
+ bundler-cache: true
43
+
44
+ - run: bin/rubocop
45
+ - run: bin/rspec
data/CHANGELOG.md CHANGED
@@ -31,6 +31,13 @@ and this project adheres to [Semantic Versioning].
31
31
 
32
32
  ## [Unreleased] - XXXX-XX-XX
33
33
 
34
+ ## [0.9.1]
35
+ ### Added
36
+ - `op_type` and `conflicts` kwargs on `IndexManager#reindex_from_local` and `#reindex_from_remote`. Both default to `nil`,
37
+ which omits them from the request body and preserves Elasticsearch defaults. Pass `op_type: 'create'` and
38
+ `conflicts: 'proceed'` for safe concurrent backfills - this avoids overwriting fresher live writes at the destination, and
39
+ logs conflicts instead of aborting.
40
+
34
41
  ## [0.8.0] - 2022-08-19
35
42
  ### Added
36
43
  - Support for Ruby 3 (and keep support for 2.7).
data/CODEOWNERS ADDED
@@ -0,0 +1,5 @@
1
+ # Lines starting with '#' are comments.
2
+ # Each line is a file pattern followed by one or more owners.
3
+
4
+ # These owners will be the default owners for everything in the repo.
5
+ * @carwow/platform @carwow/tech-leads
data/README.md CHANGED
@@ -102,6 +102,45 @@ The `client` keyword argument to `Zelastic::IndexManager.new` is optional. It de
102
102
  passed to `Zelastic::Config.new`, if one client is passed, or the first client in the array, if an
103
103
  array is passed to `Zelastic::Config.new`.
104
104
 
105
+ ### Reindexing using Elasticsearch's Reindex API
106
+
107
+ For large indices, you can use Elasticsearch's native reindex API instead of `populate_index`:
108
+
109
+ ```ruby
110
+ index_manager = Zelastic::IndexManager.new(MyModelIndex)
111
+ source_index = index_manager.current_read_index
112
+ dest_index = index_manager.current_write_index # or any target index name
113
+ index_manager.reindex_from_local(source_index: source_index, dest_index: dest_index)
114
+ ```
115
+
116
+ ### Reindexing from a remote Elasticsearch instance
117
+
118
+ If you have multiple Elasticsearch instances (e.g., primary and secondary), you can copy data
119
+ from one to another using the `reindex_from_remote` method:
120
+
121
+ ```ruby
122
+ # On the destination ES instance
123
+ secondary_client = Elasticsearch::Client.new(url: ENV['SECONDARY_ELASTICSEARCH_URL'])
124
+ index_manager = Zelastic::IndexManager.new(MyModelIndex, client: secondary_client)
125
+
126
+ # Get the source index name from the primary instance
127
+ primary_client = Elasticsearch::Client.new(url: ENV['ELASTICSEARCH_URL'])
128
+ source_index = primary_client.indices.get_alias(name: MyModelIndex.read_alias).keys.first
129
+
130
+ dest_index = index_manager.current_write_index
131
+ index_manager.reindex_from_remote(
132
+ source_host: ENV['ELASTICSEARCH_URL'],
133
+ source_index: source_index,
134
+ dest_index: dest_index,
135
+ username: ENV['ELASTICSEARCH_USERNAME'], # optional
136
+ password: ENV['ELASTICSEARCH_PASSWORD'], # optional
137
+ wait_for_completion: false # runs asynchronously by default
138
+ )
139
+ ```
140
+
141
+ Note: For remote reindexing to work, the destination Elasticsearch cluster must have the source
142
+ host configured in `reindex.remote.whitelist` in `elasticsearch.yml`.
143
+
105
144
  ## Development
106
145
 
107
146
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -19,9 +19,9 @@ module Zelastic
19
19
  def populate_index(unique_name = nil, batch_size: 3000, refresh: false)
20
20
  index_name = index_name_from_unique(unique_name)
21
21
 
22
- config.data_source.find_in_batches(batch_size: batch_size).with_index do |batch, i|
23
- logger.info(populate_index_log(batch_size: batch_size, batch_number: i + 1))
24
- indexer.index_batch(batch, client: client, index_name: index_name, refresh: refresh)
22
+ config.data_source.find_in_batches(batch_size:).with_index do |batch, i|
23
+ logger.info(populate_index_log(batch_size:, batch_number: i + 1))
24
+ indexer.index_batch(batch, client:, index_name:, refresh:)
25
25
  end
26
26
  end
27
27
 
@@ -64,9 +64,9 @@ module Zelastic
64
64
  )
65
65
 
66
66
  actions = other_write_indices.map do |index|
67
- { remove: { index: index, alias: config.write_alias } }
67
+ { remove: { index:, alias: config.write_alias } }
68
68
  end
69
- client.indices.update_aliases(body: { actions: actions })
69
+ client.indices.update_aliases(body: { actions: })
70
70
  end
71
71
 
72
72
  def cleanup_old_indices
@@ -92,12 +92,65 @@ module Zelastic
92
92
  client.indices.delete(index: indices_to_delete)
93
93
  end
94
94
 
95
+ def reindex_from_local(source_index:, dest_index:, wait_for_completion: false, op_type: nil, conflicts: nil)
96
+ logger.info("Reindexing from #{source_index} to #{dest_index}")
97
+ reindex(
98
+ source: { index: source_index }, dest_index:,
99
+ wait_for_completion:,
100
+ op_type:,
101
+ conflicts:
102
+ )
103
+ end
104
+
105
+ def reindex_from_remote(
106
+ source_host:,
107
+ source_index:,
108
+ dest_index:,
109
+ username: nil,
110
+ password: nil,
111
+ wait_for_completion: false,
112
+ op_type: nil,
113
+ conflicts: nil
114
+ )
115
+ logger.info("Reindexing from remote #{source_host}/#{source_index} to #{dest_index}")
116
+
117
+ remote = { host: source_host }
118
+ remote[:username] = username if username
119
+ remote[:password] = password if password
120
+
121
+ reindex(
122
+ source: { remote:, index: source_index },
123
+ dest_index:,
124
+ wait_for_completion:,
125
+ op_type:,
126
+ conflicts:
127
+ )
128
+ end
129
+
130
+ def current_read_index
131
+ client.indices.get_alias(name: config.read_alias).keys.first
132
+ end
133
+
134
+ def current_write_index
135
+ client.indices.get_alias(name: config.write_alias).keys.first
136
+ end
137
+
95
138
  private
96
139
 
97
140
  attr_reader :config, :client
98
141
 
99
142
  def_delegators :config, :logger
100
143
 
144
+ def reindex(source:, dest_index:, wait_for_completion:, op_type: nil, conflicts: nil)
145
+ dest = { index: dest_index }
146
+ dest[:op_type] = op_type if op_type
147
+
148
+ body = { source:, dest: }
149
+ body[:conflicts] = conflicts if conflicts
150
+
151
+ client.reindex(body:, wait_for_completion:)
152
+ end
153
+
101
154
  def indexer
102
155
  @indexer ||= Indexer.new(config)
103
156
  end
@@ -13,15 +13,19 @@ module Zelastic
13
13
 
14
14
  extend Forwardable
15
15
 
16
+ VERSION_CONFLICT_ERROR_REGEXP =
17
+ /^\[\d+\]: version conflict, current version \[\d+\] is higher or equal to the one provided \[\d+\]$/
18
+ private_constant :VERSION_CONFLICT_ERROR_REGEXP
19
+
16
20
  def initialize(config)
17
21
  @config = config
18
22
  end
19
23
 
20
24
  def index_batch(batch, client: nil, index_name: nil, refresh: false)
21
25
  version = current_version
22
- execute_bulk(client: client, index_name: index_name, refresh: refresh) do |index|
26
+ execute_bulk(client:, index_name:, refresh:) do |index|
23
27
  batch.map do |record|
24
- index_command(index: index, version: version, record: record)
28
+ index_command(index:, version:, record:)
25
29
  end
26
30
  end
27
31
  end
@@ -29,8 +33,8 @@ module Zelastic
29
33
  def index_record(record, refresh: false)
30
34
  version = current_version
31
35
 
32
- execute_bulk(refresh: refresh) do |index_name|
33
- [index_command(index: index_name, version: version, record: record)]
36
+ execute_bulk(refresh:) do |index_name|
37
+ [index_command(index: index_name, version:, record:)]
34
38
  end
35
39
  end
36
40
 
@@ -52,7 +56,7 @@ module Zelastic
52
56
  logger.info('ES: Deleting batch records')
53
57
 
54
58
  config.clients.map do |client|
55
- client.delete_by_query(index: config.write_alias, body: { query: query })
59
+ client.delete_by_query(index: config.write_alias, body: { query: })
56
60
  end
57
61
  end
58
62
 
@@ -78,7 +82,7 @@ module Zelastic
78
82
  _index: index,
79
83
  _id: record.id,
80
84
  data: config.index_data(record),
81
- version: version,
85
+ version:,
82
86
  version_type: :external
83
87
  }
84
88
  }
@@ -92,7 +96,7 @@ module Zelastic
92
96
 
93
97
  commands = indices.flat_map(&block)
94
98
 
95
- current_client.bulk(body: commands, refresh: refresh).tap do |result|
99
+ current_client.bulk(body: commands, refresh:).tap do |result|
96
100
  check_errors!(result)
97
101
  end
98
102
  end
@@ -118,8 +122,5 @@ module Zelastic
118
122
  error['type'] == 'version_conflict_engine_exception' &&
119
123
  error['reason'] =~ VERSION_CONFLICT_ERROR_REGEXP
120
124
  end
121
-
122
- VERSION_CONFLICT_ERROR_REGEXP =
123
- /^\[\d+\]: version conflict, current version \[\d+\] is higher or equal to the one provided \[\d+\]$/.freeze
124
125
  end
125
126
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zelastic
4
- VERSION = '0.8.0'
4
+ VERSION = '0.9.1'
5
5
  end
data/zelastic.gemspec CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
28
28
  spec.add_dependency 'activesupport'
29
29
 
30
30
  spec.add_development_dependency 'bundler', '~> 2'
31
- spec.add_development_dependency 'carwow_rubocop', '~> 4'
31
+ spec.add_development_dependency 'carwow_rubocop', '~> 6'
32
32
  spec.add_development_dependency 'pry', '~> 0.14'
33
33
  spec.add_development_dependency 'rake', '~> 13'
34
34
  spec.add_development_dependency 'rspec', '~> 3'
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zelastic
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - carwow Developers
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2022-08-19 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: elasticsearch
@@ -78,14 +77,14 @@ dependencies:
78
77
  requirements:
79
78
  - - "~>"
80
79
  - !ruby/object:Gem::Version
81
- version: '4'
80
+ version: '6'
82
81
  type: :development
83
82
  prerelease: false
84
83
  version_requirements: !ruby/object:Gem::Requirement
85
84
  requirements:
86
85
  - - "~>"
87
86
  - !ruby/object:Gem::Version
88
- version: '4'
87
+ version: '6'
89
88
  - !ruby/object:Gem::Dependency
90
89
  name: pry
91
90
  requirement: !ruby/object:Gem::Requirement
@@ -135,12 +134,13 @@ executables: []
135
134
  extensions: []
136
135
  extra_rdoc_files: []
137
136
  files:
138
- - ".circleci/config.yml"
139
137
  - ".github/dependabot.yml"
138
+ - ".github/workflows/ci.yml"
140
139
  - ".gitignore"
141
140
  - ".rspec"
142
141
  - ".rubocop.yml"
143
142
  - CHANGELOG.md
143
+ - CODEOWNERS
144
144
  - Gemfile
145
145
  - LICENSE.txt
146
146
  - README.md
@@ -160,7 +160,6 @@ homepage: https://github.com/carwow/zelastic
160
160
  licenses:
161
161
  - MIT
162
162
  metadata: {}
163
- post_install_message:
164
163
  rdoc_options: []
165
164
  require_paths:
166
165
  - lib
@@ -175,8 +174,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
174
  - !ruby/object:Gem::Version
176
175
  version: '0'
177
176
  requirements: []
178
- rubygems_version: 3.3.11
179
- signing_key:
177
+ rubygems_version: 3.6.9
180
178
  specification_version: 4
181
179
  summary: Zero-downtime (re-)indexing of ActiveRecord models into Elasticsearch.
182
180
  test_files: []
data/.circleci/config.yml DELETED
@@ -1,48 +0,0 @@
1
- version: 2.1
2
-
3
- jobs:
4
- test:
5
- parameters:
6
- ruby_version:
7
- type: string
8
- elasticsearch_version:
9
- type: string
10
- elasticsearch_gem_version:
11
- type: string
12
- docker:
13
- - image: "cimg/ruby:<< parameters.ruby_version >>"
14
- - image: "elasticsearch:<< parameters.elasticsearch_version >>"
15
- environment:
16
- "discovery.type": single-node
17
- "xpack.security.enabled": false
18
- environment:
19
- ELASTICSEARCH_GEM_VERSION: "<< parameters.elasticsearch_gem_version >>"
20
- steps:
21
- - checkout
22
- - run: bin/setup
23
- - run: bin/rubocop
24
- - run: dockerize -wait http://localhost:9200 -timeout 1m
25
- - run: bin/rspec
26
-
27
- workflows:
28
- version: 2
29
- test:
30
- jobs:
31
- - test:
32
- name: "test with elasticsearch v8
33
- and gem << matrix.elasticsearch_gem_version >>
34
- and ruby v<< matrix.ruby_version >>"
35
- elasticsearch_version: "8.3.3"
36
- matrix:
37
- parameters:
38
- ruby_version: ["3.1", "2.7"]
39
- elasticsearch_gem_version: ["~> 8", "~> 7"]
40
- - test:
41
- name: "test with elasticsearch v7
42
- and gem << matrix.elasticsearch_gem_version >>
43
- and ruby v<< matrix.ruby_version >>"
44
- elasticsearch_version: "7.17.5"
45
- matrix:
46
- parameters:
47
- ruby_version: ["3.1", "2.7"]
48
- elasticsearch_gem_version: ["~> 7"]