kafka_replicator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: d5a3c9b1df23b6f577496c6bf1bfd19f7bac70b6
+   data.tar.gz: f06b89307df2da71928a16566ed14ac9ab771a7c
+ SHA512:
+   metadata.gz: f9f5763bacfe9d08578cd2bcda00c6631cd78b64a827ae5a06c468ba36739867fdb8cc35ee883da18e0a5ca475950c67bee96590d0b8abc3fc82e25c26f0479c
+   data.tar.gz: 4c4fdec9594496b2b7a757013c9037c2686c517d97911964ea7388524d4fef7b2d5fbc16f730d615a814a01efa2f1eea6ef0e545da8e65f30ad8b4e895d3d83c
data/.gitignore ADDED
@@ -0,0 +1,8 @@
+ /.bundle/
+ /.yardoc
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,6 @@
+ source "https://rubygems.org"
+
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
+
+ # Specify your gem's dependencies in kafka_replicator.gemspec
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,26 @@
+ PATH
+   remote: .
+   specs:
+     kafka_replicator (0.1.0)
+       json
+       ruby-kafka (~> 0.7.5)
+
+ GEM
+   remote: https://rubygems.org/
+   specs:
+     digest-crc (0.4.1)
+     json (2.2.0)
+     rake (10.5.0)
+     ruby-kafka (0.7.6)
+       digest-crc
+
+ PLATFORMS
+   ruby
+
+ DEPENDENCIES
+   bundler (~> 1.17)
+   kafka_replicator!
+   rake (~> 10.0)
+
+ BUNDLED WITH
+    1.17.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2019 Vachagan Gevorgyan
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
+ # KafkaReplicator
+
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/kafka_replicator`. To experiment with that code, run `bin/console` for an interactive prompt.
+
+ TODO: Delete this and the text above, and describe your gem
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+ ```ruby
+ gem 'kafka_replicator'
+ ```
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install kafka_replicator
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Development
+
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+
+ ## Contributing
+
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/kafka_replicator.
+
+ ## License
+
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
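The Usage section above is still a template TODO. As a stopgap, here is a minimal usage sketch assembled from the rake task that ships in this gem; the broker addresses are placeholders, not defaults:

```ruby
require "kafka_replicator"

# Placeholder broker lists -- point these at your own clusters.
replicator = KafkaReplicator::TopicsReplicator.new(
  source_brokers: ["source-kafka:9092"],
  destination_brokers: ["destination-kafka:9092"],
  skip_topics: []
)

# Shut down cleanly on signals, as the bundled rake task does.
trap("TERM") { replicator.stop }
trap("INT") { replicator.stop }

replicator.start
```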
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require "bundler/gem_tasks"
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
+ #!/usr/bin/env ruby
+
+ require "bundler/setup"
+ require "kafka_replicator"
+
+ # You can add fixtures and/or initialization code here to make experimenting
+ # with your gem easier. You can also use a different console, if you like.
+
+ # (If you use this, don't forget to add pry to your Gemfile!)
+ # require "pry"
+ # Pry.start
+
+ require "irb"
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/kafka_replicator.gemspec ADDED
@@ -0,0 +1,31 @@
+
+ lib = File.expand_path("../lib", __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require "kafka_replicator/version"
+
+ Gem::Specification.new do |spec|
+   spec.name          = "kafka_replicator"
+   spec.version       = KafkaReplicator::VERSION
+   spec.authors       = ["Vachagan Gevorgyan"]
+   spec.email         = ["v.gevorgyan@catawiki.nl"]
+
+   spec.summary       = %q{Replicate topics from one kafka cluster to another}
+   spec.description   = %q{Simple solution for organizing 2 way syncing between kafka clusters}
+   spec.homepage      = "https://github.com/Vachman/kafka-replicator"
+   spec.license       = "MIT"
+
+   # Specify which files should be added to the gem when it is released.
+   # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+   spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
+     `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   end
+   spec.bindir        = "exe"
+   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.17"
+   spec.add_development_dependency "rake", "~> 10.0"
+
+   spec.add_dependency 'ruby-kafka', '~> 0.6.0'
+   spec.add_dependency 'multi_json', '~> 1.0'
+ end
data/lib/kafka_replicator.rb ADDED
@@ -0,0 +1,12 @@
+ require "kafka"
+ require "kafka_replicator/offsets_sync"
+ require 'kafka_replicator/railtie' if defined?(Rails)
+ require "kafka_replicator/topics_replicator"
+ require "kafka_replicator/version"
+ require "logger"
+ require "multi_json"
+ require "set" # Set is used by TopicsReplicator#setup
+
+ module KafkaReplicator
+   class Error < StandardError; end
+ end
data/lib/kafka_replicator/offsets_sync.rb ADDED
@@ -0,0 +1,119 @@
+ module KafkaReplicator
+   class OffsetsSync
+     attr_reader :source_kafka,
+                 :destination_kafka,
+                 :destination_consumer,
+                 :consumer_group,
+                 :logger,
+                 :topics
+
+     def initialize(source_brokers:, destination_brokers:, consumer_group:)
+       @source_brokers = source_brokers
+       @destination_brokers = destination_brokers
+       @consumer_group = consumer_group
+       @topics = Hash.new { |h, k| h[k] = {} }
+       @logger = Logger.new(STDOUT)
+     end
+
+     def source_kafka
+       @source_kafka ||= Kafka.new(
+         @source_brokers,
+         client_id: "replicator_source"
+       )
+     end
+
+     def destination_kafka
+       @destination_kafka ||= Kafka.new(
+         @destination_brokers,
+         client_id: "replicator_destination"
+       )
+     end
+
+     def destination_consumer
+       @destination_consumer ||= destination_kafka.consumer(
+         group_id: consumer_group
+       )
+     end
+
+     def source_group_coordinator
+       source_kafka.instance_variable_get('@cluster').send(
+         :get_group_coordinator,
+         group_id: consumer_group
+       )
+     end
+
+     def source_consumer_offsets
+       Kafka::Protocol::OffsetFetchRequest.send(:define_method, "api_version") { 2 }
+       source_group_coordinator.fetch_offsets(group_id: consumer_group, topics: nil)
+     end
+
+     def load_source_consumer_offsets
+       logger.info "load_source_consumer_offsets"
+
+       source_consumer_offsets.topics.each do |topic, partitions|
+         partitions.each do |partition, info|
+           @topics[topic][partition] = { source_consumer_offset: info.offset }
+         end
+       end
+     end
+
+     def load_source_producer_offsets
+       logger.info "load_source_producer_offsets"
+
+       source_kafka.last_offsets_for(*@topics.keys).each do |topic, partitions|
+         partitions.each do |partition, offset|
+           @topics[topic][partition][:source_producer_offset] = offset
+         end
+       end
+     end
+
+     def load_destination_producer_offsets
+       logger.info "load_destination_producer_offsets"
+
+       destination_kafka.last_offsets_for(*@topics.keys).each do |topic, partitions|
+         partitions.each do |partition, offset|
+           @topics[topic][partition][:destination_producer_offset] = offset
+         end
+       end
+     end
+
+     def calculate_destination_consumer_offsets
+       logger.info "calculate_destination_consumer_offsets"
+
+       @topics.each do |topic, partitions|
+         partitions.each do |partition, info|
+           delta = info[:source_producer_offset] - info[:destination_producer_offset]
+           info[:destination_consumer_offset] = info[:source_consumer_offset] - delta
+         end
+       end
+     end
+
+     def set_destination_consumer_offsets
+       logger.info "set_destination_consumer_offsets"
+
+       @topics.each do |topic, partitions|
+         destination_consumer.subscribe(topic)
+         partitions.each do |partition, info|
+           offset = info[:destination_consumer_offset]
+           logger.info "Seeking consumer offset for: #{topic}/#{partition} to #{offset}"
+           destination_consumer.seek(topic, partition, offset)
+         end
+       end
+
+       opts = { automatically_mark_as_processed: false }
+       destination_consumer.each_message(opts) do |m|
+         logger.info "Setting consumer offset for: #{m.topic}/#{m.partition}"
+         break
+       end
+     end
+
+     def sync
+       load_source_consumer_offsets
+       load_destination_producer_offsets
+       load_source_producer_offsets
+
+       calculate_destination_consumer_offsets
+       set_destination_consumer_offsets
+     end
+   end
+ end
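A worked example of the offset arithmetic in `calculate_destination_consumer_offsets` above, with assumed numbers; the calculation presumes both clusters hold the same messages in the same order per partition:

```ruby
# Assumed offsets for a single topic partition:
source_producer_offset      = 1_000 # head of the source log
destination_producer_offset =   990 # destination lags by 10 unreplicated messages
source_consumer_offset      =   800 # consumer group's position on the source

delta  = source_producer_offset - destination_producer_offset # => 10
offset = source_consumer_offset - delta                       # => 790

# Seeking the destination consumer to 790 preserves the group's
# 200-message backlog relative to the head of the destination log.
```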
data/lib/kafka_replicator/railtie.rb ADDED
@@ -0,0 +1,8 @@
+ module KafkaReplicator
+   class Railtie < Rails::Railtie
+     rake_tasks do
+       load 'tasks/kafka_replicator.rake'
+     end
+   end
+ end
+
data/lib/kafka_replicator/topics_replicator.rb ADDED
@@ -0,0 +1,159 @@
+ module KafkaReplicator
+   class TopicsReplicator
+     SKIP_TOPICS = ['__consumer_offsets', '_schemas']
+
+     attr_reader :source_kafka,
+                 :destination_kafka,
+                 :source_consumer,
+                 :destination_producer,
+                 :replicated_topics,
+                 :skip_topics,
+                 :logger,
+                 :stopped
+
+     def initialize(source_brokers:, destination_brokers:, skip_topics: [])
+       @source_brokers = source_brokers
+       @destination_brokers = destination_brokers
+       @skip_topics = SKIP_TOPICS | skip_topics
+       @logger = Logger.new(STDOUT)
+     end
+
+     def setup
+       @stopped = false
+       @replicated_topics = Set[]
+       @source_consumer = nil
+       @destination_producer = nil
+     end
+
+     def source_kafka
+       @source_kafka ||= Kafka.new(
+         @source_brokers,
+         client_id: "replicator_source"
+       )
+     end
+
+     def destination_kafka
+       @destination_kafka ||= Kafka.new(
+         @destination_brokers,
+         client_id: "replicator_destination"
+       )
+     end
+
+     def source_consumer
+       @source_consumer ||= source_kafka.consumer(group_id: "replicator")
+     end
+
+     def destination_producer
+       @destination_producer ||= destination_kafka.producer
+     end
+
+     def start
+       loop do
+         break if stopped
+
+         logger.info 'Setting up configuration...'
+         setup
+
+         logger.info 'Adding topics for replication...'
+         subscribe_to_source_topics
+
+         logger.info 'Starting replication...'
+         replicate
+       end
+     end
+
+     # Delegates to the private replication loop so one rescue covers it;
+     # defining a second `replicate` here would silently overwrite this wrapper.
+     def replicate
+       replicate_messages
+     rescue => e
+       logger.error "Exception: #{e}"
+       logger.error "Exception.cause: #{e.cause.inspect}"
+     end
+
+     def stop
+       logger.info 'Stopping replication...'
+       source_consumer.stop
+       @stopped = true
+     end
+
+     private
+
+     def replicate_messages
+       source_consumer.each_batch(automatically_mark_as_processed: false) do |batch|
+         unless unreplicated_topics.empty?
+           logger.info 'New topics added, restarting...'
+           break
+         end
+
+         batch.messages.each_slice(100).each do |messages|
+           messages.each do |message|
+             value = parse_message(message.value)
+
+             # Currently we support only JSON messages, so if for some reason a message
+             # is not valid JSON we just skip it in order to continue replication
+             next if value.kind_of?(Exception)
+
+             # Skip already replicated messages;
+             # prevents loops in a two-way replication scenario
+             if value.has_key?(:replica)
+               source_consumer.mark_message_as_processed(message)
+               print('-')
+               next
+             end
+
+             # Mark the message as a replica
+             value[:replica] = true
+
+             destination_producer.produce(
+               MultiJson.dump(value),
+               topic: message.topic,
+               partition: message.partition
+             )
+
+             source_consumer.mark_message_as_processed(message)
+             print '.'
+           end
+
+           destination_producer.deliver_messages
+           source_consumer.commit_offsets
+         end
+       end
+     end
+
+     def parse_message(value)
+       MultiJson.load(value, symbolize_keys: true)
+     rescue MultiJson::ParseError => exception
+       logger.error exception.cause
+
+       exception
+     end
+
+     def source_topics
+       source_kafka.topics.reject { |topic_name| skip_topics.include?(topic_name) }.to_set
+     end
+
+     def unreplicated_topics
+       source_topics - replicated_topics
+     end
+
+     def subscribe_to_source_topics
+       destination_topics = destination_kafka.topics
+
+       unreplicated_topics.each do |topic|
+         source_consumer.subscribe(topic, start_from_beginning: true)
+         replicated_topics << topic
+
+         unless destination_topics.include?(topic)
+           destination_kafka.create_topic(
+             topic,
+             num_partitions: source_kafka.partitions_for(topic),
+             replication_factor: 3 # Needs to be specified; otherwise the ruby-kafka driver defaults it to 1
+           )
+         end
+
+         logger.info "Topic added: #{topic}"
+       end
+     end
+   end
+ end
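The loop prevention above hinges on the `:replica` key surviving a JSON round trip. A self-contained sketch of that check, with an invented payload:

```ruby
require "multi_json"

payload = MultiJson.dump({ id: 42, event: "order_paid" })

value = MultiJson.load(payload, symbolize_keys: true)
value[:replica] = true # marked before producing to the destination cluster

# In a two-way setup the message eventually comes back; the marker
# makes the replicator mark it as processed and skip re-producing it.
round_trip = MultiJson.load(MultiJson.dump(value), symbolize_keys: true)
round_trip.has_key?(:replica) # => true, so the message is skipped
```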
data/lib/kafka_replicator/version.rb ADDED
@@ -0,0 +1,3 @@
+ module KafkaReplicator
+   VERSION = "0.1.0"
+ end
data/lib/tasks/kafka_replicator.rake ADDED
@@ -0,0 +1,28 @@
+ require 'multi_json'
+
+ namespace :kafka_replicator do
+   desc 'Start topics replicator'
+   task replicate_topics: :environment do |task, _|
+     source_brokers = ENV['KAFKA_REPLICATOR_SOURCE_BROKERS'] && MultiJson.load(ENV['KAFKA_REPLICATOR_SOURCE_BROKERS'])
+     raise "KAFKA_REPLICATOR_SOURCE_BROKERS environment variable is not set" unless source_brokers
+
+     destination_brokers = ENV['KAFKA_REPLICATOR_DESTINATION_BROKERS'] && MultiJson.load(ENV['KAFKA_REPLICATOR_DESTINATION_BROKERS'])
+     raise "KAFKA_REPLICATOR_DESTINATION_BROKERS environment variable is not set" unless destination_brokers
+
+     skip_topics = (ENV['KAFKA_REPLICATOR_SKIP_TOPICS'] && MultiJson.load(ENV['KAFKA_REPLICATOR_SKIP_TOPICS'])) || []
+
+     puts "Replicating from #{source_brokers} to #{destination_brokers}"
+     puts "Skipping topics: #{(KafkaReplicator::TopicsReplicator::SKIP_TOPICS | skip_topics).sort}"
+
+     replicator = KafkaReplicator::TopicsReplicator.new(
+       source_brokers: source_brokers,
+       destination_brokers: destination_brokers,
+       skip_topics: skip_topics
+     )
+
+     trap("TERM") { replicator.stop }
+     trap("INT") { replicator.stop }
+
+     replicator.start
+   end
+ end
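Given the environment variables the task reads, an invocation might look like the following; the broker addresses are placeholders:

    $ KAFKA_REPLICATOR_SOURCE_BROKERS='["source-kafka:9092"]' \
      KAFKA_REPLICATOR_DESTINATION_BROKERS='["destination-kafka:9092"]' \
      bundle exec rake kafka_replicator:replicate_topics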
metadata ADDED
@@ -0,0 +1,115 @@
+ --- !ruby/object:Gem::Specification
+ name: kafka_replicator
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Vachagan Gevorgyan
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2019-06-11 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.17'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.17'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.6.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 0.6.0
+ - !ruby/object:Gem::Dependency
+   name: multi_json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.0'
+ description: Simple solution for organizing 2 way syncing between kafka clusters
+ email:
+ - v.gevorgyan@catawiki.nl
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - Gemfile
+ - Gemfile.lock
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - bin/console
+ - bin/setup
+ - kafka_replicator.gemspec
+ - lib/kafka_replicator.rb
+ - lib/kafka_replicator/offsets_sync.rb
+ - lib/kafka_replicator/railtie.rb
+ - lib/kafka_replicator/topics_replicator.rb
+ - lib/kafka_replicator/version.rb
+ - lib/tasks/kafka_replicator.rake
+ homepage: https://github.com/Vachman/kafka-replicator
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.6.11
+ signing_key:
+ specification_version: 4
+ summary: Replicate topics from one kafka cluster to another
+ test_files: []