kafka_replicator 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d5a3c9b1df23b6f577496c6bf1bfd19f7bac70b6
4
+ data.tar.gz: f06b89307df2da71928a16566ed14ac9ab771a7c
5
+ SHA512:
6
+ metadata.gz: f9f5763bacfe9d08578cd2bcda00c6631cd78b64a827ae5a06c468ba36739867fdb8cc35ee883da18e0a5ca475950c67bee96590d0b8abc3fc82e25c26f0479c
7
+ data.tar.gz: 4c4fdec9594496b2b7a757013c9037c2686c517d97911964ea7388524d4fef7b2d5fbc16f730d615a814a01efa2f1eea6ef0e545da8e65f30ad8b4e895d3d83c
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in kafka_replicator.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,26 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ kafka_replicator (0.1.0)
5
+ json
6
+ ruby-kafka (~> 0.7.5)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ digest-crc (0.4.1)
12
+ json (2.2.0)
13
+ rake (10.5.0)
14
+ ruby-kafka (0.7.6)
15
+ digest-crc
16
+
17
+ PLATFORMS
18
+ ruby
19
+
20
+ DEPENDENCIES
21
+ bundler (~> 1.17)
22
+ kafka_replicator!
23
+ rake (~> 10.0)
24
+
25
+ BUNDLED WITH
26
+ 1.17.3
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2019 Vachagan Gevorgyan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # KafkaReplicator
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/kafka_replicator.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'kafka_replicator'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install kafka_replicator
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Vachman/kafka-replicator.
36
+
37
+ ## License
38
+
39
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

require "bundler/setup"
require "kafka_replicator"

# Fixtures and/or initialization code for experimenting with the gem can go
# here. A different console may be swapped in below if preferred.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# One-shot developer setup: installs the gem's bundle.
set -euo pipefail
IFS=$'\n\t'
# Echo commands as they run so setup output is easy to follow.
set -vx

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,31 @@
1
+
2
# Gem specification for kafka_replicator.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "kafka_replicator/version"

Gem::Specification.new do |spec|
  spec.name          = "kafka_replicator"
  spec.version       = KafkaReplicator::VERSION
  spec.authors       = ["Vachagan Gevorgyan"]
  spec.email         = ["v.gevorgyan@catawiki.nl"]

  spec.summary       = %q{Replicate topics from one kafka cluster to another}
  spec.description   = %q{Simple solution for organizing 2 way syncing between kafka clusters}
  spec.homepage      = "https://github.com/Vachman/kafka-replicator"
  spec.license       = "MIT"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists the files that have been added into git.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.17"
  spec.add_development_dependency "rake", "~> 10.0"

  # NOTE(review): Gemfile.lock resolves kafka_replicator with
  # `ruby-kafka (~> 0.7.5)` and a `json` dependency, which disagrees with
  # the constraints below — confirm which set is intended before releasing.
  spec.add_dependency 'ruby-kafka', '~> 0.6.0'
  spec.add_dependency 'multi_json', '~> 1.0'
end
@@ -0,0 +1,11 @@
1
+ require "kafka"
2
+ require "kafka_replicator/offsets_sync"
3
+ require 'kafka_replicator/railtie' if defined?(Rails)
4
+ require "kafka_replicator/topics_replicator"
5
+ require "kafka_replicator/version"
6
+ require "logger"
7
+ require "multi_json"
8
+
9
+ module KafkaReplicator
10
+ class Error < StandardError; end
11
+ end
@@ -0,0 +1,119 @@
1
module KafkaReplicator
  # Copies a consumer group's committed offsets from a source Kafka cluster
  # to a destination cluster. Because the two clusters' log-end (producer)
  # offsets can differ, each destination consumer offset is derived by
  # subtracting the producer-offset delta from the source consumer offset.
  class OffsetsSync
    attr_reader :source_kafka,
                :destination_kafka,
                :destination_consumer,
                :consumer_group,
                :logger,
                :topics

    # @param source_brokers [Array<String>] brokers of the source cluster
    # @param destination_brokers [Array<String>] brokers of the destination cluster
    # @param consumer_group [String] consumer group whose offsets are synced
    def initialize(source_brokers:, destination_brokers:, consumer_group:)
      @source_brokers = source_brokers
      @destination_brokers = destination_brokers
      @consumer_group = consumer_group
      # topic => { partition => { source_consumer_offset:, source_producer_offset:,
      #                           destination_producer_offset:, destination_consumer_offsets: } }
      @topics = Hash.new { |h, k| h[k] = {} }
      @logger = Logger.new(STDOUT)
    end

    # Lazily-built client for the source cluster.
    def source_kafka
      @source_kafka ||= Kafka.new(
        @source_brokers,
        client_id: "replicator_source"
      )
    end

    # Lazily-built client for the destination cluster.
    def destination_kafka
      @destination_kafka ||= Kafka.new(
        @destination_brokers,
        client_id: "replicator_destination"
      )
    end

    # Consumer on the destination cluster used to seek the synced offsets.
    def destination_consumer
      @destination_consumer ||= destination_kafka.consumer(
        group_id: consumer_group
      )
    end

    # Group coordinator broker for the consumer group on the source cluster.
    # NOTE: reaches into ruby-kafka internals (@cluster), so this is coupled
    # to the driver version. (Method name keeps the original misspelling for
    # backward compatibility with existing callers.)
    def source_group_cordinator
      source_kafka.instance_variable_get('@cluster').send(
        :get_group_coordinator,
        group_id: consumer_group
      )
    end

    # Fetches committed offsets for the group. Forces OffsetFetchRequest
    # api_version 2 so that `topics: nil` means "all topics" — presumably a
    # workaround for the pinned ruby-kafka version; confirm on upgrade.
    def source_consumer_offsets
      Kafka::Protocol::OffsetFetchRequest.send(:define_method, "api_version") { 2 }
      source_group_cordinator.fetch_offsets(group_id: consumer_group, topics: nil)
    end

    # Records the group's committed offsets on the source cluster.
    def load_source_consumer_offsets
      logger.info "load_source_consumer_offsets"

      source_consumer_offsets.topics.each do |topic, partitions|
        partitions.each do |partition, info|
          @topics[topic][partition] = { source_consumer_offset: info.offset }
        end
      end
    end

    # Records the log-end offsets on the source cluster.
    def load_source_producer_offsets
      # Fixed: this log line previously named the wrong method.
      logger.info "load_source_producer_offsets"

      source_kafka.last_offsets_for(*@topics.keys).each do |topic, partitions|
        partitions.each do |partition, offset|
          @topics[topic][partition][:source_producer_offset] = offset
        end
      end
    end

    # Records the log-end offsets on the destination cluster.
    def load_destination_producer_offsets
      # Fixed: this log line previously named the wrong method.
      logger.info "load_destination_producer_offsets"

      destination_kafka.last_offsets_for(*@topics.keys).each do |topic, partitions|
        partitions.each do |partition, offset|
          @topics[topic][partition][:destination_producer_offset] = offset
        end
      end
    end

    # destination offset = source consumer offset - (source log end - destination log end)
    def calculate_destination_consumer_offsets
      logger.info "calculate_destination_consumer_offsets"

      @topics.each do |_topic, partitions|
        partitions.each do |_partition, info|
          delta = info[:source_producer_offset] - info[:destination_producer_offset]
          info[:destination_consumer_offsets] = info[:source_consumer_offset] - delta
        end
      end
    end

    # Seeks the destination consumer to the calculated offsets and consumes a
    # single message (unmarked) so the seeked positions take effect.
    def set_destination_consumer_offsets
      logger.info "set_destination_consumer_offsets"

      @topics.each do |topic, partitions|
        destination_consumer.subscribe(topic)
        partitions.each do |partition, info|
          offset = info[:destination_consumer_offsets]
          # Fixed: this line referenced an undefined local `m`, which raised
          # NameError at runtime; it now logs the topic/partition in scope.
          logger.info "Seeking consumer offset for: #{topic}/#{partition} to #{offset}"
          destination_consumer.seek(topic, partition, offset)
        end
      end

      opts = { automatically_mark_as_processed: false }
      destination_consumer.each_message(opts) do |m|
        logger.info "Setting consumer offset for: #{m.topic}/#{m.partition}"
        break
      end
    end

    # Runs the full offset-sync pipeline.
    def sync
      load_source_consumer_offsets
      load_destination_producer_offsets
      load_source_producer_offsets

      calculate_destination_consumer_offsets
      set_destination_consumer_offsets
    end
  end
end
@@ -0,0 +1,8 @@
1
module KafkaReplicator
  # Rails integration: makes the gem's rake tasks available inside host apps.
  class Railtie < Rails::Railtie
    rake_tasks do
      # Exposes the kafka_replicator:* tasks to `rake -T`.
      load 'tasks/kafka_replicator.rake'
    end
  end
end
8
+
@@ -0,0 +1,154 @@
1
module KafkaReplicator
  # Continuously replicates every topic of a source Kafka cluster to a
  # destination cluster. Replicated JSON messages are tagged with a
  # `replica` flag so two-way replication setups do not loop.
  class TopicsReplicator
    # Internal topics that must never be replicated.
    # NOTE(review): '__consumer_offse' looks like a truncated duplicate of
    # '__consumer_offsets'; kept as-is for backward compatibility.
    SKIP_TOPICS = ['__consumer_offse', '__consumer_offsets', '_schemas'].freeze

    attr_reader :source_kafka,
                :destination_kafka,
                :source_consumer,
                :destination_producer,
                :replicated_topics,
                :skip_topics,
                :logger,
                :stopped

    # @param source_brokers [Array<String>] brokers of the source cluster
    # @param destination_brokers [Array<String>] brokers of the destination cluster
    # @param skip_topics [Array<String>] extra topics to exclude (merged with SKIP_TOPICS)
    def initialize(source_brokers:, destination_brokers:, skip_topics: [])
      @source_brokers = source_brokers
      @destination_brokers = destination_brokers
      @skip_topics = SKIP_TOPICS | skip_topics
      @logger = Logger.new(STDOUT)
    end

    # Resets per-run state; called at the top of every `start` iteration so a
    # restart picks up fresh consumer/producer instances.
    def setup
      @stopped = false
      @replicated_topics = Set[]
      @source_consumer = nil
      @destination_producer = nil
    end

    # Lazily-built client for the source cluster.
    def source_kafka
      @source_kafka ||= Kafka.new(
        @source_brokers,
        client_id: "replicator_source"
      )
    end

    # Lazily-built client for the destination cluster.
    def destination_kafka
      @destination_kafka ||= Kafka.new(
        @destination_brokers,
        client_id: "replicator_destination"
      )
    end

    def source_consumer
      @source_consumer ||= source_kafka.consumer(group_id: "replicator")
    end

    def destination_producer
      @destination_producer ||= destination_kafka.producer
    end

    # Main loop: (re)subscribes to all source topics and replicates until
    # `stop` is called. A replication pass that ends (new topics appeared, or
    # an error was rescued in `replicate`) triggers a fresh setup.
    def start
      loop do
        break if stopped

        logger.info 'Setting up configuration...'
        setup

        logger.info 'Adding topics for replication...'
        subscribe_to_source_topics

        logger.info 'Starting replication...'
        replicate
      end
    end

    # Runs one replication pass, logging (rather than propagating) any error
    # so the `start` loop can recover and restart.
    #
    # Fixed: the original file defined `replicate` twice, so this rescue
    # wrapper was silently overwritten by the later private definition and
    # never ran. The worker is now named `replicate_messages`.
    def replicate
      replicate_messages
    rescue => e
      logger.error "Exception: #{e}"
      logger.error "Exception.cause: #{e.cause.inspect}"
    end

    # Signals the main loop to exit and stops the source consumer.
    def stop
      logger.info 'Stopping replication...'
      source_consumer.stop
      @stopped = true
    end

    private

    # Consumes batches from the source and republishes them to the destination.
    def replicate_messages
      source_consumer.each_batch(automatically_mark_as_processed: false) do |batch|
        # Fixed: `logger.info '...' && break` parsed as
        # `logger.info('...' && break)`, so `break` fired before the message
        # was ever logged.
        unless unreplicated_topics.empty?
          logger.info 'New topics added, restarting...'
          break
        end

        batch.messages.each_slice(100).each do |messages|
          messages.each do |message|
            value = parse_message(message.value)

            # Currently we support only JSON messages, so if for some reason
            # there is a message which is not JSON we just skip it in order
            # to continue replication.
            next if value.kind_of?(Exception)

            # Skip already replicated messages; prevents loops in a two-way
            # replication scenario.
            if value.has_key?(:replica)
              source_consumer.mark_message_as_processed(message)
              print('-')
              next
            end

            # Mark message as a replica.
            value[:replica] = true

            destination_producer.produce(
              MultiJson.dump(value),
              topic: message.topic,
              partition: message.partition
            )

            source_consumer.mark_message_as_processed(message)
            print '.'
          end

          destination_producer.deliver_messages
          source_consumer.commit_offsets
        end
      end
    end

    # Parses a JSON payload; returns the exception itself on malformed input
    # so the caller can skip the message without aborting the batch.
    def parse_message(value)
      MultiJson.load(value, symbolize_keys: true)
    rescue MultiJson::ParseError => exception
      logger.error exception.cause

      exception
    end

    # All source topics except the skipped ones.
    def source_topics
      source_kafka.topics.reject { |topic_name| skip_topics.include?(topic_name) }.to_set
    end

    # Source topics we have not subscribed to yet.
    def unreplicated_topics
      source_topics - replicated_topics
    end

    # Subscribes to every unreplicated source topic, creating it on the
    # destination cluster first when it does not exist there.
    def subscribe_to_source_topics
      destination_topics = destination_kafka.topics

      unreplicated_topics.each do |topic|
        source_consumer.subscribe(topic, start_from_beginning: true)
        replicated_topics << topic

        unless destination_topics.include?(topic)
          destination_kafka.create_topic(
            topic,
            num_partitions: source_kafka.partitions_for(topic),
            replication_factor: 3 # Must be specified, otherwise the ruby-kafka driver defaults it to 1
          )
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module KafkaReplicator
  # Gem version, referenced by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,28 @@
1
require 'multi_json'

namespace :kafka_replicator do
  desc 'Start topics replicator'
  task replicate_topics: :environment do |_task, _args|
    # Each setting arrives as a JSON-encoded environment variable; returns
    # nil when the variable is unset.
    read_json_env = ->(name) { ENV[name] && MultiJson.load(ENV[name]) }

    source_brokers = read_json_env.call('KAFKA_REPLICATOR_SOURCE_BROKERS')
    raise "KAFKA_REPLICATOR_SOURCE_BROKERS environment variable is not set" unless source_brokers

    destination_brokers = read_json_env.call('KAFKA_REPLICATOR_DESTINATION_BROKERS')
    raise "KAFKA_REPLICATOR_DESTINATION_BROKERS environment variable is not set" unless destination_brokers

    # Optional; defaults to no extra skipped topics.
    skip_topics = read_json_env.call('KAFKA_REPLICATOR_SKIP_TOPICS') || []

    puts "Replicating from #{source_brokers} to #{destination_brokers}"
    puts "Skipping topics: #{(KafkaReplicator::TopicsReplicator::SKIP_TOPICS | skip_topics).sort}"

    replicator = KafkaReplicator::TopicsReplicator.new(
      source_brokers: source_brokers,
      destination_brokers: destination_brokers,
      skip_topics: skip_topics
    )

    # Shut down cleanly on SIGTERM/SIGINT so in-flight offsets are committed.
    trap("TERM") { replicator.stop }
    trap("INT") { replicator.stop }

    replicator.start
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kafka_replicator
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Vachagan Gevorgyan
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2019-06-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-kafka
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.6.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.6.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: multi_json
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ description: Simple solution for organizing 2 way syncing between kafka clusters
70
+ email:
71
+ - v.gevorgyan@catawiki.nl
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - Gemfile.lock
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/console
83
+ - bin/setup
84
+ - kafka_replicator.gemspec
85
+ - lib/kafka_replicator.rb
86
+ - lib/kafka_replicator/offsets_sync.rb
87
+ - lib/kafka_replicator/railtie.rb
88
+ - lib/kafka_replicator/topics_replicator.rb
89
+ - lib/kafka_replicator/version.rb
90
+ - lib/tasks/kafka_replicator.rake
91
+ homepage: https://github.com/Vachman/kafka-replicator
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.11
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Replicate topics from one kafka cluster to another
115
+ test_files: []