ruby-kafka-ec2 0.1.0
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +22 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -0
- data/Rakefile +6 -0
- data/benchmark/.gitignore +4 -0
- data/benchmark/Dockerfile +13 -0
- data/benchmark/Dockerfile.dockerignore +1 -0
- data/benchmark/Gemfile +11 -0
- data/benchmark/README.md +99 -0
- data/benchmark/aws.tf +135 -0
- data/benchmark/consume_messages.rb +106 -0
- data/benchmark/consume_messages.sh +47 -0
- data/benchmark/produce_messages.rb +96 -0
- data/benchmark/produce_messages.sh +45 -0
- data/benchmark/register_docker_image.sh +20 -0
- data/benchmark/ruby-kafka-ec2.log +12 -0
- data/benchmark/ruby-kafka.log +12 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/kafka/ec2.rb +23 -0
- data/lib/kafka/ec2/ext/consumer_group.rb +33 -0
- data/lib/kafka/ec2/ext/protocol/join_group_request.rb +39 -0
- data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb +95 -0
- data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb +24 -0
- data/lib/kafka/ec2/version.rb +5 -0
- data/ruby-kafka-ec2.gemspec +29 -0
- metadata +123 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: a216c0064e93662929aa08a27aee3caad6eb7d4a7eb02a658b6edef1ced9fa33
  data.tar.gz: 5982cfb402ea097fbc45580b3bf17c88d1ef6fef4c9f3fc3c06b71c0ba9fd8f7
SHA512:
  metadata.gz: 47ef0c231763ba3b9c8ee95417eba748a205e8791bd973bdebdb7e4b30aa103f32c965fbbc47cc8aa157f6ae80bd8e4f141ac153264eb3514accb569a375f8d5
  data.tar.gz: '009ad55259d086a25252b8fd0aa6f9f3eecdbf0c25b79ac00c07a920b28fe7d4d1758043e7ef804364e56c869bd57e51b51690b44c16745fba45dc12ec2bdb40'
data/.github/workflows/test.yml
ADDED
@@ -0,0 +1,22 @@
name: CI

on:
  push:
  pull_request:

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Ruby 2.6
      uses: actions/setup-ruby@v1
      with:
        ruby-version: 2.6.x
    - name: Build and test with Rake
      run: |
        gem install bundler
        bundle install --jobs 4 --retry 3
        bundle exec rake
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2020 abicky

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,93 @@
# ruby-kafka-ec2

![](https://github.com/abicky/ruby-kafka-ec2/workflows/CI/badge.svg?branch=master)

ruby-kafka-ec2 is an extension of ruby-kafka that provides useful features for EC2, such as Kafka::EC2::MixedInstanceAssignmentStrategy.

## Installation

Add this line to your application's Gemfile:

```ruby
gem 'ruby-kafka-ec2'
```

And then execute:

    $ bundle install

Or install it yourself as:

    $ gem install ruby-kafka-ec2

## Usage

### Kafka::EC2::MixedInstanceAssignmentStrategy

`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. Consumer throughput usually depends on the instance family and the availability zone. For example, if your application writes data to a database, consumers running in the same availability zone as the writer DB instance achieve higher throughput.

To assign more partitions to consumers with high throughput, first define a `Kafka::EC2::MixedInstanceAssignmentStrategyFactory` like below:

```ruby
require "aws-sdk-rds"
require "kafka"
require "kafka/ec2"

rds = Aws::RDS::Client.new(region: "ap-northeast-1")
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
  instance_family_weights: {
    "r4" => 1.00,
    "r5" => 1.20,
    "m5" => 1.35,
    "c5" => 1.50,
  },
  availability_zone_weights: ->() {
    db_cluster = rds.describe_db_clusters(filters: [
      { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
    ]).db_clusters.first
    db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
    db_instance = rds.describe_db_instances(filters: [
      { name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
      { name: "db-instance-id", values: [db_instance_id] },
    ]).db_instances.first

    if db_instance.availability_zone == "ap-northeast-1a"
      {
        "ap-northeast-1a" => 1,
        "ap-northeast-1c" => 0.25,
      }
    else
      {
        "ap-northeast-1a" => 0.25,
        "ap-northeast-1c" => 1,
      }
    end
  },
)
```

In the preceding example, consumers running on c5 instances will have 1.5x as many partitions as consumers running on r4 instances. In a similar way, if the writer DB instance is in ap-northeast-1a, consumers in ap-northeast-1a will have 4x as many partitions as consumers in ap-northeast-1c.

You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by passing the factory to `Kafka::EC2.with_assignment_strategy_factory` and creating a consumer in the block:

```ruby
consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_factory) do
  kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"])
end
```

## Development

After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).

## Contributing

Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/ruby-kafka-ec2.

## License

The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
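How the two weight tables combine may be easier to see with numbers. The sketch below is illustrative only and is not part of the gem: each consumer's capacity is the product of its instance-family weight and its availability-zone weight, and partitions are divided roughly in proportion to capacity (the exact per-instance rounding is implemented in Kafka::EC2::MixedInstanceAssignmentStrategy#assign, shown later in this diff).

```ruby
# Illustrative sketch only -- not part of the gem's code.
# Capacity per consumer = instance-family weight * availability-zone weight.
family_weights = { "r4" => 1.00, "r5" => 1.20, "m5" => 1.35, "c5" => 1.50 }
az_weights     = { "ap-northeast-1a" => 1, "ap-northeast-1c" => 0.25 }

consumers  = [["c5", "ap-northeast-1a"], ["r4", "ap-northeast-1a"], ["r4", "ap-northeast-1c"]]
capacities = consumers.map { |family, az| family_weights.fetch(family, 1) * az_weights.fetch(az, 1) }
total      = capacities.sum

consumers.zip(capacities).each do |(family, az), capacity|
  share = (capacity / total * 100).round # share of a 100-partition topic
  puts "#{family} in #{az}: capacity #{capacity}, ~#{share} partitions"
end
# c5 in ap-northeast-1a: capacity 1.5,  ~55 partitions
# r4 in ap-northeast-1a: capacity 1.0,  ~36 partitions
# r4 in ap-northeast-1c: capacity 0.25, ~9 partitions
```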
data/Rakefile
ADDED
data/benchmark/.gitignore
ADDED
data/benchmark/Dockerfile
ADDED
@@ -0,0 +1,13 @@
FROM ruby:2.7

WORKDIR /work

COPY Gemfile ruby-kafka-ec2.gemspec /tmp/ruby-kafka-ec2/
COPY lib /tmp/ruby-kafka-ec2/lib/

COPY benchmark/Gemfile /work/
RUN bundle install -j4

COPY benchmark/produce_messages.rb benchmark/consume_messages.rb /work/

ENTRYPOINT ["bundle", "exec", "ruby"]
data/benchmark/Dockerfile.dockerignore
ADDED
@@ -0,0 +1 @@
.terraform
data/benchmark/Gemfile
ADDED
@@ -0,0 +1,11 @@
# frozen_string_literal: true

source "https://rubygems.org"

git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }

gem "ruby-kafka"
gem "concurrent-ruby" # cf. https://github.com/zendesk/ruby-kafka/pull/835
gem "mysql2"
gem "ruby-kafka-ec2", path: "/tmp/ruby-kafka-ec2"
gem "aws-sdk-rds"
data/benchmark/README.md
ADDED
@@ -0,0 +1,99 @@
# Benchmark

## Procedures

### 1. Create AWS resources

```
terraform init
terraform apply
```

### 2. Register docker image

```
./register_docker_image.sh
```

### 3. Produce messages and create database records

Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:

```
./produce_messages.sh
```

### 4. Consume messages

Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:

```
USE_KAFKA_EC2=false ./consume_messages.sh
```

Stop all the tasks once all lags become 0. You can check the lags by executing the following command on the kafka client instance:

```
./kafka-consumer-groups.sh \
  --bootstrap-server <bootstrap-server> \
  --describe \
  --group net.abicky.ruby-kafka-ec2.benchmark
```

Reset the offsets on the kafka client instance:

```
./kafka-consumer-groups.sh \
  --bootstrap-server <bootstrap-server> \
  --group net.abicky.ruby-kafka-ec2.benchmark \
  --reset-offsets \
  --to-earliest \
  --topic <topic> \
  --execute
```

Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:

```
USE_KAFKA_EC2=true ./consume_messages.sh
```

## Result

### USE_KAFKA_EC2=false

No. | instance type | availability zone | partition count | processed count | duration (sec)
----|---------------|-------------------|-----------------|-----------------|-----------
1   | m5.large      | ap-northeast-1a   | 16              | 80000           | 224.1
2   | m5.large      | ap-northeast-1a   | 17              | 85000           | 237.7
3   | m5.large      | ap-northeast-1c   | 17              | 85000           | 56.8
4   | m5.large      | ap-northeast-1c   | 17              | 85000           | 57.3
5   | r4.large      | ap-northeast-1a   | 17              | 85000           | 257.6
6   | r4.large      | ap-northeast-1a   | 17              | 85000           | 258.9
7   | r4.large      | ap-northeast-1c   | 16              | 80000           | 74.2
8   | r4.large      | ap-northeast-1c   | 17              | 85000           | 76.6
9   | r5.large      | ap-northeast-1a   | 17              | 85000           | 238.0
10  | r5.large      | ap-northeast-1a   | 17              | 85000           | 238.1
11  | r5.large      | ap-northeast-1c   | 16              | 80000           | 54.1
12  | r5.large      | ap-northeast-1c   | 16              | 80000           | 55.1

See ruby-kafka.log for more details.

### USE_KAFKA_EC2=true

No. | instance type | availability zone | partition count | processed count | duration (sec)
----|---------------|-------------------|-----------------|-----------------|-----------
1   | m5.large      | ap-northeast-1a   | 7               | 35000           | 101.4
2   | m5.large      | ap-northeast-1a   | 8               | 40000           | 114.4
3   | m5.large      | ap-northeast-1c   | 31              | 155000          | 101.6
4   | m5.large      | ap-northeast-1c   | 30              | 150000          | 98.6
5   | r4.large      | ap-northeast-1a   | 5               | 25000           | 79.0
6   | r4.large      | ap-northeast-1a   | 6               | 30000           | 92.4
7   | r4.large      | ap-northeast-1c   | 23              | 115000          | 105.9
8   | r4.large      | ap-northeast-1c   | 22              | 110000          | 106.1
9   | r5.large      | ap-northeast-1a   | 7               | 35000           | 102.1
10  | r5.large      | ap-northeast-1a   | 7               | 35000           | 102.1
11  | r5.large      | ap-northeast-1c   | 27              | 135000          | 94.2
12  | r5.large      | ap-northeast-1c   | 27              | 135000          | 95.7

See ruby-kafka-ec2.log for more details.
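The USE_KAFKA_EC2=true distribution follows directly from the weights configured in consume_messages.rb (shown later in this diff). The following back-of-the-envelope sketch is not part of the benchmark code; it assumes the 200-partition topic created by produce_messages.rb, two consumers per instance-type/AZ combination, and the writer DB instance in ap-northeast-1c, which is what the results above imply:

```ruby
# Illustrative check only -- reproduces the expected per-consumer partition counts.
family_weights = { "r4" => 1.00, "r5" => 1.20, "m5" => 1.35 }
az_weights     = { "ap-northeast-1c" => 1, "ap-northeast-1a" => 0.25 }

combos = family_weights.keys.product(az_weights.keys)
total_capacity = combos.sum { |f, az| family_weights[f] * az_weights[az] } * 2 # 2 consumers each
partitions_per_capacity = 200 / total_capacity

combos.each do |f, az|
  expected = (family_weights[f] * az_weights[az] * partitions_per_capacity).round
  puts "#{f}.large in #{az}: ~#{expected} partitions"
end
# r4 in 1c: ~23, r4 in 1a: ~6, r5 in 1c: ~27, r5 in 1a: ~7, m5 in 1c: ~30, m5 in 1a: ~8
# which matches the 5-8 vs 22-31 split observed in the table above.
```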
data/benchmark/aws.tf
ADDED
@@ -0,0 +1,135 @@
provider "aws" {
  region = "ap-northeast-1"
}

variable "vpc_id" {}
variable "ec2_instance_security_group_id" {}
variable "ec2_key_name" {}
variable "rds_master_password" {}
variable "kafka_broker_subnet_ids" {}

# RDS

resource "aws_security_group" "rds" {
  name   = "ruby-kafka-ec2-rds"
  vpc_id = var.vpc_id

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_security_group_rule" "rds_access_from_ec2" {
  security_group_id        = aws_security_group.rds.id
  type                     = "ingress"
  from_port                = 3306
  to_port                  = 3306
  protocol                 = "tcp"
  source_security_group_id = var.ec2_instance_security_group_id
}


resource "aws_rds_cluster" "benchmark" {
  cluster_identifier      = "ruby-kafka-ec2-benchmark"
  engine                  = "aurora"
  engine_mode             = "provisioned"
  engine_version          = "5.6.10a"
  availability_zones      = ["ap-northeast-1a", "ap-northeast-1c"]
  master_username         = "admin"
  master_password         = var.rds_master_password
  backup_retention_period = 1
  skip_final_snapshot     = true
  vpc_security_group_ids  = [aws_security_group.rds.id]
}

resource "aws_rds_cluster_instance" "instances" {
  count              = 1
  identifier         = "${aws_rds_cluster.benchmark.cluster_identifier}-${count.index}"
  cluster_identifier = aws_rds_cluster.benchmark.id
  instance_class     = "db.r5.large"
}

# Kafka

data "aws_kms_key" "kafka" {
  key_id = "alias/aws/kafka"
}

resource "aws_security_group" "kafka_cluster" {
  name   = "ruby-kafka-ec2-kafka-cluster"
  vpc_id = var.vpc_id

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

resource "aws_security_group_rule" "kafka_cluster_from_ec2" {
  security_group_id        = aws_security_group.kafka_cluster.id
  type                     = "ingress"
  from_port                = 0
  to_port                  = 0
  protocol                 = "-1"
  source_security_group_id = var.ec2_instance_security_group_id
}

resource "aws_msk_cluster" "benchmark" {
  cluster_name           = "ruby-kafka-ec2-benchmark"
  kafka_version          = "2.2.1"
  number_of_broker_nodes = 3

  broker_node_group_info {
    instance_type   = "kafka.m5.large"
    ebs_volume_size = "100"
    client_subnets  = var.kafka_broker_subnet_ids
    security_groups = [aws_security_group.kafka_cluster.id]
  }

  encryption_info {
    encryption_at_rest_kms_key_arn = data.aws_kms_key.kafka.arn

    encryption_in_transit {
      client_broker = "TLS_PLAINTEXT"
      in_cluster    = true
    }
  }
}

data "aws_ami" "most_recent_amazon_linux2" {
  most_recent = true
  owners      = ["amazon"]

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
  }
}

resource "aws_instance" "kafka_client" {
  ami           = data.aws_ami.most_recent_amazon_linux2.image_id
  instance_type = "t3.nano"
  key_name      = var.ec2_key_name

  subnet_id              = var.kafka_broker_subnet_ids[0]
  vpc_security_group_ids = [var.ec2_instance_security_group_id]

  associate_public_ip_address = true

  # cf. https://docs.aws.amazon.com/msk/latest/developerguide/create-client-machine.html
  user_data = <<EOF
#!/bin/bash
yum install -y java-1.8.0
wget https://archive.apache.org/dist/kafka/2.2.1/kafka_2.12-2.2.1.tgz
tar -xzf kafka_2.12-2.2.1.tgz
EOF

  tags = {
    Name = "kafka-client"
  }
}
data/benchmark/consume_messages.rb
ADDED
@@ -0,0 +1,106 @@
require "json"
require "net/http"
require "time"

require "kafka"
require "concurrent/map" # cf. https://github.com/zendesk/ruby-kafka/pull/835
require "mysql2"

KAFKA_BROKERS = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(/\p{Space}*,\p{Space}*/)
KAFKA_CLIENT_ID = "net.abicky.ruby-kafka-ec2"
KAFKA_CONSUMER_GROUP_ID = "net.abicky.ruby-kafka-ec2.benchmark"
KAFKA_TOPIC = ENV.fetch("KAFKA_TOPIC") do
  raise 'The environment variable "KAFKA_TOPIC" must be specified'
end

$stdout.sync = true
logger = Logger.new($stdout)

kafka = Kafka.new(KAFKA_BROKERS, client_id: KAFKA_CLIENT_ID)
if ENV["USE_KAFKA_EC2"] == "true"
  logger.info "Use ruby-kafka-ec2"
  require "aws-sdk-rds"
  require "kafka/ec2"

  rds = Aws::RDS::Client.new(region: "ap-northeast-1")
  assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
    instance_family_weights: {
      "r4" => 1.00,
      "r5" => 1.20,
      "m5" => 1.35,
    },
    availability_zone_weights: ->() {
      db_cluster = rds.describe_db_clusters(filters: [
        { name: "db-cluster-id", values: ["ruby-kafka-ec2-benchmark"] },
      ]).db_clusters.first
      db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
      db_instance = rds.describe_db_instances(filters: [
        { name: "db-cluster-id", values: ["ruby-kafka-ec2-benchmark"] },
        { name: "db-instance-id", values: [db_instance_id] },
      ]).db_instances.first

      if db_instance.availability_zone == "ap-northeast-1a"
        {
          "ap-northeast-1a" => 1,
          "ap-northeast-1c" => 0.25,
        }
      else
        {
          "ap-northeast-1a" => 0.25,
          "ap-northeast-1c" => 1,
        }
      end
    },
  )
  consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_factory) do
    kafka.consumer(group_id: KAFKA_CONSUMER_GROUP_ID)
  end
else
  logger.info "Use ruby-kafka"
  consumer = kafka.consumer(group_id: KAFKA_CONSUMER_GROUP_ID)
end

consumer.subscribe(KAFKA_TOPIC)

trap(:TERM) { consumer.stop }

metadata = Net::HTTP.start("169.254.169.254", 80) do |http|
  {
    instance_id: http.get("/latest/meta-data/instance-id").body,
    instance_type: http.get("/latest/meta-data/instance-type").body,
    availability_zone: http.get("/latest/meta-data/placement/availability-zone").body,
  }
end

client = Mysql2::Client.new(
  host: ENV["MYSQL_HOST"] || "localhost",
  port: 3306,
  username: "admin",
  password: ENV["MYSQL_PASSWORD"],
)

logger.info "[#{metadata}] Consuming messages..."

start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
processed_count = 0
partition_count = 0
end_time = nil
consumer.each_message do |message|
  if message.value == "FIN"
    end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    partition_count += 1
    logger.info "[#{metadata}] Received FIN message"
    next
  end

  JSON.parse(message.value)["events"].each do |event|
    Time.iso8601(event["timestamp"])
  end
  client.query("SELECT * FROM ruby_kafka_ec2_benchmark.events").to_a

  processed_count += 1
  logger.info "[#{metadata}] #{processed_count} messages were consumed" if (processed_count % 10_000).zero?
end

duration = end_time - start_time
logger.info "[#{metadata}] Complete (duration: #{duration}, partition_count: #{partition_count}, processed_count: #{processed_count})"
data/benchmark/consume_messages.sh
ADDED
@@ -0,0 +1,47 @@
#!/bin/bash

set -eo pipefail

REPOSITORY=ruby-kafka-ec2/benchmark
TASK_FAMILY=ruby-kafka-ec2-benchmark-consumer

repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
if [ -z "$repo_uri" ]; then
  echo "The repository '$REPOSITORY' is not found. Execute register_docker_image.sh first, please." >&2
  exit
fi

aws ecs register-task-definition --cli-input-json "$(cat <<JSON
{
  "family": "$TASK_FAMILY",
  "containerDefinitions": [
    {
      "name": "consume_messages",
      "image": "$repo_uri",
      "command": ["consume_messages.rb"],
      "essential": true,
      "environment": [
        {"name": "KAFKA_BROKERS", "value": "$KAFKA_BROKERS"},
        {"name": "KAFKA_TOPIC", "value": "$KAFKA_TOPIC"},
        {"name": "MYSQL_HOST", "value": "$MYSQL_HOST"},
        {"name": "MYSQL_PASSWORD", "value": "$MYSQL_PASSWORD"},
        {"name": "USE_KAFKA_EC2", "value": "$USE_KAFKA_EC2"}
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/aws/ecs/ruby-kafka-ec2/benchmark",
          "awslogs-region": "ap-northeast-1",
          "awslogs-stream-prefix": "consume-messages"
        }
      }
    }
  ],
  "cpu": "1024",
  "memory": "2048"
}
JSON
)"

aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY --count 10
aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY --count 2
data/benchmark/produce_messages.rb
ADDED
@@ -0,0 +1,96 @@
require "json"
require "time"

require "concurrent"
require "kafka"
require "mysql2"

KAFKA_BROKERS = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(/\p{Space}*,\p{Space}*/)
KAFKA_CLIENT_ID = "net.abicky.ruby-kafka-ec2"
KAFKA_TOPIC = ENV.fetch("KAFKA_TOPIC") do
  raise 'The environment variable "KAFKA_TOPIC" must be specified'
end
PARTITION_COUNT = 200
MAX_BUFFER_SIZE = 1_000
MESSAGE_COUNT = 1_000_000

$stdout.sync = true
logger = Logger.new($stdout)

client = Mysql2::Client.new(
  host: ENV["MYSQL_HOST"] || "localhost",
  port: 3306,
  username: "admin",
  password: ENV["MYSQL_PASSWORD"],
)
client.query("CREATE DATABASE IF NOT EXISTS ruby_kafka_ec2_benchmark")
client.query(<<~SQL)
  CREATE TABLE IF NOT EXISTS ruby_kafka_ec2_benchmark.events (
    id bigint(20) NOT NULL AUTO_INCREMENT,
    name varchar(255) NOT NULL,
    created_at datetime NOT NULL,
    PRIMARY KEY (id)
  )
SQL
client.query(<<~SQL)
  INSERT INTO ruby_kafka_ec2_benchmark.events (name, created_at) VALUES ('page_view', '#{Time.now.strftime("%F %T")}')
SQL

kafka = Kafka.new(KAFKA_BROKERS, client_id: KAFKA_CLIENT_ID)

unless kafka.topics.include?(KAFKA_TOPIC)
  logger.info "Create the kafka topic '#{KAFKA_TOPIC}'"
  kafka.create_topic(KAFKA_TOPIC, num_partitions: PARTITION_COUNT, replication_factor: 3)
end

now = Time.now.iso8601(3)
message = {
  events: [{ name: "page_view", timestamp: now }] * 10,
}.to_json


logger.info "Producing messages..."

start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
pool = Concurrent::FixedThreadPool.new(4)
producers = {}
current_processed_count = Concurrent::AtomicFixnum.new
futures = []
MESSAGE_COUNT.times do |i|
  futures << Concurrent::Future.execute(executor: pool) do
    producers[Thread.current.object_id] ||= kafka.producer(max_buffer_size: MAX_BUFFER_SIZE)
    producer = producers[Thread.current.object_id]
    producer.produce(message,
      topic: KAFKA_TOPIC,
      key: i.to_s,
      partition: i % PARTITION_COUNT,
    )
    if producer.buffer_size == MAX_BUFFER_SIZE
      producer.deliver_messages
    end
    processed_count = current_processed_count.increment
    logger.info "#{processed_count} messages were produced" if (processed_count % 10_000).zero?
  end

  if futures.size == 10_000
    futures.each(&:wait!)
    futures.clear
  end
end
futures.each(&:wait!)

producers.each_value(&:deliver_messages)

logger.info "Produce FIN messages"
producer = kafka.producer
PARTITION_COUNT.times do |i|
  producer.produce("FIN",
    topic: KAFKA_TOPIC,
    key: "fin_#{i}",
    partition: i % PARTITION_COUNT,
  )
end
producer.deliver_messages

duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
logger.info "Complete (duration: #{duration})"
data/benchmark/produce_messages.sh
ADDED
@@ -0,0 +1,45 @@
#!/bin/bash

set -eo pipefail

REPOSITORY=ruby-kafka-ec2/benchmark
TASK_FAMILY=ruby-kafka-ec2-benchmark-producer

repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
if [ -z "$repo_uri" ]; then
  echo "The repository '$REPOSITORY' is not found. Execute register_docker_image.sh first, please." >&2
  exit
fi

aws ecs register-task-definition --cli-input-json "$(cat <<JSON
{
  "family": "$TASK_FAMILY",
  "containerDefinitions": [
    {
      "name": "produce_messages",
      "image": "$repo_uri",
      "command": ["produce_messages.rb"],
      "essential": true,
      "environment": [
        {"name": "KAFKA_BROKERS", "value": "$KAFKA_BROKERS"},
        {"name": "KAFKA_TOPIC", "value": "$KAFKA_TOPIC"},
        {"name": "MYSQL_HOST", "value": "$MYSQL_HOST"},
        {"name": "MYSQL_PASSWORD", "value": "$MYSQL_PASSWORD"}
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/aws/ecs/ruby-kafka-ec2/benchmark",
          "awslogs-region": "ap-northeast-1",
          "awslogs-stream-prefix": "produce-messages"
        }
      }
    }
  ],
  "cpu": "2048",
  "memory": "4096"
}
JSON
)"

aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY
data/benchmark/register_docker_image.sh
ADDED
@@ -0,0 +1,20 @@
#!/bin/bash

set -eo pipefail

REPOSITORY=ruby-kafka-ec2/benchmark

cd $(dirname $(cd $(dirname $0) && pwd))
repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
if [ -z "$repo_uri" ]; then
  echo "Create ECR repository '$REPOSITORY'"
  repo_uri=$(aws ecr create-repository --repository-name $REPOSITORY | jq -r '.repository.repositoryUri')
fi

echo "Build docker image"
DOCKER_BUILDKIT=1 docker build -t $REPOSITORY . -f benchmark/Dockerfile

echo "Push $repo_uri:latest"
aws ecr get-login-password | docker login --username AWS --password-stdin $repo_uri
docker tag $REPOSITORY:latest $repo_uri:latest
docker push $repo_uri:latest
data/benchmark/ruby-kafka-ec2.log
ADDED
@@ -0,0 +1,12 @@
I, [2020-06-21T21:02:27.530399 #1]  INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 94.20265760399889, partition_count: 27, processed_count: 135000)
I, [2020-06-21T21:02:28.532064 #1]  INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 106.07931412400012, partition_count: 22, processed_count: 110000)
I, [2020-06-21T21:02:28.787485 #1]  INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 98.62195983100173, partition_count: 30, processed_count: 150000)
I, [2020-06-21T21:02:28.838145 #1]  INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 95.73462492099952, partition_count: 27, processed_count: 135000)
I, [2020-06-21T21:02:29.113526 #1]  INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 79.04582024299998, partition_count: 5, processed_count: 25000)
I, [2020-06-21T21:02:29.150897 #1]  INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 102.10359391399925, partition_count: 7, processed_count: 35000)
I, [2020-06-21T21:02:29.181021 #1]  INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 101.41423055399991, partition_count: 7, processed_count: 35000)
I, [2020-06-21T21:02:29.250973 #1]  INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 101.59283073200277, partition_count: 31, processed_count: 155000)
I, [2020-06-21T21:02:29.257868 #1]  INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 92.3937842589985, partition_count: 6, processed_count: 30000)
I, [2020-06-21T21:02:29.255276 #1]  INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 102.14144639099959, partition_count: 7, processed_count: 35000)
I, [2020-06-21T21:02:29.381202 #1]  INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 105.87376681400019, partition_count: 23, processed_count: 115000)
I, [2020-06-21T21:02:29.386796 #1]  INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 114.40461669500019, partition_count: 8, processed_count: 40000)
data/benchmark/ruby-kafka.log
ADDED
@@ -0,0 +1,12 @@
I, [2020-06-21T20:46:00.436023 #1]  INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 57.33392923200154, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:00.539574 #1]  INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 238.05382897799973, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:00.607154 #1]  INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 238.03822113800015, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:00.988614 #1]  INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 56.78356462699958, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:01.308462 #1]  INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 224.05910681800015, partition_count: 16, processed_count: 80000)
I, [2020-06-21T20:46:01.377471 #1]  INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 237.7210560049998, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:01.431828 #1]  INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 55.10812590900059, partition_count: 16, processed_count: 80000)
I, [2020-06-21T20:46:01.929500 #1]  INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 257.61435206200076, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:01.980481 #1]  INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 258.9417281019996, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:02.120333 #1]  INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 76.57086077900021, partition_count: 17, processed_count: 85000)
I, [2020-06-21T20:46:02.319151 #1]  INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 74.19314250700063, partition_count: 16, processed_count: 80000)
I, [2020-06-21T20:46:02.947805 #1]  INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 54.1334114199999, partition_count: 16, processed_count: 80000)
data/bin/console
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby

require "bundler/setup"
require "kafka/ec2"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup
ADDED
data/lib/kafka/ec2.rb
ADDED
@@ -0,0 +1,23 @@
require "kafka/ec2/ext/consumer_group"
require "kafka/ec2/ext/protocol/join_group_request"
require "kafka/ec2/mixed_instance_assignment_strategy_factory"
require "kafka/ec2/version"

module Kafka
  class EC2
    class << self
      attr_reader :assignment_strategy_factory

      def with_assignment_strategy_factory(factory)
        @assignment_strategy_factory = factory
        yield
      ensure
        @assignment_strategy_factory = nil
      end

      def assignment_strategy_classes
        @assignment_strategy_classes ||= {}
      end
    end
  end
end
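A small illustration, inferred from the ensure clause above rather than documented elsewhere: the factory is only visible while the block runs, and is cleared afterwards even if the block raises.

```ruby
require "kafka/ec2"

factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new

Kafka::EC2.with_assignment_strategy_factory(factory) do
  Kafka::EC2.assignment_strategy_factory # => factory (visible while the block runs)
end
Kafka::EC2.assignment_strategy_factory   # => nil (cleared by the ensure clause, even on error)
```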
data/lib/kafka/ec2/ext/consumer_group.rb
ADDED
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require "kafka/consumer_group"
require "kafka/ec2/mixed_instance_assignment_strategy"

module Kafka
  class EC2
    module Ext
      module ConsumerGroup
        def initialize(*args, **kwargs)
          super
          if Kafka::EC2.assignment_strategy_factory
            @assignment_strategy = Kafka::EC2.assignment_strategy_factory.create(cluster: @cluster)
          end
          Kafka::EC2.assignment_strategy_classes[@group_id] = @assignment_strategy.class
        end

        def join_group
          super
          if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
            @assignment_strategy.member_id_to_metadata = @members
          end
        end
      end
    end
  end
end

module Kafka
  class ConsumerGroup
    prepend Kafka::EC2::Ext::ConsumerGroup
  end
end
data/lib/kafka/ec2/ext/protocol/join_group_request.rb
ADDED
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require "net/http"

require "kafka/protocol/consumer_group_protocol"
require "kafka/protocol/join_group_request"

module Kafka
  class EC2
    module Ext
      module Protocol
        module JoinGroupRequest
          def initialize(*args, topics:, **kwargs)
            super
            if Kafka::EC2.assignment_strategy_classes[@group_id] == Kafka::EC2::MixedInstanceAssignmentStrategy
              user_data = Net::HTTP.start("169.254.169.254", 80) do |http|
                instance_id = http.get("/latest/meta-data/instance-id").body
                instance_type = http.get("/latest/meta-data/instance-type").body
                az = http.get("/latest/meta-data/placement/availability-zone").body
                "|#{instance_id},#{instance_type},#{az}"
              end
              @group_protocols = {
                "mixedinstance" => Kafka::Protocol::ConsumerGroupProtocol.new(topics: topics, user_data: user_data),
              }
            end
          end
        end
      end
    end
  end
end

module Kafka
  module Protocol
    class JoinGroupRequest
      prepend Kafka::EC2::Ext::Protocol::JoinGroupRequest
    end
  end
end
data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb
ADDED
@@ -0,0 +1,95 @@
# frozen_string_literal: true

require "kafka"
require "kafka/protocol/member_assignment"

module Kafka
  class EC2
    class MixedInstanceAssignmentStrategy
      # metadata is a byte sequence created by Kafka::Protocol::ConsumerGroupProtocol.encode
      attr_accessor :member_id_to_metadata

      # @param cluster [Kafka::Cluster]
      # @param instance_family_weights [Hash{String => Numeric}, Proc] a hash whose keys
      #   are instance families and whose values are weights. If the object is a proc,
      #   it must return such a hash; the proc is called every time the method "assign"
      #   is called.
      # @param availability_zone_weights [Hash{String => Numeric}, Proc] a hash whose keys
      #   are availability zones and whose values are weights. If the object is a proc,
      #   it must return such a hash; the proc is called every time the method "assign"
      #   is called.
      def initialize(cluster:, instance_family_weights:, availability_zone_weights:)
        @cluster = cluster
        @instance_family_weights = instance_family_weights
        @availability_zone_weights = availability_zone_weights
      end

      # Assign the topic partitions to the group members.
      #
      # @param members [Array<String>] member ids
      # @param topics [Array<String>] topics
      # @return [Hash{String => Protocol::MemberAssignment}] a hash mapping member
      #   ids to assignments.
      def assign(members:, topics:)
        group_assignment = {}
        instance_id_to_capacity = Hash.new(0)
        instance_id_to_member_ids = Hash.new { |h, k| h[k] = [] }
        total_capacity = 0

        instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call() : @instance_family_weights
        az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call() : @availability_zone_weights
        members.each do |member_id|
          group_assignment[member_id] = Protocol::MemberAssignment.new

          instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
          instance_id_to_member_ids[instance_id] << member_id
          capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
          instance_id_to_capacity[instance_id] += capacity
          total_capacity += capacity
        end

        topic_partitions = topics.flat_map do |topic|
          begin
            partitions = @cluster.partitions_for(topic).map(&:partition_id)
          rescue UnknownTopicOrPartition
            raise UnknownTopicOrPartition, "unknown topic #{topic}"
          end
          Array.new(partitions.count) { topic }.zip(partitions)
        end

        partition_count_per_capacity = topic_partitions.size / total_capacity
        last_index = 0
        instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
          partition_count = (capacity * partition_count_per_capacity).round
          member_ids = instance_id_to_member_ids[instance_id]
          topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
            member_id = member_ids[index % member_ids.size]
            group_assignment[member_id].assign(topic, [partition])
          end

          last_index += partition_count
        end

        if last_index < topic_partitions.size
          member_ids = instance_id_to_member_ids.values.flatten
          topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
            member_id = member_ids[index % member_ids.size]
            group_assignment[member_id].assign(topic, [partition])
          end
        end

        group_assignment
      rescue Kafka::LeaderNotAvailable
        sleep 1
        retry
      end

      private

      def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
        instance_family, _ = instance_type.split(".")
        instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
      end
    end
  end
end
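To see the slicing logic of #assign in isolation, here is a self-contained re-enactment with made-up instance ids, three instances, and a 20-partition topic. It mirrors the sort/round/round-robin steps above but stubs out all the Kafka plumbing, so it is a sketch of the algorithm rather than the gem's actual code path:

```ruby
# Illustrative re-enactment of the allocation in #assign -- hypothetical ids, no Kafka objects.
instance_capacity = { "i-aaa" => 1.35, "i-bbb" => 1.0, "i-ccc" => 0.25 }
instance_members  = { "i-aaa" => ["m1", "m2"], "i-bbb" => ["m3"], "i-ccc" => ["m4"] }
partitions = (0...20).to_a

assignment = Hash.new { |h, k| h[k] = [] }
per_capacity = partitions.size / instance_capacity.values.sum
last_index = 0
instance_capacity.sort_by { |_, c| -c }.each do |instance_id, capacity|
  count = (capacity * per_capacity).round
  members = instance_members[instance_id]
  partitions[last_index, count]&.each_with_index do |partition, i|
    assignment[members[i % members.size]] << partition # round-robin within the instance
  end
  last_index += count
end

# Partitions left over by rounding go round-robin across all members,
# mirroring the fallback at the end of #assign.
all_members = instance_members.values.flatten
partitions[last_index..-1].to_a.each_with_index do |partition, i|
  assignment[all_members[i % all_members.size]] << partition
end

p assignment.transform_values(&:size) # => {"m1"=>5, "m2"=>5, "m3"=>8, "m4"=>2}
```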
data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
ADDED
@@ -0,0 +1,24 @@
# frozen_string_literal: true

require "kafka/ec2/mixed_instance_assignment_strategy"

module Kafka
  class EC2
    class MixedInstanceAssignmentStrategyFactory
      # @param instance_family_weights [Hash, Proc]
      # @param availability_zone_weights [Hash, Proc]
      def initialize(instance_family_weights: {}, availability_zone_weights: {})
        @instance_family_weights = instance_family_weights
        @availability_zone_weights = availability_zone_weights
      end

      def create(cluster:)
        Kafka::EC2::MixedInstanceAssignmentStrategy.new(
          cluster: cluster,
          instance_family_weights: @instance_family_weights,
          availability_zone_weights: @availability_zone_weights,
        )
      end
    end
  end
end
data/ruby-kafka-ec2.gemspec
ADDED
@@ -0,0 +1,29 @@
require_relative 'lib/kafka/ec2/version'

Gem::Specification.new do |spec|
  spec.name          = "ruby-kafka-ec2"
  spec.version       = Kafka::EC2::VERSION
  spec.authors       = ["abicky"]
  spec.email         = ["takeshi.arabiki@gmail.com"]

  spec.summary       = %q{An extension of ruby-kafka for EC2}
  spec.description   = %q{Kafka::EC2 is an extension of ruby-kafka that provides useful features for EC2 like Kafka::EC2::MixedInstanceAssignmentStrategy.}
  spec.homepage      = "https://github.com/abicky/ruby-kafka-ec2"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "ruby-kafka", ">= 0.7", "< 2"

  spec.add_development_dependency "webmock"
  spec.add_development_dependency "concurrent-ruby" # cf. https://github.com/zendesk/ruby-kafka/pull/835
end
metadata
ADDED
@@ -0,0 +1,123 @@
--- !ruby/object:Gem::Specification
name: ruby-kafka-ec2
version: !ruby/object:Gem::Version
  version: 0.1.0
platform: ruby
authors:
- abicky
autorequire:
bindir: exe
cert_chain: []
date: 2020-06-21 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: ruby-kafka
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0.7'
    - - "<"
      - !ruby/object:Gem::Version
        version: '2'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0.7'
    - - "<"
      - !ruby/object:Gem::Version
        version: '2'
- !ruby/object:Gem::Dependency
  name: webmock
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: concurrent-ruby
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
description: Kafka::EC2 is an extension of ruby-kafka that provides useful features
  for EC2 like Kafka::EC2::MixedInstanceAssignmentStrategy.
email:
- takeshi.arabiki@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- ".github/workflows/test.yml"
- ".gitignore"
- ".rspec"
- Gemfile
- LICENSE.txt
- README.md
- Rakefile
- benchmark/.gitignore
- benchmark/Dockerfile
- benchmark/Dockerfile.dockerignore
- benchmark/Gemfile
- benchmark/README.md
- benchmark/aws.tf
- benchmark/consume_messages.rb
- benchmark/consume_messages.sh
- benchmark/produce_messages.rb
- benchmark/produce_messages.sh
- benchmark/register_docker_image.sh
- benchmark/ruby-kafka-ec2.log
- benchmark/ruby-kafka.log
- bin/console
- bin/setup
- lib/kafka/ec2.rb
- lib/kafka/ec2/ext/consumer_group.rb
- lib/kafka/ec2/ext/protocol/join_group_request.rb
- lib/kafka/ec2/mixed_instance_assignment_strategy.rb
- lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
- lib/kafka/ec2/version.rb
- ruby-kafka-ec2.gemspec
homepage: https://github.com/abicky/ruby-kafka-ec2
licenses:
- MIT
metadata:
  homepage_uri: https://github.com/abicky/ruby-kafka-ec2
  source_code_uri: https://github.com/abicky/ruby-kafka-ec2
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 2.3.0
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.1.2
signing_key:
specification_version: 4
summary: An extension of ruby-kafka for EC2
test_files: []