ruby-kafka-ec2 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +22 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -0
- data/Rakefile +6 -0
- data/benchmark/.gitignore +4 -0
- data/benchmark/Dockerfile +13 -0
- data/benchmark/Dockerfile.dockerignore +1 -0
- data/benchmark/Gemfile +11 -0
- data/benchmark/README.md +99 -0
- data/benchmark/aws.tf +135 -0
- data/benchmark/consume_messages.rb +106 -0
- data/benchmark/consume_messages.sh +47 -0
- data/benchmark/produce_messages.rb +96 -0
- data/benchmark/produce_messages.sh +45 -0
- data/benchmark/register_docker_image.sh +20 -0
- data/benchmark/ruby-kafka-ec2.log +12 -0
- data/benchmark/ruby-kafka.log +12 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/kafka/ec2.rb +23 -0
- data/lib/kafka/ec2/ext/consumer_group.rb +33 -0
- data/lib/kafka/ec2/ext/protocol/join_group_request.rb +39 -0
- data/lib/kafka/ec2/mixed_instance_assignment_strategy.rb +95 -0
- data/lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb +24 -0
- data/lib/kafka/ec2/version.rb +5 -0
- data/ruby-kafka-ec2.gemspec +29 -0
- metadata +123 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a216c0064e93662929aa08a27aee3caad6eb7d4a7eb02a658b6edef1ced9fa33
|
4
|
+
data.tar.gz: 5982cfb402ea097fbc45580b3bf17c88d1ef6fef4c9f3fc3c06b71c0ba9fd8f7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 47ef0c231763ba3b9c8ee95417eba748a205e8791bd973bdebdb7e4b30aa103f32c965fbbc47cc8aa157f6ae80bd8e4f141ac153264eb3514accb569a375f8d5
|
7
|
+
data.tar.gz: '009ad55259d086a25252b8fd0aa6f9f3eecdbf0c25b79ac00c07a920b28fe7d4d1758043e7ef804364e56c869bd57e51b51690b44c16745fba45dc12ec2bdb40'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
pull_request:
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
build:
|
9
|
+
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/checkout@v2
|
14
|
+
- name: Set up Ruby 2.6
|
15
|
+
uses: actions/setup-ruby@v1
|
16
|
+
with:
|
17
|
+
ruby-version: 2.6.x
|
18
|
+
- name: Build and test with Rake
|
19
|
+
run: |
|
20
|
+
gem install bundler
|
21
|
+
bundle install --jobs 4 --retry 3
|
22
|
+
bundle exec rake
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2020 abicky
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
# ruby-kafka-ec2
|
2
|
+
|
3
|
+

|
4
|
+
|
5
|
+
ruby-kafka-ec2 is an extension of ruby-kafka that provides useful features for EC2 like Kafka::EC2::MixedInstanceAssignmentStrategy.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'ruby-kafka-ec2'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle install
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install ruby-kafka-ec2
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### Kafka::EC2::MixedInstanceAssignmentStrategy
|
26
|
+
|
27
|
+
`Kafka::EC2::MixedInstanceAssignmentStrategy` is an assignor for auto-scaling groups with mixed instance policies. The throughputs of consumers usually depend on instance families and availability zones. For example, if your application writes data to a database, the throughputs of consumers running on the same availability zone as the writer DB instance is higher.
|
28
|
+
|
29
|
+
To assign more partitions to consumers with high throughputs, you have to define `Kafka::EC2::MixedInstanceAssignmentStrategyFactory` first like below:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
require "aws-sdk-rds"
|
33
|
+
require "kafka"
|
34
|
+
require "kafka/ec2"
|
35
|
+
|
36
|
+
rds = Aws::RDS::Client.new(region: "ap-northeast-1")
|
37
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
38
|
+
instance_family_weights: {
|
39
|
+
"r4" => 1.00,
|
40
|
+
"r5" => 1.20,
|
41
|
+
"m5" => 1.35,
|
42
|
+
"c5" => 1.50,
|
43
|
+
},
|
44
|
+
availability_zone_weights: ->() {
|
45
|
+
db_cluster = rds.describe_db_clusters(filters: [
|
46
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
47
|
+
]).db_clusters.first
|
48
|
+
db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
|
49
|
+
db_instance = rds.describe_db_instances(filters: [
|
50
|
+
{ name: "db-cluster-id", values: [ENV["RDS_CLUSTER"]] },
|
51
|
+
{ name: "db-instance-id", values: [db_instance_id] },
|
52
|
+
]).db_instances.first
|
53
|
+
|
54
|
+
if db_instance.availability_zone == "ap-northeast-1a"
|
55
|
+
{
|
56
|
+
"ap-northeast-1a" => 1,
|
57
|
+
"ap-northeast-1c" => 0.25,
|
58
|
+
}
|
59
|
+
else
|
60
|
+
{
|
61
|
+
"ap-northeast-1a" => 0.25,
|
62
|
+
"ap-northeast-1c" => 1,
|
63
|
+
}
|
64
|
+
end
|
65
|
+
},
|
66
|
+
)
|
67
|
+
```
|
68
|
+
|
69
|
+
In the preceding example, consumers running on c5 instances will have 1.5x as many partitions compared to consumers running on r4 instances. In a similar way, if the writer DB instance is in ap-northeast-1a, consumers in ap-northeast-1a will have 4x as many partitions compared to consumers in ap-northeast-1c.
|
70
|
+
|
71
|
+
You can use `Kafka::EC2::MixedInstanceAssignmentStrategy` by specifying the factory to `Kafka::EC2.with_assignment_strategy_factory` and creating a consumer in the block:
|
72
|
+
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_factory) do
|
76
|
+
kafka.consumer(group_id: ENV["KAFKA_CONSUMER_GROUP_ID"])
|
77
|
+
end
|
78
|
+
```
|
79
|
+
|
80
|
+
## Development
|
81
|
+
|
82
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
83
|
+
|
84
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
85
|
+
|
86
|
+
## Contributing
|
87
|
+
|
88
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/abicky/ruby-kafka-ec2.
|
89
|
+
|
90
|
+
|
91
|
+
## License
|
92
|
+
|
93
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
FROM ruby:2.7
|
2
|
+
|
3
|
+
WORKDIR /work
|
4
|
+
|
5
|
+
COPY Gemfile ruby-kafka-ec2.gemspec /tmp/ruby-kafka-ec2/
|
6
|
+
COPY lib /tmp/ruby-kafka-ec2/lib/
|
7
|
+
|
8
|
+
COPY benchmark/Gemfile /work/
|
9
|
+
RUN bundle install -j4
|
10
|
+
|
11
|
+
COPY benchmark/produce_messages.rb benchmark/consume_messages.rb /work/
|
12
|
+
|
13
|
+
ENTRYPOINT ["bundle", "exec", "ruby"]
|
@@ -0,0 +1 @@
|
|
1
|
+
.terraform
|
data/benchmark/Gemfile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
6
|
+
|
7
|
+
gem "ruby-kafka"
|
8
|
+
gem "concurrent-ruby" # cf. https://github.com/zendesk/ruby-kafka/pull/835
|
9
|
+
gem "mysql2"
|
10
|
+
gem "ruby-kafka-ec2", path: "/tmp/ruby-kafka-ec2"
|
11
|
+
gem "aws-sdk-rds"
|
data/benchmark/README.md
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# Benchmark
|
2
|
+
|
3
|
+
## Procedures
|
4
|
+
|
5
|
+
### 1. Create AWS resources
|
6
|
+
|
7
|
+
```
|
8
|
+
terraform init
|
9
|
+
terraform apply
|
10
|
+
```
|
11
|
+
|
12
|
+
### 2. Register docker image
|
13
|
+
|
14
|
+
```
|
15
|
+
./register_docker_image.sh
|
16
|
+
```
|
17
|
+
|
18
|
+
### 3. Produce messages and create database records
|
19
|
+
|
20
|
+
Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:
|
21
|
+
|
22
|
+
```
|
23
|
+
./produce_messages.sh
|
24
|
+
```
|
25
|
+
|
26
|
+
### 4. Consume messages
|
27
|
+
|
28
|
+
Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:
|
29
|
+
|
30
|
+
```
|
31
|
+
USE_KAFKA_EC2=false ./consume_messages.sh
|
32
|
+
```
|
33
|
+
|
34
|
+
Stop all the tasks if all lags become 0. You can check the lags by executing the following command in the kafka client instance:
|
35
|
+
|
36
|
+
```
|
37
|
+
./kafka-consumer-groups.sh \
|
38
|
+
--bootstrap-server <bootstrap-server> \
|
39
|
+
--describe \
|
40
|
+
--group net.abicky.ruby-kafka-ec2.benchmark
|
41
|
+
```
|
42
|
+
|
43
|
+
Reset the offsets in the kafka client instance:
|
44
|
+
|
45
|
+
```
|
46
|
+
./kafka-consumer-groups.sh \
|
47
|
+
--bootstrap-server <bootstrap-server> \
|
48
|
+
--group net.abicky.ruby-kafka-ec2.benchmark \
|
49
|
+
--reset-offsets \
|
50
|
+
--to-earliest \
|
51
|
+
--topic <topic> \
|
52
|
+
--execute
|
53
|
+
```
|
54
|
+
|
55
|
+
Set the environment variables `KAFKA_BROKERS`, `KAFKA_TOPIC`, `MYSQL_HOST`, `MYSQL_PASSWORD`, and `CLUSTER`, and execute the following command:
|
56
|
+
|
57
|
+
```
|
58
|
+
USE_KAFKA_EC2=true ./consume_messages.sh
|
59
|
+
```
|
60
|
+
|
61
|
+
## Result
|
62
|
+
|
63
|
+
### USE_KAFKA_EC2=false
|
64
|
+
|
65
|
+
No. | instance type | availability zone | partition count | processed count | duration (sec)
|
66
|
+
----|---------------|-------------------|-----------------|-----------------|-----------
|
67
|
+
1 | m5.large | ap-northeast-1a | 16 | 80000 | 224.1
|
68
|
+
2 | m5.large | ap-northeast-1a | 17 | 85000 | 237.7
|
69
|
+
3 | m5.large | ap-northeast-1c | 17 | 85000 | 56.8
|
70
|
+
4 | m5.large | ap-northeast-1c | 17 | 85000 | 57.3
|
71
|
+
5 | r4.large | ap-northeast-1a | 16 | 80000 | 257.6
|
72
|
+
6 | r4.large | ap-northeast-1a | 17 | 85000 | 258.9
|
73
|
+
7 | r4.large | ap-northeast-1c | 16 | 80000 | 74.2
|
74
|
+
8 | r4.large | ap-northeast-1c | 17 | 85000 | 76.6
|
75
|
+
9 | r5.large | ap-northeast-1a | 17 | 80000 | 238.0
|
76
|
+
10 | r5.large | ap-northeast-1a | 17 | 85000 | 238.1
|
77
|
+
11 | r5.large | ap-northeast-1c | 16 | 80000 | 54.1
|
78
|
+
12 | r5.large | ap-northeast-1c | 16 | 85000 | 55.1
|
79
|
+
|
80
|
+
See ruby-kafka.log for more details.
|
81
|
+
|
82
|
+
### USE_KAFKA_EC2=true
|
83
|
+
|
84
|
+
No. | instance type | availability zone | partition count | processed count | duration (sec)
|
85
|
+
----|---------------|-------------------|-----------------|-----------------|-----------
|
86
|
+
1 | m5.large | ap-northeast-1a | 7 | 35000 | 101.4
|
87
|
+
2 | m5.large | ap-northeast-1a | 8 | 40000 | 114.4
|
88
|
+
3 | m5.large | ap-northeast-1c | 31 | 155000 | 101.6
|
89
|
+
4 | m5.large | ap-northeast-1c | 30 | 150000 | 98.6
|
90
|
+
5 | r4.large | ap-northeast-1a | 5 | 25000 | 79.0
|
91
|
+
6 | r4.large | ap-northeast-1a | 6 | 30000 | 92.4
|
92
|
+
7 | r4.large | ap-northeast-1c | 23 | 115000 | 105.9
|
93
|
+
8 | r4.large | ap-northeast-1c | 22 | 110000 | 106.1
|
94
|
+
9 | r5.large | ap-northeast-1a | 7 | 35000 | 102.1
|
95
|
+
10 | r5.large | ap-northeast-1a | 7 | 35000 | 102.1
|
96
|
+
11 | r5.large | ap-northeast-1c | 27 | 135000 | 94.2
|
97
|
+
12 | r5.large | ap-northeast-1c | 27 | 135000 | 95.7
|
98
|
+
|
99
|
+
See ruby-kafka-ec2.log for more details.
|
data/benchmark/aws.tf
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
provider "aws" {
|
2
|
+
region = "ap-northeast-1"
|
3
|
+
}
|
4
|
+
|
5
|
+
variable "vpc_id" {}
|
6
|
+
variable "ec2_instance_security_group_id" {}
|
7
|
+
variable "ec2_key_name" {}
|
8
|
+
variable "rds_master_password" {}
|
9
|
+
variable "kafka_broker_subnet_ids" {}
|
10
|
+
|
11
|
+
# RDS
|
12
|
+
|
13
|
+
resource "aws_security_group" "rds" {
|
14
|
+
name = "ruby-kafka-ec2-rds"
|
15
|
+
vpc_id = var.vpc_id
|
16
|
+
|
17
|
+
egress {
|
18
|
+
from_port = 0
|
19
|
+
to_port = 0
|
20
|
+
protocol = "-1"
|
21
|
+
cidr_blocks = ["0.0.0.0/0"]
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
resource "aws_security_group_rule" "rds_access_from_ec2" {
|
26
|
+
security_group_id = aws_security_group.rds.id
|
27
|
+
type = "ingress"
|
28
|
+
from_port = 3306
|
29
|
+
to_port = 3306
|
30
|
+
protocol = "tcp"
|
31
|
+
source_security_group_id = var.ec2_instance_security_group_id
|
32
|
+
}
|
33
|
+
|
34
|
+
|
35
|
+
resource "aws_rds_cluster" "benchmark" {
|
36
|
+
cluster_identifier = "ruby-kafka-ec2-benchmark"
|
37
|
+
engine = "aurora"
|
38
|
+
engine_mode = "provisioned"
|
39
|
+
engine_version = "5.6.10a"
|
40
|
+
availability_zones = ["ap-northeast-1a", "ap-northeast-1c"]
|
41
|
+
master_username = "admin"
|
42
|
+
master_password = var.rds_master_password
|
43
|
+
backup_retention_period = 1
|
44
|
+
skip_final_snapshot = true
|
45
|
+
vpc_security_group_ids = [aws_security_group.rds.id]
|
46
|
+
}
|
47
|
+
|
48
|
+
resource "aws_rds_cluster_instance" "instances" {
|
49
|
+
count = 1
|
50
|
+
identifier = "${aws_rds_cluster.benchmark.cluster_identifier}-${count.index}"
|
51
|
+
cluster_identifier = aws_rds_cluster.benchmark.id
|
52
|
+
instance_class = "db.r5.large"
|
53
|
+
}
|
54
|
+
|
55
|
+
# Kafka
|
56
|
+
|
57
|
+
data "aws_kms_key" "kafka" {
|
58
|
+
key_id = "alias/aws/kafka"
|
59
|
+
}
|
60
|
+
|
61
|
+
resource "aws_security_group" "kafka_cluster" {
|
62
|
+
name = "ruby-kafka-ec2-kafka-cluster"
|
63
|
+
vpc_id = var.vpc_id
|
64
|
+
|
65
|
+
egress {
|
66
|
+
from_port = 0
|
67
|
+
to_port = 0
|
68
|
+
protocol = "-1"
|
69
|
+
cidr_blocks = ["0.0.0.0/0"]
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
resource "aws_security_group_rule" "kafka_cluster_from_ec2" {
|
74
|
+
security_group_id = aws_security_group.kafka_cluster.id
|
75
|
+
type = "ingress"
|
76
|
+
from_port = 0
|
77
|
+
to_port = 0
|
78
|
+
protocol = "-1"
|
79
|
+
source_security_group_id = var.ec2_instance_security_group_id
|
80
|
+
}
|
81
|
+
|
82
|
+
resource "aws_msk_cluster" "benchmark" {
|
83
|
+
cluster_name = "ruby-kafka-ec2-benchmark"
|
84
|
+
kafka_version = "2.2.1"
|
85
|
+
number_of_broker_nodes = 3
|
86
|
+
|
87
|
+
broker_node_group_info {
|
88
|
+
instance_type = "kafka.m5.large"
|
89
|
+
ebs_volume_size = "100"
|
90
|
+
client_subnets = var.kafka_broker_subnet_ids
|
91
|
+
security_groups = [aws_security_group.kafka_cluster.id]
|
92
|
+
}
|
93
|
+
|
94
|
+
encryption_info {
|
95
|
+
encryption_at_rest_kms_key_arn = data.aws_kms_key.kafka.arn
|
96
|
+
|
97
|
+
encryption_in_transit {
|
98
|
+
client_broker = "TLS_PLAINTEXT"
|
99
|
+
in_cluster = true
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
data "aws_ami" "most_recent_amazon_linux2" {
|
105
|
+
most_recent = true
|
106
|
+
owners = ["amazon"]
|
107
|
+
|
108
|
+
filter {
|
109
|
+
name = "name"
|
110
|
+
values = ["amzn2-ami-hvm-2.0.*-x86_64-gp2"]
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
resource "aws_instance" "kafka_client" {
|
115
|
+
ami = data.aws_ami.most_recent_amazon_linux2.image_id
|
116
|
+
instance_type = "t3.nano"
|
117
|
+
key_name = var.ec2_key_name
|
118
|
+
|
119
|
+
subnet_id = var.kafka_broker_subnet_ids[0]
|
120
|
+
vpc_security_group_ids = [var.ec2_instance_security_group_id]
|
121
|
+
|
122
|
+
associate_public_ip_address = true
|
123
|
+
|
124
|
+
# cf. https://docs.aws.amazon.com/msk/latest/developerguide/create-client-machine.html
|
125
|
+
user_data = <<EOF
|
126
|
+
#!/bin/bash
|
127
|
+
yum install -y java-1.8.0
|
128
|
+
wget https://archive.apache.org/dist/kafka/2.2.1/kafka_2.12-2.2.1.tgz
|
129
|
+
tar -xzf kafka_2.12-2.2.1.tgz
|
130
|
+
EOF
|
131
|
+
|
132
|
+
tags = {
|
133
|
+
Name = "kafka-client"
|
134
|
+
}
|
135
|
+
}
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require "json"
|
2
|
+
require "net/http"
|
3
|
+
require "time"
|
4
|
+
|
5
|
+
require "kafka"
|
6
|
+
require "concurrent/map" # cf. https://github.com/zendesk/ruby-kafka/pull/835
|
7
|
+
require "mysql2"
|
8
|
+
|
9
|
+
KAFKA_BROKERS = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(/\p{Space}*,\p{Space}*/)
|
10
|
+
KAFKA_CLIENT_ID = "net.abicky.ruby-kafka-ec2"
|
11
|
+
KAFKA_CONSUMER_GROUP_ID = "net.abicky.ruby-kafka-ec2.benchmark"
|
12
|
+
KAFKA_TOPIC = ENV.fetch("KAFKA_TOPIC") do
|
13
|
+
raise 'The environment variable "KAFKA_TOPIC" must be specified'
|
14
|
+
end
|
15
|
+
|
16
|
+
$stdout.sync = true
|
17
|
+
logger = Logger.new($stdout)
|
18
|
+
|
19
|
+
kafka = Kafka.new(KAFKA_BROKERS, client_id: KAFKA_CLIENT_ID)
|
20
|
+
if ENV["USE_KAFKA_EC2"] == "true"
|
21
|
+
logger.info "Use ruby-kafka-ec2"
|
22
|
+
require "aws-sdk-rds"
|
23
|
+
require "kafka/ec2"
|
24
|
+
|
25
|
+
rds = Aws::RDS::Client.new(region: "ap-northeast-1")
|
26
|
+
assignment_strategy_factory = Kafka::EC2::MixedInstanceAssignmentStrategyFactory.new(
|
27
|
+
instance_family_weights: {
|
28
|
+
"r4" => 1.00,
|
29
|
+
"r5" => 1.20,
|
30
|
+
"m5" => 1.35,
|
31
|
+
},
|
32
|
+
availability_zone_weights: ->() {
|
33
|
+
db_cluster = rds.describe_db_clusters(filters: [
|
34
|
+
{ name: "db-cluster-id", values: ["ruby-kafka-ec2-benchmark"] },
|
35
|
+
]).db_clusters.first
|
36
|
+
db_instance_id = db_cluster.db_cluster_members.find { |m| m.is_cluster_writer }.db_instance_identifier
|
37
|
+
db_instance = rds.describe_db_instances(filters: [
|
38
|
+
{ name: "db-cluster-id", values: ["ruby-kafka-ec2-benchmark"] },
|
39
|
+
{ name: "db-instance-id", values: [db_instance_id] },
|
40
|
+
]).db_instances.first
|
41
|
+
|
42
|
+
if db_instance.availability_zone == "ap-northeast-1a"
|
43
|
+
{
|
44
|
+
"ap-northeast-1a" => 1,
|
45
|
+
"ap-northeast-1c" => 0.25,
|
46
|
+
}
|
47
|
+
else
|
48
|
+
{
|
49
|
+
"ap-northeast-1a" => 0.25,
|
50
|
+
"ap-northeast-1c" => 1,
|
51
|
+
}
|
52
|
+
end
|
53
|
+
},
|
54
|
+
)
|
55
|
+
consumer = Kafka::EC2.with_assignment_strategy_factory(assignment_strategy_factory) do
|
56
|
+
kafka.consumer(group_id: KAFKA_CONSUMER_GROUP_ID)
|
57
|
+
end
|
58
|
+
else
|
59
|
+
logger.info "Use ruby-kafka"
|
60
|
+
consumer = kafka.consumer(group_id: KAFKA_CONSUMER_GROUP_ID)
|
61
|
+
end
|
62
|
+
|
63
|
+
consumer.subscribe(KAFKA_TOPIC)
|
64
|
+
|
65
|
+
trap(:TERM) { consumer.stop }
|
66
|
+
|
67
|
+
metadata = Net::HTTP.start("169.254.169.254", 80) do |http|
|
68
|
+
{
|
69
|
+
instance_id: http.get("/latest/meta-data/instance-id").body,
|
70
|
+
instance_type: http.get("/latest/meta-data/instance-type").body,
|
71
|
+
availability_zone: http.get("/latest/meta-data/placement/availability-zone").body,
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
client = Mysql2::Client.new(
|
76
|
+
host: ENV["MYSQL_HOST"] || "localhost",
|
77
|
+
port: 3306,
|
78
|
+
username: "admin",
|
79
|
+
password: ENV["MYSQL_PASSWORD"],
|
80
|
+
)
|
81
|
+
|
82
|
+
logger.info "[#{metadata}] Consuming messages..."
|
83
|
+
|
84
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
85
|
+
processed_count = 0
|
86
|
+
partition_count = 0
|
87
|
+
end_time = nil
|
88
|
+
consumer.each_message do |message|
|
89
|
+
if message.value == "FIN"
|
90
|
+
end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
91
|
+
partition_count += 1
|
92
|
+
logger.info "[#{metadata}] Received FIN message"
|
93
|
+
next
|
94
|
+
end
|
95
|
+
|
96
|
+
JSON.parse(message.value)["events"].each do |event|
|
97
|
+
Time.iso8601(event["timestamp"])
|
98
|
+
end
|
99
|
+
client.query("SELECT * FROM ruby_kafka_ec2_benchmark.events").to_a
|
100
|
+
|
101
|
+
processed_count += 1
|
102
|
+
logger.info "[#{metadata}] #{processed_count} messages were consumed" if (processed_count % 10_000).zero?
|
103
|
+
end
|
104
|
+
|
105
|
+
duration = end_time - start_time
|
106
|
+
logger.info "[#{metadata}] Complete (duration: #{duration}, partition_count: #{partition_count}, processed_count: #{processed_count})"
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -eo pipefail
|
4
|
+
|
5
|
+
REPOSITORY=ruby-kafka-ec2/benchmark
|
6
|
+
TASK_FAMILY=ruby-kafka-ec2-benchmark-consumer
|
7
|
+
|
8
|
+
repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
|
9
|
+
if [ -z "$repo_uri" ]; then
|
10
|
+
echo "The repository '$REPOSITORY' is not found. Execute register_docker_image.sh first, please." >&2
|
11
|
+
exit
|
12
|
+
fi
|
13
|
+
|
14
|
+
aws ecs register-task-definition --cli-input-json "$(cat <<JSON
|
15
|
+
{
|
16
|
+
"family": "$TASK_FAMILY",
|
17
|
+
"containerDefinitions": [
|
18
|
+
{
|
19
|
+
"name": "consume_messages",
|
20
|
+
"image": "$repo_uri",
|
21
|
+
"command": ["consume_messages.rb"],
|
22
|
+
"essential": true,
|
23
|
+
"environment": [
|
24
|
+
{"name": "KAFKA_BROKERS", "value": "$KAFKA_BROKERS"},
|
25
|
+
{"name": "KAFKA_TOPIC", "value": "$KAFKA_TOPIC"},
|
26
|
+
{"name": "MYSQL_HOST", "value": "$MYSQL_HOST"},
|
27
|
+
{"name": "MYSQL_PASSWORD", "value": "$MYSQL_PASSWORD"},
|
28
|
+
{"name": "USE_KAFKA_EC2", "value": "$USE_KAFKA_EC2"}
|
29
|
+
],
|
30
|
+
"logConfiguration": {
|
31
|
+
"logDriver": "awslogs",
|
32
|
+
"options": {
|
33
|
+
"awslogs-group": "/aws/ecs/ruby-kafka-ec2/benchmark",
|
34
|
+
"awslogs-region": "ap-northeast-1",
|
35
|
+
"awslogs-stream-prefix": "consume-messages"
|
36
|
+
}
|
37
|
+
}
|
38
|
+
}
|
39
|
+
],
|
40
|
+
"cpu": "1024",
|
41
|
+
"memory": "2048"
|
42
|
+
}
|
43
|
+
JSON
|
44
|
+
)"
|
45
|
+
|
46
|
+
aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY --count 10
|
47
|
+
aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY --count 2
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require "json"
|
2
|
+
require "time"
|
3
|
+
|
4
|
+
require "concurrent"
|
5
|
+
require "kafka"
|
6
|
+
require "mysql2"
|
7
|
+
|
8
|
+
KAFKA_BROKERS = ENV.fetch("KAFKA_BROKERS", "localhost:9092").split(/\p{Space}*,\p{Space}*/)
|
9
|
+
KAFKA_CLIENT_ID = "net.abicky.ruby-kafka-ec2"
|
10
|
+
KAFKA_TOPIC = ENV.fetch("KAFKA_TOPIC") do
|
11
|
+
raise 'The environment variable "KAFKA_TOPIC" must be specified'
|
12
|
+
end
|
13
|
+
PARTITION_COUNT = 200
|
14
|
+
MAX_BUFFER_SIZE = 1_000
|
15
|
+
MESSAGE_COUNT = 1_000_000
|
16
|
+
|
17
|
+
$stdout.sync = true
|
18
|
+
logger = Logger.new($stdout)
|
19
|
+
|
20
|
+
client = Mysql2::Client.new(
|
21
|
+
host: ENV["MYSQL_HOST"] || "localhost",
|
22
|
+
port: 3306,
|
23
|
+
username: "admin",
|
24
|
+
password: ENV["MYSQL_PASSWORD"],
|
25
|
+
)
|
26
|
+
client.query("CREATE DATABASE IF NOT EXISTS ruby_kafka_ec2_benchmark")
|
27
|
+
client.query(<<~SQL)
|
28
|
+
CREATE TABLE IF NOT EXISTS ruby_kafka_ec2_benchmark.events (
|
29
|
+
id bigint(20) NOT NULL AUTO_INCREMENT,
|
30
|
+
name varchar(255) NOT NULL,
|
31
|
+
created_at datetime NOT NULL,
|
32
|
+
PRIMARY KEY (id)
|
33
|
+
)
|
34
|
+
SQL
|
35
|
+
client.query(<<~SQL)
|
36
|
+
INSERT INTO ruby_kafka_ec2_benchmark.events (name, created_at) VALUES ('page_view', '#{Time.now.strftime("%F %T")}')
|
37
|
+
SQL
|
38
|
+
|
39
|
+
kafka = Kafka.new(KAFKA_BROKERS, client_id: KAFKA_CLIENT_ID)
|
40
|
+
|
41
|
+
unless kafka.topics.include?(KAFKA_TOPIC)
|
42
|
+
logger.info "Create the kafka topic '#{KAFKA_TOPIC}'"
|
43
|
+
kafka.create_topic(KAFKA_TOPIC, num_partitions: PARTITION_COUNT, replication_factor: 3)
|
44
|
+
end
|
45
|
+
|
46
|
+
now = Time.now.iso8601(3)
|
47
|
+
message = {
|
48
|
+
events: [{ name: "page_view", timestamp: now }] * 10,
|
49
|
+
}.to_json
|
50
|
+
|
51
|
+
|
52
|
+
logger.info "Producing messages..."
|
53
|
+
|
54
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
55
|
+
pool = Concurrent::FixedThreadPool.new(4)
|
56
|
+
producers = {}
|
57
|
+
current_processed_count = Concurrent::AtomicFixnum.new
|
58
|
+
futures = []
|
59
|
+
MESSAGE_COUNT.times do |i|
|
60
|
+
futures << Concurrent::Future.execute(executor: pool) do
|
61
|
+
producers[Thread.current.object_id] ||= kafka.producer(max_buffer_size: MAX_BUFFER_SIZE)
|
62
|
+
producer = producers[Thread.current.object_id]
|
63
|
+
producer.produce(message,
|
64
|
+
topic: KAFKA_TOPIC,
|
65
|
+
key: i.to_s,
|
66
|
+
partition: i % PARTITION_COUNT,
|
67
|
+
)
|
68
|
+
if producer.buffer_size == MAX_BUFFER_SIZE
|
69
|
+
producer.deliver_messages
|
70
|
+
end
|
71
|
+
processed_count = current_processed_count.increment
|
72
|
+
logger.info "#{processed_count} messages were produced" if (processed_count % 10_000).zero?
|
73
|
+
end
|
74
|
+
|
75
|
+
if futures.size == 10_000
|
76
|
+
futures.each(&:wait!)
|
77
|
+
futures.clear
|
78
|
+
end
|
79
|
+
end
|
80
|
+
futures.each(&:wait!)
|
81
|
+
|
82
|
+
producers.each_value(&:deliver_messages)
|
83
|
+
|
84
|
+
logger.info "Produce FIN messages"
|
85
|
+
producer = kafka.producer
|
86
|
+
PARTITION_COUNT.times do |i|
|
87
|
+
producer.produce("FIN",
|
88
|
+
topic: KAFKA_TOPIC,
|
89
|
+
key: "fin_#{i}",
|
90
|
+
partition: i % PARTITION_COUNT,
|
91
|
+
)
|
92
|
+
end
|
93
|
+
producer.deliver_messages
|
94
|
+
|
95
|
+
duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
96
|
+
logger.info "Complete (duration: #{duration})"
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -eo pipefail
|
4
|
+
|
5
|
+
REPOSITORY=ruby-kafka-ec2/benchmark
|
6
|
+
TASK_FAMILY=ruby-kafka-ec2-benchmark-producer
|
7
|
+
|
8
|
+
repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
|
9
|
+
if [ -z "$repo_uri" ]; then
|
10
|
+
echo "The repository '$REPOSITORY' is not found. Execute register_docker_image.sh first, please." >&2
|
11
|
+
exit
|
12
|
+
fi
|
13
|
+
|
14
|
+
aws ecs register-task-definition --cli-input-json "$(cat <<JSON
|
15
|
+
{
|
16
|
+
"family": "$TASK_FAMILY",
|
17
|
+
"containerDefinitions": [
|
18
|
+
{
|
19
|
+
"name": "produce_messages",
|
20
|
+
"image": "$repo_uri",
|
21
|
+
"command": ["produce_messages.rb"],
|
22
|
+
"essential": true,
|
23
|
+
"environment": [
|
24
|
+
{"name": "KAFKA_BROKERS", "value": "$KAFKA_BROKERS"},
|
25
|
+
{"name": "KAFKA_TOPIC", "value": "$KAFKA_TOPIC"},
|
26
|
+
{"name": "MYSQL_HOST", "value": "$MYSQL_HOST"},
|
27
|
+
{"name": "MYSQL_PASSWORD", "value": "$MYSQL_PASSWORD"}
|
28
|
+
],
|
29
|
+
"logConfiguration": {
|
30
|
+
"logDriver": "awslogs",
|
31
|
+
"options": {
|
32
|
+
"awslogs-group": "/aws/ecs/ruby-kafka-ec2/benchmark",
|
33
|
+
"awslogs-region": "ap-northeast-1",
|
34
|
+
"awslogs-stream-prefix": "produce-messages"
|
35
|
+
}
|
36
|
+
}
|
37
|
+
}
|
38
|
+
],
|
39
|
+
"cpu": "2048",
|
40
|
+
"memory": "4096"
|
41
|
+
}
|
42
|
+
JSON
|
43
|
+
)"
|
44
|
+
|
45
|
+
aws ecs run-task --cluster $CLUSTER --task-definition $TASK_FAMILY
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
set -eo pipefail
|
4
|
+
|
5
|
+
REPOSITORY=ruby-kafka-ec2/benchmark
|
6
|
+
|
7
|
+
cd $(dirname $(cd $(dirname $0) && pwd))
|
8
|
+
repo_uri=$((aws ecr describe-repositories --repository-names $REPOSITORY | jq -r '.repositories[] | .repositoryUri') || true)
|
9
|
+
if [ -z "$repo_uri" ]; then
|
10
|
+
echo "Create ECR repository '$REPOSITORY'"
|
11
|
+
repo_uri=$(aws ecr create-repository --repository-name $REPOSITORY | jq -r '.repository.repositoryUri')
|
12
|
+
fi
|
13
|
+
|
14
|
+
echo "Build docker image"
|
15
|
+
DOCKER_BUILDKIT=1 docker build -t $REPOSITORY . -f benchmark/Dockerfile
|
16
|
+
|
17
|
+
echo "Push $repo_uri:latest"
|
18
|
+
aws ecr get-login-password | docker login --username AWS --password-stdin $repo_uri
|
19
|
+
docker tag $REPOSITORY:latest $repo_uri:latest
|
20
|
+
docker push $repo_uri:latest
|
@@ -0,0 +1,12 @@
|
|
1
|
+
I, [2020-06-21T21:02:27.530399 #1] INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 94.20265760399889, partition_count: 27, processed_count: 135000)
|
2
|
+
I, [2020-06-21T21:02:28.532064 #1] INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 106.07931412400012, partition_count: 22, processed_count: 110000)
|
3
|
+
I, [2020-06-21T21:02:28.787485 #1] INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 98.62195983100173, partition_count: 30, processed_count: 150000)
|
4
|
+
I, [2020-06-21T21:02:28.838145 #1] INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 95.73462492099952, partition_count: 27, processed_count: 135000)
|
5
|
+
I, [2020-06-21T21:02:29.113526 #1] INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 79.04582024299998, partition_count: 5, processed_count: 25000)
|
6
|
+
I, [2020-06-21T21:02:29.150897 #1] INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 102.10359391399925, partition_count: 7, processed_count: 35000)
|
7
|
+
I, [2020-06-21T21:02:29.181021 #1] INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 101.41423055399991, partition_count: 7, processed_count: 35000)
|
8
|
+
I, [2020-06-21T21:02:29.250973 #1] INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 101.59283073200277, partition_count: 31, processed_count: 155000)
|
9
|
+
I, [2020-06-21T21:02:29.257868 #1] INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 92.3937842589985, partition_count: 6, processed_count: 30000)
|
10
|
+
I, [2020-06-21T21:02:29.255276 #1] INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 102.14144639099959, partition_count: 7, processed_count: 35000)
|
11
|
+
I, [2020-06-21T21:02:29.381202 #1] INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 105.87376681400019, partition_count: 23, processed_count: 115000)
|
12
|
+
I, [2020-06-21T21:02:29.386796 #1] INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 114.40461669500019, partition_count: 8, processed_count: 40000)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
I, [2020-06-21T20:46:00.436023 #1] INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 57.33392923200154, partition_count: 17, processed_count: 85000)
|
2
|
+
I, [2020-06-21T20:46:00.539574 #1] INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 238.05382897799973, partition_count: 17, processed_count: 85000)
|
3
|
+
I, [2020-06-21T20:46:00.607154 #1] INFO -- : [{:instance_id=>"i-0c0b6259f2e5ec3d8", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 238.03822113800015, partition_count: 17, processed_count: 85000)
|
4
|
+
I, [2020-06-21T20:46:00.988614 #1] INFO -- : [{:instance_id=>"i-0aae2c827f2e13c54", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 56.78356462699958, partition_count: 17, processed_count: 85000)
|
5
|
+
I, [2020-06-21T20:46:01.308462 #1] INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 224.05910681800015, partition_count: 16, processed_count: 80000)
|
6
|
+
I, [2020-06-21T20:46:01.377471 #1] INFO -- : [{:instance_id=>"i-0386bf32a565e5bff", :instance_type=>"m5.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 237.7210560049998, partition_count: 17, processed_count: 85000)
|
7
|
+
I, [2020-06-21T20:46:01.431828 #1] INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 55.10812590900059, partition_count: 16, processed_count: 80000)
|
8
|
+
I, [2020-06-21T20:46:01.929500 #1] INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 257.61435206200076, partition_count: 17, processed_count: 85000)
|
9
|
+
I, [2020-06-21T20:46:01.980481 #1] INFO -- : [{:instance_id=>"i-042ca61bd40d8692c", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1a"}] Complete (duration: 258.9417281019996, partition_count: 17, processed_count: 85000)
|
10
|
+
I, [2020-06-21T20:46:02.120333 #1] INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 76.57086077900021, partition_count: 17, processed_count: 85000)
|
11
|
+
I, [2020-06-21T20:46:02.319151 #1] INFO -- : [{:instance_id=>"i-0277b51635cc60328", :instance_type=>"r4.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 74.19314250700063, partition_count: 16, processed_count: 80000)
|
12
|
+
I, [2020-06-21T20:46:02.947805 #1] INFO -- : [{:instance_id=>"i-03d54244a9ca1b766", :instance_type=>"r5.large", :availability_zone=>"ap-northeast-1c"}] Complete (duration: 54.1334114199999, partition_count: 16, processed_count: 80000)
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Interactive development console: loads the gem via Bundler and drops into IRB.

require "bundler/setup"
require "kafka/ec2"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/kafka/ec2.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "kafka/ec2/ext/consumer_group"
|
2
|
+
require "kafka/ec2/ext/protocol/join_group_request"
|
3
|
+
require "kafka/ec2/mixed_instance_assignment_strategy_factory"
|
4
|
+
require "kafka/ec2/version"
|
5
|
+
|
6
|
+
module Kafka
  # Entry point of the ruby-kafka-ec2 extension. Holds the assignment strategy
  # factory that is in effect while a consumer is being constructed, plus a
  # registry of which strategy class each consumer group ended up with.
  class EC2
    class << self
      # @return [#create, nil] the factory currently in effect, or nil when no
      #   with_assignment_strategy_factory block is active
      attr_reader :assignment_strategy_factory

      # Makes +factory+ the active assignment strategy factory for the
      # duration of the block, then restores whatever factory was active
      # before. Restoring the previous value (instead of always resetting to
      # nil) keeps nested calls from clobbering the outer factory.
      #
      # @param factory [#create] object responding to create(cluster:)
      # @return [Object] the block's return value
      def with_assignment_strategy_factory(factory)
        previous = @assignment_strategy_factory
        @assignment_strategy_factory = factory
        yield
      ensure
        @assignment_strategy_factory = previous
      end

      # Registry mapping consumer group IDs to the assignment strategy class
      # chosen for that group. Memoized so all patches share one hash.
      #
      # @return [Hash{String => Class}]
      def assignment_strategy_classes
        @assignment_strategy_classes ||= {}
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kafka/consumer_group"
|
4
|
+
require "kafka/ec2/mixed_instance_assignment_strategy"
|
5
|
+
|
6
|
+
module Kafka
  class EC2
    module Ext
      # Prepended into Kafka::ConsumerGroup so a custom assignment strategy
      # can be injected via Kafka::EC2.with_assignment_strategy_factory.
      module ConsumerGroup
        # Builds the group as usual, then swaps in a strategy created by the
        # active factory (when one is set) and records the resulting strategy
        # class for this group id so other patches can recognize it later.
        def initialize(*args, **kwargs)
          super
          factory = Kafka::EC2.assignment_strategy_factory
          @assignment_strategy = factory.create(cluster: @cluster) if factory
          Kafka::EC2.assignment_strategy_classes[@group_id] = @assignment_strategy.class
        end

        # After joining the group, hands the member metadata to the
        # mixed-instance strategy, which needs it to read each member's EC2
        # instance information during partition assignment.
        def join_group
          super
          strategy_class = Kafka::EC2.assignment_strategy_classes[@group_id]
          if strategy_class == Kafka::EC2::MixedInstanceAssignmentStrategy
            @assignment_strategy.member_id_to_metadata = @members
          end
        end
      end
    end
  end
end

module Kafka
  class ConsumerGroup
    prepend Kafka::EC2::Ext::ConsumerGroup
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "net/http"
|
4
|
+
|
5
|
+
require "kafka/protocol/consumer_group_protocol"
|
6
|
+
require "kafka/protocol/join_group_request"
|
7
|
+
|
8
|
+
module Kafka
  class EC2
    module Ext
      module Protocol
        # Prepended into Kafka::Protocol::JoinGroupRequest. When the group is
        # using the mixed-instance strategy, replaces the group protocols with
        # a single "mixedinstance" protocol whose user data carries this
        # host's EC2 instance information.
        module JoinGroupRequest
          def initialize(*args, topics:, **kwargs)
            super
            strategy_class = Kafka::EC2.assignment_strategy_classes[@group_id]
            return unless strategy_class == Kafka::EC2::MixedInstanceAssignmentStrategy

            @group_protocols = {
              "mixedinstance" => Kafka::Protocol::ConsumerGroupProtocol.new(topics: topics, user_data: instance_user_data),
            }
          end

          private

          # Queries the EC2 instance metadata service (IMDSv1 endpoint) and
          # formats the result as "|<instance-id>,<instance-type>,<az>".
          def instance_user_data
            Net::HTTP.start("169.254.169.254", 80) do |http|
              id = http.get("/latest/meta-data/instance-id").body
              type = http.get("/latest/meta-data/instance-type").body
              zone = http.get("/latest/meta-data/placement/availability-zone").body
              "|#{id},#{type},#{zone}"
            end
          end
        end
      end
    end
  end
end

module Kafka
  module Protocol
    class JoinGroupRequest
      prepend Kafka::EC2::Ext::Protocol::JoinGroupRequest
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kafka"
|
4
|
+
require "kafka/protocol/member_assignment"
|
5
|
+
|
6
|
+
module Kafka
  class EC2
    # Assigns topic partitions to consumer group members in proportion to the
    # capacity of the EC2 instance each member runs on, where capacity is the
    # product of the instance-family weight and the availability-zone weight.
    class MixedInstanceAssignmentStrategy
      # metadata is a byte sequence created by Kafka::Protocol::ConsumerGroupProtocol.encode;
      # each value is expected to end with "instance_id,instance_type,availability_zone"
      # (see Ext::Protocol::JoinGroupRequest).
      attr_accessor :member_id_to_metadata

      # @param cluster [Kafka::Cluster]
      # @param instance_family_weights [Hash{String => Numeric}, Proc] a hash whose the key
      #   is the instance family and whose value is the weight. If the object is a proc,
      #   it must returns such a hash and the proc is called every time the method "assign"
      #   is called.
      # @param availability_zone_weights [Hash{String => Numeric}, Proc] a hash whose the key
      #   is the availability zone and whose value is the weight. If the object is a proc,
      #   it must returns such a hash and the proc is called every time the method "assign"
      #   is called.
      def initialize(cluster:, instance_family_weights:, availability_zone_weights:)
        @cluster = cluster
        @instance_family_weights = instance_family_weights
        @availability_zone_weights = availability_zone_weights
      end

      # Assign the topic partitions to the group members.
      #
      # @param members [Array<String>] member ids
      # @param topics [Array<String>] topics
      # @return [Hash{String => Protocol::MemberAssignment}] a hash mapping member
      #   ids to assignments.
      def assign(members:, topics:)
        # Guard: with no members there is nothing to assign; the code below
        # would otherwise divide by a zero total capacity.
        return {} if members.empty?

        group_assignment = {}
        instance_id_to_capacity = Hash.new(0)
        instance_id_to_member_ids = Hash.new { |h, k| h[k] = [] }
        total_capacity = 0

        # Procs are re-evaluated on every call so weights can change at runtime.
        instance_family_to_capacity = @instance_family_weights.is_a?(Proc) ? @instance_family_weights.call : @instance_family_weights
        az_to_capacity = @availability_zone_weights.is_a?(Proc) ? @availability_zone_weights.call : @availability_zone_weights

        members.each do |member_id|
          group_assignment[member_id] = Protocol::MemberAssignment.new

          instance_id, instance_type, az = member_id_to_metadata[member_id].split(",")
          instance_id_to_member_ids[instance_id] << member_id
          capacity = calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
          instance_id_to_capacity[instance_id] += capacity
          total_capacity += capacity
        end

        # Flatten all topics into [topic, partition_id] pairs.
        topic_partitions = topics.flat_map do |topic|
          begin
            partitions = @cluster.partitions_for(topic).map(&:partition_id)
          rescue UnknownTopicOrPartition
            raise UnknownTopicOrPartition, "unknown topic #{topic}"
          end
          Array.new(partitions.count) { topic }.zip(partitions)
        end

        # Use float division: with integer weights, integer division would
        # truncate the proportional share and push too many partitions into
        # the round-robin remainder below, skewing the distribution.
        partition_count_per_capacity = topic_partitions.size.fdiv(total_capacity)
        last_index = 0
        # Hand out contiguous slices of partitions, largest capacity first,
        # spreading each instance's slice across its members round-robin.
        instance_id_to_capacity.sort_by { |_, capacity| -capacity }.each do |instance_id, capacity|
          partition_count = (capacity * partition_count_per_capacity).round
          member_ids = instance_id_to_member_ids[instance_id]
          topic_partitions[last_index, partition_count]&.each_with_index do |(topic, partition), index|
            member_id = member_ids[index % member_ids.size]
            group_assignment[member_id].assign(topic, [partition])
          end

          last_index += partition_count
        end

        # Rounding can leave a few partitions unassigned; spread them
        # round-robin over every member.
        if last_index < topic_partitions.size
          member_ids = instance_id_to_member_ids.values.flatten
          topic_partitions[last_index, topic_partitions.size].each_with_index do |(topic, partition), index|
            member_id = member_ids[index % member_ids.size]
            group_assignment[member_id].assign(topic, [partition])
          end
        end

        group_assignment
      rescue Kafka::LeaderNotAvailable
        # NOTE(review): unbounded retry mirrors the original behavior; the
        # rebalance protocol bounds how long this can spin in practice.
        sleep 1
        retry
      end

      private

      # Capacity of one member: instance-family weight times AZ weight,
      # defaulting each missing weight to 1.
      def calculate_capacity(instance_type, az, instance_family_to_capacity, az_to_capacity)
        instance_family, = instance_type.split(".")
        instance_family_to_capacity.fetch(instance_family, 1) * az_to_capacity.fetch(az, 1)
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "kafka/ec2/mixed_instance_assignment_strategy"
|
4
|
+
|
5
|
+
module Kafka
  class EC2
    # Builds MixedInstanceAssignmentStrategy instances once a Kafka::Cluster
    # becomes available; the weights are captured up front and reused for
    # every strategy created by this factory.
    class MixedInstanceAssignmentStrategyFactory
      # @param instance_family_weights [Hash, Proc]
      # @param availability_zone_weights [Hash, Proc]
      def initialize(instance_family_weights: {}, availability_zone_weights: {})
        @instance_family_weights = instance_family_weights
        @availability_zone_weights = availability_zone_weights
      end

      # @param cluster [Kafka::Cluster]
      # @return [Kafka::EC2::MixedInstanceAssignmentStrategy]
      def create(cluster:)
        MixedInstanceAssignmentStrategy.new(
          cluster: cluster,
          instance_family_weights: @instance_family_weights,
          availability_zone_weights: @availability_zone_weights,
        )
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require_relative 'lib/kafka/ec2/version'
|
2
|
+
|
3
|
+
# Gem specification for ruby-kafka-ec2. The version constant comes from
# lib/kafka/ec2/version.rb (loaded via require_relative above).
Gem::Specification.new do |spec|
  spec.name = "ruby-kafka-ec2"
  spec.version = Kafka::EC2::VERSION
  spec.authors = ["abicky"]
  spec.email = ["takeshi.arabiki@gmail.com"]

  spec.summary = "An extension of ruby-kafka for EC2"
  spec.description = "Kafka::EC2 is an extension of ruby-kafka that provides useful features for EC2 like Kafka::EC2::MixedInstanceAssignmentStrategy."
  spec.homepage = "https://github.com/abicky/ruby-kafka-ec2"
  spec.license = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Ship everything tracked by git except tests and specs.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "ruby-kafka", ">= 0.7", "< 2"

  spec.add_development_dependency "webmock"
  spec.add_development_dependency "concurrent-ruby" # cf. https://github.com/zendesk/ruby-kafka/pull/835
end
|
metadata
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby-kafka-ec2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- abicky
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-06-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ruby-kafka
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.7'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '2'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.7'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: webmock
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: concurrent-ruby
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
description: Kafka::EC2 is an extension of ruby-kafka that provides useful features
|
62
|
+
for EC2 like Kafka::EC2::MixedInstanceAssignmentStrategy.
|
63
|
+
email:
|
64
|
+
- takeshi.arabiki@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- ".github/workflows/test.yml"
|
70
|
+
- ".gitignore"
|
71
|
+
- ".rspec"
|
72
|
+
- Gemfile
|
73
|
+
- LICENSE.txt
|
74
|
+
- README.md
|
75
|
+
- Rakefile
|
76
|
+
- benchmark/.gitignore
|
77
|
+
- benchmark/Dockerfile
|
78
|
+
- benchmark/Dockerfile.dockerignore
|
79
|
+
- benchmark/Gemfile
|
80
|
+
- benchmark/README.md
|
81
|
+
- benchmark/aws.tf
|
82
|
+
- benchmark/consume_messages.rb
|
83
|
+
- benchmark/consume_messages.sh
|
84
|
+
- benchmark/produce_messages.rb
|
85
|
+
- benchmark/produce_messages.sh
|
86
|
+
- benchmark/register_docker_image.sh
|
87
|
+
- benchmark/ruby-kafka-ec2.log
|
88
|
+
- benchmark/ruby-kafka.log
|
89
|
+
- bin/console
|
90
|
+
- bin/setup
|
91
|
+
- lib/kafka/ec2.rb
|
92
|
+
- lib/kafka/ec2/ext/consumer_group.rb
|
93
|
+
- lib/kafka/ec2/ext/protocol/join_group_request.rb
|
94
|
+
- lib/kafka/ec2/mixed_instance_assignment_strategy.rb
|
95
|
+
- lib/kafka/ec2/mixed_instance_assignment_strategy_factory.rb
|
96
|
+
- lib/kafka/ec2/version.rb
|
97
|
+
- ruby-kafka-ec2.gemspec
|
98
|
+
homepage: https://github.com/abicky/ruby-kafka-ec2
|
99
|
+
licenses:
|
100
|
+
- MIT
|
101
|
+
metadata:
|
102
|
+
homepage_uri: https://github.com/abicky/ruby-kafka-ec2
|
103
|
+
source_code_uri: https://github.com/abicky/ruby-kafka-ec2
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 2.3.0
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubygems_version: 3.1.2
|
120
|
+
signing_key:
|
121
|
+
specification_version: 4
|
122
|
+
summary: An extension of ruby-kafka for EC2
|
123
|
+
test_files: []
|