fluent-plugin-datahub 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/README.md +38 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/build.sh +11 -0
- data/fluent-plugin-datahub.gemspec +22 -0
- data/lib/fluent/plugin/datahub/datahub-client.rb +27 -0
- data/lib/fluent/plugin/datahub/datahub-http-client-test.rb +343 -0
- data/lib/fluent/plugin/datahub/datahub-http-client.rb +229 -0
- data/lib/fluent/plugin/datahub/datahub-project.rb +59 -0
- data/lib/fluent/plugin/datahub/datahub-put-record-result.rb +23 -0
- data/lib/fluent/plugin/datahub/datahub-record-entity.rb +136 -0
- data/lib/fluent/plugin/datahub/datahub-record-schema.rb +73 -0
- data/lib/fluent/plugin/datahub/datahub-shard.rb +13 -0
- data/lib/fluent/plugin/datahub/datahub-topic.rb +73 -0
- data/lib/fluent/plugin/out_datahub.rb +402 -0
- data/sample/csv_sample.conf +22 -0
- data/sample/csv_sample.csv +14 -0
- data/sample/log_sample.conf +17 -0
- data/sample/log_sample.log +1 -0
- metadata +97 -0
@@ -0,0 +1,229 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "openssl"
|
3
|
+
require "base64"
|
4
|
+
require "date"
|
5
|
+
require "time"
|
6
|
+
require "json"
|
7
|
+
require_relative "datahub-record-schema"
|
8
|
+
require_relative "datahub-record-entity"
|
9
|
+
|
10
|
+
########################
|
11
|
+
## @author pholy.ht ##
|
12
|
+
## @time 2016-03-24 ##
|
13
|
+
########################
|
14
|
+
# Minimal HTTP client for the DataHub REST API.
#
# Every request is signed with the scheme implemented in #get_signature and
# exchanged as JSON. The public methods are thin wrappers that build a
# resource path plus a parameter hash and delegate to #send_request.
class DatahubHttpClient
  # Content type that is both sent with and signed into every request.
  CONTENT_TYPE = "application/json"

  # Value of the "x-datahub-client-version" header; it is part of the
  # signed string, so the header and the signature must always agree.
  CLIENT_VERSION = "1"

  # Response codes treated as success by #send_request.
  SUCCESS_CODES = %w[200 201].freeze

  # Supported HTTP verbs and the Net::HTTP request class used for each.
  REQUEST_CLASSES = {
    "GET"    => Net::HTTP::Get,
    "POST"   => Net::HTTP::Post,
    "PUT"    => Net::HTTP::Put,
    "DELETE" => Net::HTTP::Delete
  }.freeze

  # Verbs whose request carries the JSON-encoded params as body.
  VERBS_WITH_BODY = %w[POST PUT].freeze

  attr_accessor :access_id
  attr_accessor :access_key
  attr_accessor :endpoint

  # @param endpoint   [String] base URL of the service; request paths are
  #                            appended to it verbatim
  # @param access_id  [String] access id placed in the Authorization header
  # @param access_key [String] secret used as the HMAC key
  def initialize(endpoint, access_id, access_key)
    @endpoint = endpoint
    @access_id = access_id
    @access_key = access_key
  end

  # Builds the value of the Authorization header.
  #
  #   Signature = base64(HmacSha1(AccessKeySecret, VERB + "\n"
  #                               + CONTENT-TYPE + "\n"
  #                               + DATE + "\n"
  #                               + CanonicalizedDatahubHeaders + "\n"
  #                               + CanonicalizedResource))
  #
  # Example of the signed string:
  #   "GET\napplication/json\nFri, 06 May 2016 06:43:31 GMT\nx-datahub-client-version:1\n/projects/test_project/topics/datahub_fluentd_out_1"
  #
  # @param params [Hash] must contain the String keys "verb", "content-type",
  #                      "resource" and "date"
  # @return [String] "DATAHUB <access_id>:<base64 signature>"
  def get_signature(params)
    string_to_sign = [
      params["verb"],
      params["content-type"],
      params["date"],
      "x-datahub-client-version:#{CLIENT_VERSION}",
      params["resource"]
    ].join("\n")
    digest = OpenSSL::HMAC.digest("sha1", @access_key, string_to_sign)
    "DATAHUB #{@access_id}:#{Base64.strict_encode64(digest)}"
  end

  # Sends one signed request and returns the decoded JSON response.
  #
  # @param method  [String] "GET", "POST", "PUT" or "DELETE"
  # @param path    [String] resource path; appended to the endpoint and used
  #                         as the canonicalized resource of the signature
  # @param params  [Hash]   JSON-encoded as the body of POST/PUT requests,
  #                         ignored for GET/DELETE
  # @param headers [Hash]   extra headers, applied last so they can override
  #                         the defaults set here
  # @return [Object, nil] parsed JSON body, or nil when the body is empty
  # @raise [RuntimeError] for an unsupported method, or for any response
  #                       code other than 200/201 (message is the response
  #                       body when there is one)
  def send_request(method, path, params={}, headers={})
    uri = URI(@endpoint + path)
    http_req = build_request(method, uri.path, params)

    date = Time.now.utc.strftime("%a, %d %b %Y %T") + " GMT"
    http_req["Authorization"] = get_signature(
      "verb" => method,
      "content-type" => CONTENT_TYPE,
      "resource" => path,
      "date" => date
    )
    http_req["x-datahub-client-version"] = CLIENT_VERSION
    http_req["Date"] = date
    http_req["Content-Type"] = CONTENT_TYPE
    # Assigning nil removes the header, so Net::HTTP does not ask for a
    # compressed response.
    http_req["ACCEPT-ENCODING"] = nil

    headers.each { |key, value| http_req[key] = value }

    # Honour the endpoint scheme: without use_ssl an https:// endpoint
    # would be contacted in plain text on port 443.
    response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |connection|
      connection.request(http_req)
    end
    parse_response(response)
  end

  def list_projects
    send_request("GET", "/projects")
  end

  def get_project(project_name)
    send_request("GET", project_path(project_name))
  end

  def create_project(project_name, comment)
    send_request("POST", project_path(project_name), "Comment" => comment)
  end

  # DataHub does not implement the project update operation yet.
  # def update_project(project_name, comment)
  #   path = "/projects/" + project_name
  #   params = {}
  #   params["Comment"] = comment
  #   return send_request("PUT", path, params)
  # end

  def delete_project(project_name)
    send_request("DELETE", project_path(project_name))
  end

  def list_topics(project_name)
    send_request("GET", "#{project_path(project_name)}/topics")
  end

  def get_topic(project_name, topic_name)
    send_request("GET", topic_path(project_name, topic_name))
  end

  # @param record_schema [Object] serialized with JSON.generate and sent as
  #                               a JSON string inside the JSON body
  def create_topic(project_name, topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    params = {
      "ShardCount" => shard_count,
      "Lifecycle" => lifecycle,
      "RecordType" => record_type,
      "RecordSchema" => JSON.generate(record_schema),
      "Comment" => comment
    }
    send_request("POST", topic_path(project_name, topic_name), params)
  end

  def update_topic(project_name, topic_name, lifecycle, comment)
    params = { "Lifecycle" => lifecycle, "Comment" => comment }
    send_request("PUT", topic_path(project_name, topic_name), params)
  end

  def delete_topic(project_name, topic_name)
    send_request("DELETE", topic_path(project_name, topic_name))
  end

  def list_shards(project_name, topic_name)
    send_request("GET", shards_path(project_name, topic_name))
  end

  ##########################################################
  ## Shard merge and split are not implemented.
  ## DataHub has not implemented them in this release, so
  ## the API is not exposed.
  ##########################################################
  # def merge_shard(project_name, topic_name, shard_id, adjacent_shard_id)
  #   path = "/projects/" + project_name + "/topics/" + topic_name + "/shards"
  #   params = {}
  #   params["Action"] = "merge"
  #   params["ShardId"] = shard_id
  #   params["AdjacentShardId"] = adjacent_shard_id
  #   return send_request("POST", path, params)
  # end

  # def split_shard(project_name, topic_name, shard_id, split_key)
  #   path = "/projects/" + project_name + "/topics/" + topic_name + "/shards"
  #   params = {}
  #   params["Action"] = "split"
  #   params["ShardId"] = shard_id
  #   params["SplitKey"] = split_key
  #   return send_request("POST", path, params)
  # end

  # Publishes records to a topic ("pub" action on the shards resource).
  #
  # @param record_entities [Array] objects that serialize themselves through
  #                                to_json when the body is generated
  def write_data_to_topic(project_name, topic_name, record_entities)
    params = { "Action" => "pub", "Records" => record_entities }
    send_request("POST", shards_path(project_name, topic_name), params)
  end

  # Requests a read cursor for a shard ("cursor" action).
  #
  # @param offset [String] sent as "SystemTime"; defaults to the current
  #                        time in milliseconds since the epoch
  # @param type   [String] sent as "Type"
  def get_shard_cursor(project_name, topic_name, shard_id, offset=DateTime.now.strftime('%Q'), type="LATEST")
    params = { "Action" => "cursor", "SystemTime" => offset, "Type" => type }
    send_request("POST", shard_path(project_name, topic_name, shard_id), params)
  end

  # Reads up to +count+ records starting at +cursor+ ("sub" action).
  def read_data_from_shard_with_cursor(project_name, topic_name, shard_id, cursor, count)
    params = { "Action" => "sub", "Cursor" => cursor, "Limit" => count }
    send_request("POST", shard_path(project_name, topic_name, shard_id), params)
  end

  # Convenience wrapper: fetches a cursor for +offset+/+type+ first and then
  # reads up to +count+ records from it (two requests).
  def read_data_from_shard(project_name, topic_name, shard_id, count, offset=DateTime.now.strftime('%Q'), type="LATEST")
    cursor = get_shard_cursor(project_name, topic_name, shard_id, offset, type).to_hash["Cursor"]
    read_data_from_shard_with_cursor(project_name, topic_name, shard_id, cursor, count)
  end

  private

  # Creates the Net::HTTP request object for +method+, attaching the
  # JSON-encoded params as body for the verbs that carry one.
  def build_request(method, request_path, params)
    request_class = REQUEST_CLASSES[method]
    raise "Unsupported method: #{method}" if request_class.nil?

    request = request_class.new(request_path)
    request.body = JSON.generate(params) if VERBS_WITH_BODY.include?(method)
    request
  end

  # Turns a response into the parsed JSON body, or raises for any response
  # code other than 200/201.
  def parse_response(http_resp)
    # Net::HTTP reports a missing body as nil; normalise it so the empty
    # checks below cannot fail with NoMethodError.
    body = http_resp.body.to_s
    unless SUCCESS_CODES.include?(http_resp.code)
      raise "Send request failed, unknown response " if body.empty?
      raise body
    end
    body.empty? ? nil : JSON.parse(body)
  end

  # Path helpers use interpolation so non-String ids (for example an Integer
  # shard id) produce a path instead of a TypeError.
  def project_path(project_name)
    "/projects/#{project_name}"
  end

  def topic_path(project_name, topic_name)
    "#{project_path(project_name)}/topics/#{topic_name}"
  end

  def shards_path(project_name, topic_name)
    "#{topic_path(project_name, topic_name)}/shards"
  end

  def shard_path(project_name, topic_name, shard_id)
    "#{shards_path(project_name, topic_name)}/#{shard_id}"
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require_relative "datahub-http-client"
|
2
|
+
require_relative "datahub-topic"
|
3
|
+
|
4
|
+
# Project-scoped facade over a DatahubHttpClient: every call is forwarded to
# the client with the project name filled in.
class DatahubProject
  # @param datahub_http_client [DatahubHttpClient] client used for all requests
  # @param project_name        [String] project every operation applies to
  def initialize(datahub_http_client, project_name)
    @client = datahub_http_client
    @project_name = project_name
  end

  # Lists the topics of this project.
  #
  # Only the client, project name and topic name are passed to each
  # DatahubTopic; no per-topic details are fetched here (use #get_topic for
  # that).
  #
  # @return [Array<DatahubTopic>] one entry per name under "TopicNames";
  #                               empty when the response has none
  def list_topics
    # The client call needs the project name, and the topics must share this
    # project's client (the instance variable, not a local).
    response = @client.list_topics(@project_name)
    topic_names = (response || {})["TopicNames"] || []
    topic_names.map do |topic_name|
      DatahubTopic.new(@client, @project_name, topic_name)
    end
  end

  # Creates a topic and returns it as read back from the service.
  #
  # @return [DatahubTopic] result of #get_topic for the new topic
  def create_topic(topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    @client.create_topic(@project_name, topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    get_topic(topic_name)
  end

  # Updates lifecycle and comment of a topic; returns the client's result.
  def update_topic(topic_name, lifecycle, desc)
    @client.update_topic(@project_name, topic_name, lifecycle, desc)
  end

  # Deletes a topic; returns the client's result.
  def delete_topic(topic_name)
    @client.delete_topic(@project_name, topic_name)
  end

  # Fetches a topic and maps the response onto a DatahubTopic.
  #
  # The "RecordSchema" entry of the response is itself a JSON string; its
  # "fields" array (entries with "name" and "type") is turned into a
  # RecordSchema of RecordField objects.
  #
  # @return [DatahubTopic]
  def get_topic(topic_name)
    result_map = @client.get_topic(@project_name, topic_name)

    record_schema = RecordSchema.new
    JSON.parse(result_map["RecordSchema"])["fields"].each do |field|
      record_schema.add_field(RecordField.new(field["name"], field["type"]))
    end

    topic = DatahubTopic.new(@client, @project_name, topic_name)
    topic.shard_count = result_map["ShardCount"]
    topic.lifecycle = result_map["Lifecycle"]
    topic.record_type = result_map["RecordType"]
    topic.record_schema = record_schema
    topic.comment = result_map["Comment"]
    topic.create_time = result_map["CreateTime"]
    topic.last_modify_time = result_map["LastModifyTime"]
    topic
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
# Plain result holder with four writable attributes: a failure counter and
# three lists (indexes, errors, records). It starts with a zero count and
# empty lists; the code that fills it in lives outside this class.
class PutRecordResult
  attr_accessor :failed_record_count,
                :failed_record_index,
                :failed_record_error,
                :failed_record_list

  def initialize
    @failed_record_list = []
    @failed_record_error = []
    @failed_record_index = []
    @failed_record_count = 0
  end

  # Serializes the four attributes as a JSON object keyed by attribute name.
  # Arguments are forwarded untouched to Hash#to_json so the object can be
  # nested inside other structures handled by the JSON generator.
  #
  # @return [String] JSON text
  def to_json(*a)
    payload = {}
    payload["failed_record_count"] = @failed_record_count
    payload["failed_record_index"] = @failed_record_index
    payload["failed_record_error"] = @failed_record_error
    payload["failed_record_list"] = @failed_record_list
    payload.to_json(*a)
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
|
2
|
+
# One record to be written to a topic: column values keyed by field name,
# an optional shard id and free-form attributes.
#
# The typed setters (setBigInt, setDouble, ...) validate and normalise a
# value before storing it; #to_json produces the wire representation, in
# which every non-nil column value is sent as a string in schema field order.
class RecordEntity
  # Exclusive upper bound for String#length accepted by #setString.
  # Kept as a global because that is the name the original code publishes.
  $STRING_MAX_LENGTH = 1 * 1024 * 1024

  # @param schema [Object] must respond to get_fields (indexable list of
  #                        objects responding to get_name) and get_encoding
  def initialize(schema)
    @columns_map = {}
    @shard_id = nil
    @attributes = {}
    @schema = schema
  end

  # Stores +value+ for column +name+ without any validation.
  def set(name, value)
    @columns_map[name] = value
  end

  # @param shard_id [String]
  # @raise [RuntimeError] when +shard_id+ is not a String
  def set_shard_id(shard_id)
    raise "shard_id must be String type!" unless shard_id.is_a?(String)

    @shard_id = shard_id
  end

  def add_attribute(name, value)
    @attributes[name] = value
  end

  def get_columns_map
    @columns_map
  end

  def get_schema
    @schema
  end

  # Stores an Integer; also accepts nil and Strings that are the canonical
  # decimal form of an integer (value.to_i.to_s == value).
  #
  # @raise [RuntimeError] for any other value
  def setBigInt(name, value)
    store_integer(name, value, "value show be Integer")
  end

  # Stores a Float; also accepts nil and Strings that Kernel#Float can parse.
  #
  # @raise [RuntimeError] for any other value
  def setDouble(name, value)
    case value
    when nil, Float
      set(name, value)
    when String
      begin
        set(name, Float(value))
      rescue ArgumentError
        raise "value show be Float, name:" + name.to_s + " value:" + value.to_s
      end
    else
      raise "value show be Float, name:" + name.to_s + " value:" + value.to_s
    end
  end

  # Stores a boolean; also accepts nil and the Strings "true" / "false",
  # which are stored as real booleans rather than as the raw string.
  #
  # @raise [RuntimeError] for any other value
  def setBoolean(name, value)
    case value
    when nil, true, false
      set(name, value)
    when "true"
      set(name, true)
    when "false"
      set(name, false)
    when String
      raise "value must be true or false, name:" + name.to_s + " value:" + value.to_s
    else
      raise "value must be bool or string[true,false], name:" + name.to_s + " value:" + value.to_s
    end
  end

  # Stores a timestamp given as Integer, canonical integer String, or nil.
  # The unit is not checked here.
  #
  # @raise [RuntimeError] for any other value
  def setTimeStamp(name, value)
    store_integer(name, value, "value should be Integer")
  end

  # Stores a String shorter than $STRING_MAX_LENGTH characters, or nil.
  #
  # When the schema declares an encoding, the stored value is a copy tagged
  # with that encoding; the caller's string is left untouched, so frozen
  # strings are accepted as well.
  #
  # @raise [RuntimeError] when +value+ is not a String or is too long
  def setString(name, value)
    return set(name, value) if value.nil?

    unless value.is_a?(String) && value.length < $STRING_MAX_LENGTH
      raise "value show be String and len < " + $STRING_MAX_LENGTH.to_s + ", name:" + name.to_s + " value:" + value.to_s
    end

    encoding = @schema.get_encoding
    value = value.dup.force_encoding(encoding) unless encoding.nil?
    set(name, value)
  end

  # Column values in schema field order: nil for unset/nil columns, the
  # value's to_s otherwise.
  #
  # @return [Array<String, nil>]
  def get_stored_column_values
    @schema.get_fields.map do |field|
      stored = @columns_map[field.get_name]
      stored.nil? ? nil : stored.to_s
    end
  end

  # Serializes the record as a JSON object with the keys "Data", "ShardId"
  # and "Attributes". Arguments are forwarded to Hash#to_json.
  #
  # @return [String] JSON text
  def to_json(*a)
    {
      "Data" => get_stored_column_values,
      "ShardId" => @shard_id,
      "Attributes" => @attributes
    }.to_json(*a)
  end

  private

  # Shared implementation of the integer-typed setters; +complaint+ is the
  # leading part of the error message, which differs between callers.
  def store_integer(name, value, complaint)
    if value.nil? || value.is_a?(Integer)
      set(name, value)
    elsif value.is_a?(String) && value.to_i.to_s == value
      set(name, value.to_i)
    else
      raise complaint + ", name:" + name.to_s + " value:" + value.to_s
    end
  end
end
|