fluent-plugin-datahub 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ require "net/http"
2
+ require "openssl"
3
+ require "base64"
4
+ require "date"
5
+ require "time"
6
+ require "json"
7
+ require_relative "datahub-record-schema"
8
+ require_relative "datahub-record-entity"
9
+
10
+ ########################
11
+ ## @author pholy.ht ##
12
+ ## @time 2016-03-24 ##
13
+ ########################
14
# HTTP client for the DataHub REST API.
#
# Every request is sent with "Content-Type: application/json", the fixed
# header "x-datahub-client-version: 1" and an "Authorization" header built by
# #get_signature. POST and PUT requests carry their params as a JSON body.
class DatahubHttpClient
  # Net::HTTP request class for each verb accepted by #send_request.
  REQUEST_CLASSES = {
    "GET" => Net::HTTP::Get,
    "POST" => Net::HTTP::Post,
    "PUT" => Net::HTTP::Put,
    "DELETE" => Net::HTTP::Delete
  }.freeze

  # Verbs whose params are serialized into a JSON request body.
  VERBS_WITH_BODY = %w[POST PUT].freeze

  # Response status codes treated as success by #send_request.
  SUCCESS_CODES = %w[200 201].freeze

  # Content type sent with, and signed into, every request.
  CONTENT_TYPE = "application/json"

  attr_accessor :access_id
  attr_accessor :access_key
  attr_accessor :endpoint

  # endpoint   - base URL of the service; request paths are appended to it.
  # access_id  - identifier placed in the Authorization header.
  # access_key - secret used as the HMAC-SHA1 key when signing.
  def initialize(endpoint, access_id, access_key)
    @endpoint = endpoint
    @access_id = access_id
    @access_key = access_key
  end

  # Builds the value of the "Authorization" header:
  #
  #   "DATAHUB " + access_id + ":" +
  #     base64(HmacSha1(access_key, VERB + "\n"
  #                                 + CONTENT-TYPE + "\n"
  #                                 + DATE + "\n"
  #                                 + CanonicalizedDatahubHeaders + "\n"
  #                                 + CanonicalizedResource))
  #
  # The canonicalized headers are always the single line
  # "x-datahub-client-version:1".
  #
  # params - Hash with the String keys "verb", "content-type", "date" and
  #          "resource".
  #
  # Returns the header value as a String without any line feed.
  # Raises KeyError when one of the four keys is missing.
  def get_signature(params)
    string_to_sign = [
      params.fetch("verb"),
      params.fetch("content-type"),
      params.fetch("date"),
      "x-datahub-client-version:1",
      params.fetch("resource")
    ].join("\n")
    digest = OpenSSL::HMAC.digest("sha1", @access_key, string_to_sign)
    # strict_encode64 never inserts line feeds, so nothing has to be stripped.
    "DATAHUB " + @access_id + ":" + Base64.strict_encode64(digest)
  end

  # Sends one signed request and decodes the JSON response.
  #
  # method  - "GET", "POST", "PUT" or "DELETE".
  # path    - request path appended to the endpoint; it is also the resource
  #           that gets signed.
  # params  - Hash serialized as the JSON body of POST and PUT requests;
  #           ignored for GET and DELETE.
  # headers - extra request headers; applied last, so they override the
  #           headers set here.
  #
  # Returns the parsed JSON body, or nil when the response body is empty.
  # Raises RuntimeError for an unsupported method, and for any response whose
  # status is not 200 or 201 (the message is the response body when present).
  def send_request(method, path, params={}, headers={})
    uri = URI(@endpoint + path)
    request_class = REQUEST_CLASSES[method]
    raise "Unsupported method: " + method.to_s if request_class.nil?

    http_req = request_class.new(uri.path)
    http_req.body = JSON.generate(params) if VERBS_WITH_BODY.include?(method)

    sig_params = {
      "verb" => method,
      "content-type" => CONTENT_TYPE,
      "resource" => path,
      # RFC 1123 date in GMT, e.g. "Fri, 06 May 2016 06:43:31 GMT".
      "date" => Time.now.httpdate
    }

    http_req["Authorization"] = get_signature(sig_params)
    http_req["x-datahub-client-version"] = "1"
    http_req["Date"] = sig_params["date"]
    http_req["Content-Type"] = sig_params["content-type"]
    # Assigning nil removes the Accept-Encoding header that Net::HTTP adds by
    # default.
    http_req["ACCEPT-ENCODING"] = nil

    headers.each do |key, value|
      http_req[key] = value
    end

    # Without use_ssl an https:// endpoint would be spoken to in plain HTTP.
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |client|
      http_resp = client.request(http_req)
      # body is nil for responses that carry no body at all.
      body = http_resp.body.to_s
      unless SUCCESS_CODES.include?(http_resp.code)
        raise "Send request failed, unknown response " if body.empty?
        raise body
      end
      body.empty? ? nil : JSON.parse(body)
    end
  end

  # GET /projects
  def list_projects()
    send_request("GET", "/projects")
  end

  # GET /projects/<project_name>
  def get_project(project_name)
    send_request("GET", project_path(project_name))
  end

  # POST /projects/<project_name> with the given comment.
  def create_project(project_name, comment)
    send_request("POST", project_path(project_name), { "Comment" => comment })
  end

  # DataHub does not currently implement the project update operation.
  # def update_project(project_name, comment)
  #   path = "/projects/" + project_name
  #   params = {}
  #   params["Comment"] = comment
  #   return send_request("PUT", path, params)
  # end

  # DELETE /projects/<project_name>
  def delete_project(project_name)
    send_request("DELETE", project_path(project_name))
  end

  # GET /projects/<project_name>/topics
  def list_topics(project_name)
    send_request("GET", project_path(project_name) + "/topics")
  end

  # GET /projects/<project_name>/topics/<topic_name>
  def get_topic(project_name, topic_name)
    send_request("GET", topic_path(project_name, topic_name))
  end

  # POST /projects/<project_name>/topics/<topic_name>
  #
  # record_schema is serialized to a JSON string before being embedded in the
  # request body.
  def create_topic(project_name, topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    params = {
      "ShardCount" => shard_count,
      "Lifecycle" => lifecycle,
      "RecordType" => record_type,
      "RecordSchema" => JSON.generate(record_schema),
      "Comment" => comment
    }
    send_request("POST", topic_path(project_name, topic_name), params)
  end

  # PUT /projects/<project_name>/topics/<topic_name>
  def update_topic(project_name, topic_name, lifecycle, comment)
    params = {
      "Lifecycle" => lifecycle,
      "Comment" => comment
    }
    send_request("PUT", topic_path(project_name, topic_name), params)
  end

  # DELETE /projects/<project_name>/topics/<topic_name>
  def delete_topic(project_name, topic_name)
    send_request("DELETE", topic_path(project_name, topic_name))
  end

  # GET /projects/<project_name>/topics/<topic_name>/shards
  def list_shards(project_name, topic_name)
    send_request("GET", shards_path(project_name, topic_name))
  end

  ##########################################################
  ## Shard merge and split are not implemented.
  ## DataHub has not implemented them in this phase, so the
  ## API is not exposed.
  ##########################################################
  # def merge_shard(project_name, topic_name, shard_id, adjacent_shard_id)
  #   path = "/projects/" + project_name + "/topics/" + topic_name + "/shards"
  #   params = {}
  #   params["Action"] = "merge"
  #   params["ShardId"] = shard_id
  #   params["AdjacentShardId"] = adjacent_shard_id
  #   return send_request("POST", path, params)
  # end

  # def split_shard(project_name, topic_name, shard_id, split_key)
  #   path = "/projects/" + project_name + "/topics/" + topic_name + "/shards"
  #   params = {}
  #   params["Action"] = "split"
  #   params["ShardId"] = shard_id
  #   params["SplitKey"] = split_key
  #   return send_request("POST", path, params)
  # end

  # Publishes records: POST .../shards with Action "pub".
  #
  # record_entities - the records; they are serialized by JSON.generate.
  def write_data_to_topic(project_name, topic_name, record_entities)
    params = {
      "Action" => "pub",
      "Records" => record_entities
    }
    send_request("POST", shards_path(project_name, topic_name), params)
  end

  # Requests a cursor: POST .../shards/<shard_id> with Action "cursor".
  #
  # offset - sent as "SystemTime"; defaults to the current time in
  #          milliseconds since the epoch, as a String.
  # type   - sent as "Type"; defaults to "LATEST".
  def get_shard_cursor(project_name, topic_name, shard_id, offset=DateTime.now.strftime('%Q'), type="LATEST")
    params = {
      "Action" => "cursor",
      "SystemTime" => offset,
      "Type" => type
    }
    send_request("POST", shard_path(project_name, topic_name, shard_id), params)
  end

  # Reads records: POST .../shards/<shard_id> with Action "sub".
  #
  # cursor - sent as "Cursor".
  # count  - sent as "Limit".
  def read_data_from_shard_with_cursor(project_name, topic_name, shard_id, cursor, count)
    params = {
      "Action" => "sub",
      "Cursor" => cursor,
      "Limit" => count
    }
    send_request("POST", shard_path(project_name, topic_name, shard_id), params)
  end

  # Fetches a cursor with #get_shard_cursor, then reads up to count records
  # starting at it.
  def read_data_from_shard(project_name, topic_name, shard_id, count, offset=DateTime.now.strftime('%Q'), type="LATEST")
    cursor = get_shard_cursor(project_name, topic_name, shard_id, offset, type).to_hash["Cursor"]
    read_data_from_shard_with_cursor(project_name, topic_name, shard_id, cursor, count)
  end

  private

  # Path of one project.
  def project_path(project_name)
    "/projects/" + project_name
  end

  # Path of one topic inside a project.
  def topic_path(project_name, topic_name)
    project_path(project_name) + "/topics/" + topic_name
  end

  # Path of the shard collection of a topic.
  def shards_path(project_name, topic_name)
    topic_path(project_name, topic_name) + "/shards"
  end

  # Path of one shard of a topic.
  def shard_path(project_name, topic_name, shard_id)
    shards_path(project_name, topic_name) + "/" + shard_id
  end
end
@@ -0,0 +1,59 @@
1
+ require_relative "datahub-http-client"
2
+ require_relative "datahub-topic"
3
+
4
# Topic-level operations scoped to one DataHub project.
#
# All remote calls are delegated to the HTTP client given to the constructor,
# always passing this project's name.
class DatahubProject
  # datahub_http_client - client object used for every remote call.
  # project_name        - name of the project this object operates on.
  def initialize(datahub_http_client, project_name)
    @client = datahub_http_client
    @project_name = project_name
  end

  # Lists the topics of the project.
  #
  # Returns an Array of DatahubTopic, one per entry of "TopicNames" in the
  # client's response. Only the names are known at this point; the topics'
  # metadata attributes are not populated (use #get_topic for that).
  def list_topics()
    # The client call needs the project name, and the topics must be built
    # with the stored client (@client).
    topics_map = @client.list_topics(@project_name) || {}
    Array(topics_map["TopicNames"]).map do |topic_name|
      DatahubTopic.new(@client, @project_name, topic_name)
    end
  end

  # Creates a topic and returns it as fetched back through #get_topic.
  def create_topic(topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    @client.create_topic(@project_name, topic_name, shard_count, lifecycle, record_type, record_schema, comment)
    get_topic(topic_name)
  end

  # Updates the lifecycle and comment of a topic.
  def update_topic(topic_name, lifecycle, desc)
    @client.update_topic(@project_name, topic_name, lifecycle, desc)
  end

  # Deletes a topic.
  def delete_topic(topic_name)
    @client.delete_topic(@project_name, topic_name)
  end

  # Fetches a topic and its metadata.
  #
  # The "RecordSchema" entry of the response is a JSON string; each element of
  # its "fields" array becomes a RecordField(name, type) added to a new
  # RecordSchema.
  #
  # Returns a DatahubTopic with shard_count, lifecycle, record_type,
  # record_schema, comment, create_time and last_modify_time set from the
  # response.
  def get_topic(topic_name)
    result_map = @client.get_topic(@project_name, topic_name)

    record_schema = RecordSchema.new()
    JSON.parse(result_map["RecordSchema"])["fields"].each do |field|
      record_schema.add_field(RecordField.new(field["name"], field["type"]))
    end

    topic = DatahubTopic.new(@client, @project_name, topic_name)
    topic.shard_count = result_map["ShardCount"]
    topic.lifecycle = result_map["Lifecycle"]
    topic.record_type = result_map["RecordType"]
    topic.record_schema = record_schema
    topic.comment = result_map["Comment"]
    topic.create_time = result_map["CreateTime"]
    topic.last_modify_time = result_map["LastModifyTime"]
    topic
  end
end
@@ -0,0 +1,23 @@
1
+
2
# #to_json relies on Hash#to_json, which the json library provides; require it
# here so the class does not depend on another file having loaded it first.
require "json"

# Mutable holder describing the records that failed during a put operation.
#
# A new instance starts with a failure count of 0 and three empty arrays; the
# class itself only stores and serializes these values.
class PutRecordResult
  attr_accessor :failed_record_count
  attr_accessor :failed_record_index
  attr_accessor :failed_record_error
  attr_accessor :failed_record_list

  def initialize
    @failed_record_count = 0
    @failed_record_index = []
    @failed_record_error = []
    @failed_record_list = []
  end

  # Serializes the four attributes as a JSON object keyed by attribute name.
  #
  # a - generator arguments, forwarded unchanged to Hash#to_json so the object
  #     can be nested inside other structures passed to JSON.generate.
  #
  # Returns the JSON text as a String.
  def to_json(*a)
    {
      "failed_record_count" => @failed_record_count,
      "failed_record_index" => @failed_record_index,
      "failed_record_error" => @failed_record_error,
      "failed_record_list" => @failed_record_list
    }.to_json(*a)
  end
end
@@ -0,0 +1,136 @@
1
+
2
# #to_json relies on Hash#to_json, which the json library provides; require it
# here so the class does not depend on another file having loaded it first.
require "json"

# One record: typed column values keyed by column name, an optional shard id
# and free-form attributes.
#
# The schema object passed to the constructor must respond to #get_fields
# (each field responding to #get_name) and #get_encoding.
class RecordEntity
  # Exclusive upper bound on String#length (characters) accepted by #setString.
  STRING_MAX_LENGTH = 1 * 1024 * 1024

  # Global alias of STRING_MAX_LENGTH, kept so code that reads the global
  # keeps working.
  $STRING_MAX_LENGTH = STRING_MAX_LENGTH

  def initialize(schema)
    @columns_map = {}

    @shard_id = nil
    @attributes = {}

    @schema = schema
  end

  # Stores value under name without any validation or conversion.
  def set(name, value)
    @columns_map[name] = value
  end

  # Sets the shard id. Raises RuntimeError unless shard_id is a String.
  def set_shard_id(shard_id)
    raise "shard_id must be String type!" unless shard_id.is_a?(String)

    @shard_id = shard_id
  end

  # Adds (or replaces) one attribute.
  def add_attribute(name, value)
    @attributes[name] = value
  end

  # Returns the Hash of column name => stored value.
  def get_columns_map
    @columns_map
  end

  # Returns the schema given to the constructor.
  def get_schema
    @schema
  end

  # Stores an integer column. Accepts nil, an Integer, or a String that is
  # exactly the decimal rendering of an integer (value.to_i.to_s == value);
  # such a String is stored as an Integer. Raises RuntimeError otherwise.
  def setBigInt(name, value)
    store_integer(name, value)
  end

  # Stores a floating point column. Accepts nil, a Float, or a String that
  # Kernel#Float can parse (stored as a Float). Everything else, including
  # Integer values, raises RuntimeError.
  def setDouble(name, value)
    case value
    when nil, Float
      set(name, value)
    when String
      begin
        set(name, Float(value))
      rescue ArgumentError
        raise "value should be Float, name:" + name.to_s + " value:" + value.to_s
      end
    else
      raise "value should be Float, name:" + name.to_s + " value:" + value.to_s
    end
  end

  # Stores a boolean column. Accepts nil, true, false, or the Strings "true"
  # and "false", which are stored as the corresponding boolean. Raises
  # RuntimeError otherwise.
  def setBoolean(name, value)
    case value
    when nil, true, false
      set(name, value)
    when "true"
      # Each branch stores exactly once, so the converted boolean is not
      # overwritten by the original String.
      set(name, true)
    when "false"
      set(name, false)
    when String
      raise "value must be true or false, name:" + name.to_s + " value:" + value.to_s
    else
      raise "value must be bool or string[true,false], name:" + name.to_s + " value:" + value.to_s
    end
  end

  # Stores a timestamp column; validated exactly like #setBigInt.
  def setTimeStamp(name, value)
    store_integer(name, value)
  end

  # Stores a string column. Accepts nil, or a String shorter than
  # STRING_MAX_LENGTH characters. When the schema reports an encoding, the
  # stored value is a copy tagged with that encoding; the caller's String is
  # left untouched, so frozen Strings are accepted too. Raises RuntimeError
  # otherwise.
  def setString(name, value)
    return set(name, value) if value.nil?

    unless value.is_a?(String) && value.length < STRING_MAX_LENGTH
      raise "value should be String and len < " + STRING_MAX_LENGTH.to_s + ", name:" + name.to_s + " value:" + value.to_s
    end

    encoding = @schema.get_encoding
    value = value.dup.force_encoding(encoding) unless encoding.nil?
    set(name, value)
  end

  # Returns the column values in schema field order: nil for a column that is
  # unset or nil, otherwise the value's #to_s.
  def get_stored_column_values
    @schema.get_fields.map do |field|
      stored = @columns_map[field.get_name]
      stored.nil? ? nil : stored.to_s
    end
  end

  # Serializes the record as a JSON object with the keys "Data", "ShardId"
  # and "Attributes".
  #
  # a - generator arguments, forwarded unchanged to Hash#to_json so records
  #     can be nested inside structures passed to JSON.generate.
  def to_json(*a)
    {
      "Data" => get_stored_column_values,
      "ShardId" => @shard_id,
      "Attributes" => @attributes
    }.to_json(*a)
  end

  private

  # Shared validation for #setBigInt and #setTimeStamp.
  def store_integer(name, value)
    if value.nil? || value.is_a?(Integer)
      set(name, value)
    elsif value.is_a?(String) && value.to_i.to_s == value
      set(name, value.to_i)
    else
      raise "value should be Integer, name:" + name.to_s + " value:" + value.to_s
    end
  end
end