fluent-plugin-datahub 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 922859012ad1c685d8396cca953cc6ca35471758
4
+ data.tar.gz: 646a600f82a8891f8fcb8e05de4db5ccbeecaadc
5
+ SHA512:
6
+ metadata.gz: 9f36f93349cb259c9c9194909186ef5d3322dabe0a1287698ec04f803e9d0c971552bb9db91ae9d6bd941e8f506990c7c9d31ec84e750f31e6c29df69241323f
7
+ data.tar.gz: 861bf12a2478ee7458d50f2843295f59be030e3410f8c1fd9f463681fffce8ebea340e834c8af8045db3d63e312e5eaae3c50d7afdb090e4ab9bb7a64aef67d5
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS so downloads cannot be read or altered in transit.
source "https://rubygems.org"

# Dependencies are declared in the gemspec.
gemspec
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # fluent-plugin-datahub
2
+ 概述
3
+ -------
4
+ fluent的datahub上传插件
5
+ 安装
6
+ -------
7
+ gem install fluent-plugin-datahub
8
+
9
+ 配置
10
+ -------
11
+ ```
12
+ <source>
13
+ @type tail
14
+ path ${DIR}/csv_sample.csv
15
+ tag test1
16
+ format csv
17
+ keys id,name,gender,salary,my_time
18
+ </source>
19
+
20
+
21
+ <match test1>
22
+ @type datahub
23
+ access_id
24
+ access_key
25
+ endpoint
26
+ project_name test_project
27
+ topic_name fluentd_out_7
28
+ # shard_id 6
29
+ column_names ["id", "name", "gender", "salary", "my_time"]
30
+ flush_interval 10s
31
+ dirty_data_continue true
32
+ dirty_data_file ${DIR}/dirty.data
33
+ retry_times 3
34
+ </match>
35
+
36
+ ```
37
+ 1、source标签中的keys为源数据,会根据key对应fields中字段<br>
38
+ 2、match标签中的column_names为要写入datahub的字段
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+
2
# Rake tasks for packaging and testing the gem.
require 'bundler'
require 'rake/testtask'

# Registers Bundler's standard gem tasks; the :build task used as the default
# below is expected to come from here.
Bundler::GemHelper.install_tasks

# `rake test` runs every test/test_*.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.test_files = FileList['test/test_*.rb']
  t.verbose = true
end

# Running plain `rake` builds the gem.
task default: [:build]
14
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/build.sh ADDED
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Builds the plugin gem and bundles it, together with ext/, sample/ and the
# README, into ./package/fluentd-with-datahub-0.12.23.tar.gz.

# Stop at the first failing step instead of archiving a half-populated tree.
set -euo pipefail

staging=./package/temp/fluentd-with-datahub

# Start from an empty staging area so leftovers from an aborted run are not
# archived; -p also tolerates an existing ./package directory on re-runs.
rm -rf ./package/temp/
mkdir -p "$staging"

gem build fluent-plugin-datahub.gemspec

cp -r ext/* "$staging/"
cp README.md "$staging/README.md"
cp -r sample "$staging/"
# NOTE(review): dependency_gem/ is presumably provided by ext/ — confirm.
cp fluent-plugin-datahub-*.gem "$staging/dependency_gem/"

tar zcvf ./package/fluentd-with-datahub-0.12.23.tar.gz -C ./package/temp/ .
rm -rf ./package/temp/
@@ -0,0 +1,22 @@
1
# encoding: utf-8
# Gem specification for fluent-plugin-datahub.
$:.push File.expand_path('../lib', __FILE__)

Gem::Specification.new do |gem|
  # Directory containing this gemspec; paths are resolved against it so the
  # spec does not depend on the caller's working directory.
  root = File.expand_path('..', __FILE__)

  gem.name        = "fluent-plugin-datahub"
  gem.description = "Aliyun Datahub output plugin for Fluentd event collector"
  gem.license     = "Apache-2.0"
  gem.homepage    = "https://datahub.console.aliyun.com/datahub"
  gem.summary     = gem.description
  gem.version     = File.read(File.join(root, "VERSION")).strip
  gem.authors     = ["Aliyun"]
  gem.email       = "stream@service.aliyun.com"
  # `has_rdoc=` is deprecated in RubyGems, so it is no longer set here.
  #gem.platform   = Gem::Platform::RUBY

  # Package every tracked file except the ext/ and package/ trees. Filtering
  # by directory prefix avoids dropping unrelated paths that merely contain
  # "ext" or "package" somewhere in their name.
  tracked = Dir.chdir(root) { `git ls-files`.split("\n") }
  gem.files = tracked.reject { |path| path.start_with?("ext/", "package/") }

  # Derived from the file list in Ruby rather than via shell brace expansion,
  # which not every /bin/sh supports.
  gem.test_files    = gem.files.grep(%r{\A(?:test|spec|features)/})
  gem.executables   = gem.files.grep(%r{\Abin/}).map { |f| File.basename(f) }
  gem.require_paths = ['lib']

  gem.add_dependency "fluentd", [">= 0.12.0", "< 2"]
  gem.add_dependency "yajl-ruby", "~> 1.0"
end
@@ -0,0 +1,27 @@
1
+ require_relative "datahub-http-client"
2
+ require_relative "datahub-project"
3
+ require_relative "datahub-topic"
4
+
5
# Entry point for working with Datahub projects.
#
# Builds one DatahubHttpClient from the connection settings and routes the
# project-level calls through it.
class DatahubClient
  # Stores the connection settings and creates the HTTP client from them.
  #
  # All three arguments are passed unchanged to DatahubHttpClient.new.
  def initialize(endpoint, access_id, access_key)
    @endpoint, @access_id, @access_key = endpoint, access_id, access_key
    @http_client = DatahubHttpClient.new(endpoint, access_id, access_key)
  end

  # Delegates to the HTTP client's create_project and returns its result.
  def create_project(project_name, comment)
    @http_client.create_project(project_name, comment)
  end

  # Delegates to the HTTP client's delete_project and returns its result.
  def delete_project(project_name)
    @http_client.delete_project(project_name)
  end

  # Returns a new DatahubProject for +project_name+ that shares this client's
  # HTTP client.
  def get_project(project_name)
    DatahubProject.new(@http_client, project_name)
  end
end
@@ -0,0 +1,343 @@
1
+ require "test/unit"
2
+ require_relative "datahub-http-client"
3
+ require_relative "datahub-client"
4
+ require_relative "datahub-record-schema"
5
+ require_relative "datahub-record-entity"
6
+
7
+ class DatahubHttpClientTest < Test::Unit::TestCase
8
+ # def setup
9
+ # @client = DatahubHttpClient.new(ENV["DATAHUB_ENDPOINT"], ENV["DATAHUB_ACCESS_ID"], ENV["DATAHUB_ACCESS_KEY"])
10
+
11
+ # @project_name = "ruby_sdk_test_project"
12
+ # begin
13
+ # @client.delete_project(@project_name)
14
+ # rescue
15
+ # end
16
+ #
17
+ # @comment = "fuju test project"
18
+ # @client.create_project(@project_name, @comment)
19
+ #
20
+ # @topic_name = "ruby_sdk_test_table_01"
21
+ #
22
+ # @record_schema = RecordSchema.new
23
+ # @record_schema.add_field(RecordField.new("id","BIGINT"))
24
+ # @record_schema.add_field(RecordField.new("name","STRING"))
25
+ # @record_schema.add_field(RecordField.new("gender","BOOLEAN"))
26
+ # @record_schema.add_field(RecordField.new("salary","DOUBLE"))
27
+ # @record_schema.add_field(RecordField.new("gmt_create","TIMESTAMP"))
28
+ #
29
+ # @topic_comment = "test table"
30
+ #
31
+ # begin
32
+ # @client.delete_topic(@project_name, @topic_name)
33
+ # rescue
34
+ # end
35
+ #
36
+ # @client.create_topic(@project_name, @topic_name, 10, 7, "TUPLE", @record_schema, @topic_comment)
37
+ # end
38
+
39
+ # def teardown
40
+ # # @client.delete_project(@project_name)
41
+ # # @client.delete_topic(@project_name, @topic_name)
42
+ # end
43
+ #
44
+ # def test_list_projects
45
+ # puts @client.list_projects
46
+ # end
47
+ #
48
+ # def test_get_project
49
+ # project_name = "test_project_5"
50
+ # @comment = "test"
51
+ # @client.create_project(project_name, @comment)
52
+ # # project存在
53
+ # project_map = @client.get_project(project_name)
54
+ # puts project_map
55
+ # assert_equal(project_map["Comment"], @comment)
56
+ # @client.delete_project(project_name)
57
+ #
58
+ # # project不存在
59
+ # begin
60
+ # project_map = @client.get_project("not_exists_project_name")
61
+ # rescue => e
62
+ # resp = JSON.parse(e.message)
63
+ # assert_equal(resp["ErrorCode"], "NoSuchProject")
64
+ # end
65
+ # end
66
+ #
67
+ # def test_create_project
68
+ # test_project_name = "test_project_3"
69
+ # begin
70
+ # @client.delete_project(test_project_name)
71
+ # rescue => e
72
+ # end
73
+ #
74
+ # comment = "jype"
75
+ # @client.create_project(test_project_name, comment)
76
+ # project = @client.get_project(test_project_name)
77
+ #
78
+ # assert_equal(project["Comment"], comment)
79
+ #
80
+ # # project重复
81
+ # begin
82
+ # @client.create_project(test_project_name, comment)
83
+ # rescue => e
84
+ # resp = JSON.parse(e.message)
85
+ # assert_equal(resp["ErrorCode"], "ProjectAlreadyExist")
86
+ # end
87
+ #
88
+ # # 清理
89
+ # @client.delete_project(test_project_name)
90
+ # end
91
+ #
92
+ #
93
+ # def test_delete_project
94
+ # test_project_name = "jyp_production_test_create_project_del"
95
+ # comment = "jype"
96
+ # @client.create_project(test_project_name, comment)
97
+ # project = @client.get_project(test_project_name)
98
+ # assert(project != nil, "project should be not null")
99
+ # @client.delete_project(test_project_name)
100
+ # begin
101
+ # new_project = @client.get_project(test_project_name)
102
+ # rescue => e
103
+ # resp = JSON.parse(e.message)
104
+ # assert_equal(resp["ErrorCode"], "NoSuchProject")
105
+ # end
106
+ # end
107
+ #
108
+ # def test_list_topics
109
+ # puts @client.list_topics(@project_name)
110
+ # end
111
+ #
112
+ # def test_write_and_read
113
+ # @client = DatahubClient.new(ENV["DATAHUB_ENDPOINT"], ENV["DATAHUB_ACCESS_ID"], ENV["DATAHUB_ACCESS_KEY"])
114
+ # @datahub_project = @client.get_project("test_project")
115
+ # # @datahub_topic = @datahub_project.get_topic("fluentd_out_6")
116
+ # # abc = @datahub_topic.record_schema
117
+ # # fields = abc.get_fields
118
+ # # puts fields[0].get_name
119
+ #
120
+ # record_schema = RecordSchema.new
121
+ # record_schema.add_field(RecordField.new("id","BIGINT"))
122
+ # record_schema.add_field(RecordField.new("name","STRING"))
123
+ # record_schema.add_field(RecordField.new("gender","BOOLEAN"))
124
+ # record_schema.add_field(RecordField.new("salary","DOUBLE"))
125
+ # record_schema.add_field(RecordField.new("gmt_create","TIMESTAMP"))
126
+ #
127
+ # #@datahub_project.create_topic("test_ruby_sdk_table_70", 12, 5, "TUPLE", record_schema, "test for ruby sdk")
128
+ # my_topic = @datahub_project.get_topic("test_ruby_sdk_table_70")
129
+ # # assert_equal(my_topic.record_schema.get_fields[0].get_name, "id")
130
+ #
131
+ # record_entities = []
132
+ # record_entity = RecordEntity.new(record_schema)
133
+ # record_entity.setBigInt("id", 1)
134
+ # record_entity.setString("name", "hj")
135
+ # record_entity.set_shard_id("0")
136
+ # record_entities.push(record_entity)
137
+ # my_topic.write_data(record_entities)
138
+ #
139
+ # cursor = my_topic.get_cursor("0")
140
+ # result = my_topic.read_data("0", cursor, 2)
141
+ # puts result
142
+ #
143
+ # end
144
+ #
145
+ #
146
+ # def hash_code(str)
147
+ # str.each_char.reduce(0) do |result, char|
148
+ # [((result << 5) - result) + char.ord].pack('L').unpack('l').first
149
+ # end
150
+ # end
151
+ #
152
+ # def test_hash
153
+ # string1 = "abcdefg,makeboluogggg"
154
+ #
155
+ # puts hash_code(string1)
156
+ # end
157
+ #
158
+ # def test_get_topic
159
+ # actual_topic = @client.get_topic(@project_name, @topic_name)
160
+ # puts actual_topic
161
+ # assert_equal(actual_topic["Comment"], @topic_comment)
162
+ # assert_equal(actual_topic["RecordType"], "TUPLE")
163
+ # assert_equal(actual_topic["ShardCount"], 10)
164
+ #
165
+ # end
166
+ #
167
+ # def test_create_topic
168
+ # record_schema = RecordSchema.new()
169
+ # id = RecordField.new("id", "BIGINT")
170
+ # name = RecordField.new("name", "STRING")
171
+ # age = RecordField.new("age", "BIGINT")
172
+ #
173
+ # record_schema.add_field(id)
174
+ # record_schema.add_field(name)
175
+ # record_schema.add_field(age)
176
+ #
177
+ # topic_comment = "jyp_production_made_in_asia_test_topic_create_topic"
178
+ # topic_name = "jyp_production_made_in_asia_test_topic_create_topic"
179
+ #
180
+ # @client.create_topic(@project_name, topic_name, 4, 7, "TUPLE", record_schema, topic_comment)
181
+ # actual_topic = @client.get_topic(@project_name, topic_name)
182
+ # assert_equal(actual_topic["Comment"], topic_comment)
183
+ # assert_equal(actual_topic["RecordType"], "TUPLE")
184
+ # assert_equal(actual_topic["ShardCount"], 4)
185
+ #
186
+ # # 重复创建
187
+ # begin
188
+ # @client.create_topic(@project_name, topic_name, 4, 7, "TUPLE", record_schema, topic_comment)
189
+ # rescue => e
190
+ # resp = JSON.parse(e.message)
191
+ # assert_equal(resp["ErrorCode"], "TopicAlreadyExist")
192
+ # end
193
+ #
194
+ # @client.delete_topic(@project_name, topic_name)
195
+ #
196
+ # # 工程不存在
197
+ # begin
198
+ # @client.create_topic("project_not_exists_aa", topic_name, 4, 7, "TUPLE", record_schema, topic_comment)
199
+ # rescue => e
200
+ # resp = JSON.parse(e.message)
201
+ # assert_equal(resp["ErrorCode"], "NoSuchProject")
202
+ # end
203
+ #
204
+ # # 无效的参数
205
+ # begin
206
+ # @client.create_topic(@project_name, topic_name, 4, nil, "TUPLE", record_schema, topic_comment)
207
+ # rescue => e
208
+ # resp = JSON.parse(e.message)
209
+ # assert_equal(resp["ErrorCode"], "InvalidParameter")
210
+ # end
211
+ # end
212
+ #
213
+ # def test_update_topic
214
+ # record_schema = RecordSchema.new()
215
+ # id = RecordField.new("id", "BIGINT")
216
+ # name = RecordField.new("name", "STRING")
217
+ # age = RecordField.new("age", "BIGINT")
218
+ #
219
+ # record_schema.add_field(id)
220
+ # record_schema.add_field(name)
221
+ # record_schema.add_field(age)
222
+ #
223
+ # topic_comment = "jyp_production_made_in_asia_test_topic_update_topic"
224
+ # topic_new_comment = "jyp_production_made_in_asia_test_topic_update_topic_new"
225
+ # topic_name = "jyp_production_made_in_asia_test_topic_update_topic"
226
+ #
227
+ # @client.create_topic(@project_name, topic_name, 4, 7, "TUPLE", record_schema, topic_comment)
228
+ # actual_topic = @client.get_topic(@project_name, topic_name)
229
+ # assert_equal(actual_topic["Comment"], topic_comment)
230
+ #
231
+ # @client.update_topic(@project_name, topic_name, 6, topic_new_comment)
232
+ # actual_topic = @client.get_topic(@project_name, topic_name)
233
+ # assert_equal(actual_topic["Comment"], topic_new_comment)
234
+ # assert_equal(actual_topic["Lifecycle"], 6)
235
+ #
236
+ # @client.delete_topic(@project_name, topic_name)
237
+ #
238
+ # end
239
+ #
240
+ # def test_delete_topic
241
+ # record_schema = RecordSchema.new()
242
+ # id = RecordField.new("id", "BIGINT")
243
+ # name = RecordField.new("name", "STRING")
244
+ # age = RecordField.new("age", "BIGINT")
245
+ #
246
+ # record_schema.add_field(id)
247
+ # record_schema.add_field(name)
248
+ # record_schema.add_field(age)
249
+ #
250
+ # topic_comment = "jyp_production_made_in_asia_test_topic_delete_topic"
251
+ # topic_name = "jyp_production_made_in_asia_test_topic_delete_topic"
252
+ #
253
+ # @client.create_topic(@project_name, topic_name, 4, 7, "TUPLE", record_schema, topic_comment)
254
+ # actual_topic = @client.get_topic(@project_name, topic_name)
255
+ # assert(actual_topic != nil, "topic can not be null")
256
+ # @client.delete_topic(@project_name, topic_name)
257
+ #
258
+ # begin
259
+ # actual_topic = @client.get_topic(@project_name, topic_name)
260
+ # rescue => e
261
+ # resp = JSON.parse(e.message)
262
+ # assert_equal(resp["ErrorCode"], "NoSuchTopic")
263
+ # end
264
+ # end
265
+ #
266
+ # def test_list_shard
267
+ # puts @client.list_shards(@project_name, @topic_name)
268
+ # end
269
+ # #
270
+ # def test_write_data_to_topic
271
+ # sleep 1
272
+ # entity = RecordEntity.new(@record_schema)
273
+ # entity.set("id", 123)
274
+ # entity.set("name", "jyp")
275
+ # entity.set("age", 24)
276
+ # entity.set_shard_id("0")
277
+ # record_entities = []
278
+ # record_entities.push(entity)
279
+ # puts @client.write_data_to_topic(@project_name, @topic_name, record_entities)
280
+ # end
281
+ #
282
+ # def test_get_shard_cursor
283
+ # sleep 1
284
+ # puts @client.get_shard_cursor(@project_name, @topic_name, "0", 10)
285
+ # end
286
+ #
287
+ # def test_read_data_from_shard
288
+ # sleep 1
289
+ # record_entities = []
290
+ # # 写入0号shard
291
+ # entity1 = RecordEntity.new(@record_schema)
292
+ # entity1.set("id", 1111)
293
+ # entity1.set("name", "jyp111")
294
+ # entity1.set("age", 24)
295
+ # entity1.set_shard_id("0")
296
+ # record_entities.push(entity1)
297
+ #
298
+ # # 写入1号shard
299
+ # entity2 = RecordEntity.new(@record_schema)
300
+ # entity2.set("id", 2222)
301
+ # entity2.set("name", "jyp222")
302
+ # entity2.set("age", 25)
303
+ # entity2.set_shard_id("1")
304
+ # record_entities.push(entity2)
305
+ #
306
+ # puts @client.write_data_to_topic(@project_name, @topic_name, record_entities)
307
+ #
308
+ # sleep 1
309
+ # # 消费0、1shard的数据
310
+ # puts @client.read_data_from_shard(@project_name, @topic_name, "0", 10, DateTime.now.strftime('%Q'), "OLDEST")
311
+ # puts @client.read_data_from_shard(@project_name, @topic_name, "1", 10, DateTime.now.strftime('%Q'), "OLDEST")
312
+ # end
313
+
314
# Live smoke test: writes one record containing non-ASCII text to the
# "datahub_file_result" topic of "test_project" through DatahubHttpClient.
#
# The endpoint and credentials come from the environment
# (DATAHUB_ENDPOINT, DATAHUB_ACCESS_ID, DATAHUB_ACCESS_KEY) rather than being
# committed to the source. The test is omitted when any of them is missing.
#
# NOTE(review): test-unit appears to collect only methods matching /\Atest./,
# so a method named plain `test` may never run automatically — confirm, and
# consider a descriptive test_* name.
def test
  settings = %w[DATAHUB_ENDPOINT DATAHUB_ACCESS_ID DATAHUB_ACCESS_KEY].map { |key| ENV[key] }
  if settings.any? { |value| value.nil? || value.empty? }
    omit("set DATAHUB_ENDPOINT, DATAHUB_ACCESS_ID and DATAHUB_ACCESS_KEY to run this test")
  end
  @client = DatahubHttpClient.new(*settings)

  record_schema = RecordSchema.new
  record_schema.setEncoding("UTF-8")
  [
    ["id", "BIGINT"],
    ["name", "STRING"],
    ["gender", "BOOLEAN"],
    ["salary", "DOUBLE"],
    ["gmt_create", "TIMESTAMP"]
  ].each do |field_name, field_type|
    record_schema.add_field(RecordField.new(field_name, field_type))
  end

  record_entity = RecordEntity.new(record_schema)
  record_entity.setBigInt("id", 1)
  # Mixed ASCII/Chinese text exercises the UTF-8 encoding set on the schema.
  record_entity.setString("name", "test哈哈中国人")
  record_entity.set_shard_id("1")
  # NOTE(review): the double is passed as a string; presumably setDouble
  # accepts that — verify against RecordEntity.
  record_entity.setDouble("salary", "10.00")

  @client.write_data_to_topic("test_project", "datahub_file_result", [record_entity])

  puts "ok"
end
342
+
343
+ end