fluent-plugin-bigquery 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 24d39b32bcb8f6028618041edda11e1ffd3f6d16
4
- data.tar.gz: a238724b34f64d36f7f319b0d6fc43f578b0dffc
3
+ metadata.gz: 8266112b60355067f61e514946b70ccea8b1eed4
4
+ data.tar.gz: 8bb87ae21e1391c18f51ca4ae6806e5a7e8219ff
5
5
  SHA512:
6
- metadata.gz: 46e8ffbe6007cd2d855114671121285b4fb89b9fab378b59fcc37a5c20bd9f6587a5f8ab96a4f684cf9464d6d9da5b42b7a94f9d74e2cd6fd6769cf9c5f2f5df
7
- data.tar.gz: 1abae1988128f8b2349aa898f1afb5f847e894c2ee88bd34126cb5981ffa0af5dd3fcf74c2e1f27fdc3c1d45814b2b000b932100f94e795ed27e8ab90e9b7894
6
+ metadata.gz: ea18b5361d0789b91bd41c7838a560d1c3207158b69b5f8abc269e44dd020a26b9db6b9e58c24b84114956384cd44b147a9e9f0e0eec4af920a4779e6493bffc
7
+ data.tar.gz: 739c72ae73c4d28354c53e0bae9dc1b2f9126cd52cbe84784a36f2b69fcae42724a01c29688af15c5a3c8b891dcae909725929473c74e51ab1f1e02997ab8fd1
data/.travis.yml CHANGED
@@ -1,8 +1,15 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1.0
4
+ - 2.0
5
+ - 2.1
6
+ - 2.2
7
+ - 2.3.0
8
+
9
+ before_install:
10
+ - gem update bundler
11
+
12
+ before_install:
13
+ - gem update bundler
7
14
 
8
15
  script: bundle exec rake test
data/README.md CHANGED
@@ -220,8 +220,18 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
220
220
  </match>
221
221
  ```
222
222
 
223
- Note that the timestamp of logs and the date in the table id do not always match,
224
- because there is a time lag between collection and transmission of logs.
223
+ The format can be suffixed with attribute name.
224
+
225
+ ```apache
226
+ <match dummy>
227
+ ...
228
+ table accesslog_%Y_%m@timestamp
229
+ ...
230
+ </match>
231
+ ```
232
+
233
+ If attribute name is given, the time to be used for formatting is value of each row.
234
+ The value for the time should be a UNIX time.
225
235
 
226
236
  ### Dynamic table creating
227
237
 
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
25
25
 
26
26
  spec.add_runtime_dependency "google-api-client", "~> 0.8.0"
27
- spec.add_runtime_dependency "googleauth"
27
+ spec.add_runtime_dependency "googleauth", ">= 0.5.0"
28
28
  spec.add_runtime_dependency "fluentd"
29
29
  spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
30
30
  spec.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
@@ -1,6 +1,6 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.2.12"
3
+ VERSION = "0.2.13"
4
4
  end
5
5
  end
6
6
 
@@ -83,6 +83,8 @@ module Fluent
83
83
  config_param :replace_record_key, :bool, default: false
84
84
  (1..REGEXP_MAX_NUM).each {|i| config_param :"replace_record_key_regexp#{i}", :string, default: nil }
85
85
 
86
+ config_param :convert_hash_to_json, :bool, default: false
87
+
86
88
  config_param :time_format, :string, default: nil
87
89
  config_param :localtime, :bool, default: nil
88
90
  config_param :utc, :bool, default: nil
@@ -255,11 +257,11 @@ module Fluent
255
257
  when 'json_key'
256
258
  if File.exist?(@json_key)
257
259
  auth = File.open(@json_key) do |f|
258
- Google::Auth::ServiceAccountCredentials.new(json_key_io: f, scope: scope)
260
+ Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
259
261
  end
260
262
  else
261
263
  key = StringIO.new(@json_key)
262
- auth = Google::Auth::ServiceAccountCredentials.new(json_key_io: key, scope: scope)
264
+ auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
263
265
  end
264
266
 
265
267
  when 'application_default'
@@ -276,8 +278,16 @@ module Fluent
276
278
  @cached_client = client
277
279
  end
278
280
 
279
- def generate_table_id(table_id_format, current_time)
280
- current_time.strftime(table_id_format)
281
+ def generate_table_id(table_id_format, current_time, row)
282
+ format, col = table_id_format.split(/@/)
283
+ time = if col && row
284
+ keys = col.split('.')
285
+ t = keys.inject(row['json']) {|obj, attr| obj[attr] }
286
+ Time.at(t)
287
+ else
288
+ current_time
289
+ end
290
+ time.strftime(format)
281
291
  end
282
292
 
283
293
  def create_table(table_id)
@@ -318,8 +328,7 @@ module Fluent
318
328
  end
319
329
  end
320
330
 
321
- def insert(table_id_format, rows)
322
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
331
+ def insert(table_id, rows)
323
332
  res = client().execute(
324
333
  api_method: @bq.tabledata.insert_all,
325
334
  parameters: {
@@ -367,6 +376,15 @@ module Fluent
367
376
  new_record
368
377
  end
369
378
 
379
+ def convert_hash_to_json(record)
380
+ record.each do |key, value|
381
+ if value.class == Hash
382
+ record[key] = value.to_json
383
+ end
384
+ end
385
+ record
386
+ end
387
+
370
388
  def format_stream(tag, es)
371
389
  super
372
390
  buf = ''
@@ -375,6 +393,10 @@ module Fluent
375
393
  record = replace_record_key(record)
376
394
  end
377
395
 
396
+ if @convert_hash_to_json
397
+ record = convert_hash_to_json(record)
398
+ end
399
+
378
400
  row = @fields.format(@add_time_field.call(record, time))
379
401
  unless row.empty?
380
402
  row = {"json" => row}
@@ -394,17 +416,20 @@ module Fluent
394
416
 
395
417
  # TODO: method
396
418
 
397
- insert_table = @tables_mutex.synchronize do
419
+ insert_table_format = @tables_mutex.synchronize do
398
420
  t = @tables_queue.shift
399
421
  @tables_queue.push t
400
422
  t
401
423
  end
402
- insert(insert_table, rows)
424
+
425
+ rows.group_by {|row| generate_table_id(insert_table_format, Time.at(Fluent::Engine.now), row) }.each do |table_id, rows|
426
+ insert(table_id, rows)
427
+ end
403
428
  end
404
429
 
405
430
  def fetch_schema
406
431
  table_id_format = @tablelist[0]
407
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
432
+ table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil)
408
433
  res = client.execute(
409
434
  api_method: @bq.tables.get,
410
435
  parameters: {
@@ -105,7 +105,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
105
105
  json_key_path = 'test/plugin/testdata/json_key.json'
106
106
  authorization = Object.new
107
107
  mock(authorization).fetch_access_token!
108
- mock(Google::Auth::ServiceAccountCredentials).new(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
108
+ mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
109
109
 
110
110
  mock.proxy(Google::APIClient).new.with_any_args {
111
111
  mock!.__send__(:authorization=, authorization) {}
@@ -128,7 +128,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
128
128
  mock(StringIO).new(json_key) { json_key_io }
129
129
  authorization = Object.new
130
130
  mock(authorization).fetch_access_token!
131
- mock(Google::Auth::ServiceAccountCredentials).new(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
131
+ mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
132
132
 
133
133
  mock.proxy(Google::APIClient).new.with_any_args {
134
134
  mock!.__send__(:authorization=, authorization) {}
@@ -820,6 +820,60 @@ class BigQueryOutputTest < Test::Unit::TestCase
820
820
  assert_equal expected, MessagePack.unpack(buf)
821
821
  end
822
822
 
823
+ def test_convert_hash_to_json
824
+ now = Time.now
825
+ input = [
826
+ now,
827
+ {
828
+ "vhost" => :bar,
829
+ "referer" => "http://referer.example",
830
+ "bot_access" => true,
831
+ "loginsession" => false,
832
+ "remote" => {
833
+ "host" => "remote.example",
834
+ "ip" => "192.0.2.1",
835
+ "port" => 12345,
836
+ "user" => "tagomoris",
837
+ }
838
+ }
839
+ ]
840
+ expected = {
841
+ "json" => {
842
+ "time" => now.to_i,
843
+ "vhost" => "bar",
844
+ "referer" => "http://referer.example",
845
+ "bot_access" => true,
846
+ "loginsession" => false,
847
+ "remote" => "{\"host\":\"remote.example\",\"ip\":\"192.0.2.1\",\"port\":12345,\"user\":\"tagomoris\"}"
848
+ }
849
+ }
850
+
851
+ driver = create_driver(<<-CONFIG)
852
+ table foo
853
+ email foo@bar.example
854
+ private_key_path /path/to/key
855
+ project yourproject_id
856
+ dataset yourdataset_id
857
+
858
+ convert_hash_to_json true
859
+
860
+ time_format %s
861
+ time_field time
862
+
863
+ field_integer time
864
+ field_string vhost, referer, remote
865
+ field_boolean bot_access, loginsession
866
+ CONFIG
867
+ mock_client(driver) do |expect|
868
+ expect.discovered_api("bigquery", "v2") { stub! }
869
+ end
870
+ driver.instance.start
871
+ buf = driver.instance.format_stream("my.tag", [input])
872
+ driver.instance.shutdown
873
+
874
+ assert_equal expected, MessagePack.unpack(buf)
875
+ end
876
+
823
877
  def test_write
824
878
  entry = {"json" => {"a" => "b"}}, {"json" => {"b" => "c"}}
825
879
  driver = create_driver(CONFIG)
@@ -846,14 +900,90 @@ class BigQueryOutputTest < Test::Unit::TestCase
846
900
  driver.instance.shutdown
847
901
  end
848
902
 
849
- def test_generate_table_id
903
+ def test_write_with_row_based_table_id_formatting
904
+ entry = [
905
+ {"json" => {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).to_i}},
906
+ {"json" => {"b" => "c", "created_at" => Time.local(2014,8,21,9,0,0).to_i}}
907
+ ]
908
+ driver = create_driver(<<-CONFIG)
909
+ table foo_%Y_%m_%d@created_at
910
+ email foo@bar.example
911
+ private_key_path /path/to/key
912
+ project yourproject_id
913
+ dataset yourdataset_id
914
+
915
+ time_format %s
916
+ time_field time
917
+
918
+ field_integer time,status,bytes
919
+ field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
920
+ field_float requesttime
921
+ field_boolean bot_access,loginsession
922
+ CONFIG
923
+ mock_client(driver) do |expect|
924
+ expect.discovered_api("bigquery", "v2") { mock!.tabledata.times(2).mock!.insert_all.times(2) { Object.new } }
925
+
926
+ expect.execute(
927
+ :api_method => anything,
928
+ :parameters => {
929
+ 'projectId' => 'yourproject_id',
930
+ 'datasetId' => 'yourdataset_id',
931
+ 'tableId' => 'foo_2014_08_20',
932
+ },
933
+ :body_object => {
934
+ 'rows' => [entry[0]]
935
+ }
936
+ ) { stub!.success? { true } }
937
+
938
+ expect.execute(
939
+ :api_method => anything,
940
+ :parameters => {
941
+ 'projectId' => 'yourproject_id',
942
+ 'datasetId' => 'yourdataset_id',
943
+ 'tableId' => 'foo_2014_08_21',
944
+ },
945
+ :body_object => {
946
+ 'rows' => [entry[1]]
947
+ }
948
+ ) { stub!.success? { true } }
949
+ end
950
+
951
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
952
+ entry.each do |object|
953
+ chunk << object.to_msgpack
954
+ end
955
+
956
+ driver.instance.start
957
+ driver.instance.write(chunk)
958
+ driver.instance.shutdown
959
+ end
960
+
961
+ def test_generate_table_id_without_row
850
962
  driver = create_driver
851
963
  table_id_format = 'foo_%Y_%m_%d'
852
964
  time = Time.local(2014, 8, 11, 21, 20, 56)
853
- table_id = driver.instance.generate_table_id(table_id_format, time)
965
+ table_id = driver.instance.generate_table_id(table_id_format, time, nil)
854
966
  assert_equal 'foo_2014_08_11', table_id
855
967
  end
856
968
 
969
+ def test_generate_table_id_with_row
970
+ driver = create_driver
971
+ table_id_format = 'foo_%Y_%m_%d@created_at'
972
+ time = Time.local(2014, 8, 11, 21, 20, 56)
973
+ row = { "json" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } }
974
+ table_id = driver.instance.generate_table_id(table_id_format, time, row)
975
+ assert_equal 'foo_2014_08_10', table_id
976
+ end
977
+
978
+ def test_generate_table_id_with_row_nested_attribute
979
+ driver = create_driver
980
+ table_id_format = 'foo_%Y_%m_%d@foo.bar.created_at'
981
+ time = Time.local(2014, 8, 11, 21, 20, 56)
982
+ row = { "json" => { "foo" => { "bar" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } } } }
983
+ table_id = driver.instance.generate_table_id(table_id_format, time, row)
984
+ assert_equal 'foo_2014_08_10', table_id
985
+ end
986
+
857
987
  def test_auto_create_table_by_bigquery_api
858
988
  now = Time.now
859
989
  message = {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-20 00:00:00.000000000 Z
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: 0.5.0
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: 0.5.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: fluentd
99
99
  requirement: !ruby/object:Gem::Requirement