fluent-plugin-bigquery 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 24d39b32bcb8f6028618041edda11e1ffd3f6d16
4
- data.tar.gz: a238724b34f64d36f7f319b0d6fc43f578b0dffc
3
+ metadata.gz: 8266112b60355067f61e514946b70ccea8b1eed4
4
+ data.tar.gz: 8bb87ae21e1391c18f51ca4ae6806e5a7e8219ff
5
5
  SHA512:
6
- metadata.gz: 46e8ffbe6007cd2d855114671121285b4fb89b9fab378b59fcc37a5c20bd9f6587a5f8ab96a4f684cf9464d6d9da5b42b7a94f9d74e2cd6fd6769cf9c5f2f5df
7
- data.tar.gz: 1abae1988128f8b2349aa898f1afb5f847e894c2ee88bd34126cb5981ffa0af5dd3fcf74c2e1f27fdc3c1d45814b2b000b932100f94e795ed27e8ab90e9b7894
6
+ metadata.gz: ea18b5361d0789b91bd41c7838a560d1c3207158b69b5f8abc269e44dd020a26b9db6b9e58c24b84114956384cd44b147a9e9f0e0eec4af920a4779e6493bffc
7
+ data.tar.gz: 739c72ae73c4d28354c53e0bae9dc1b2f9126cd52cbe84784a36f2b69fcae42724a01c29688af15c5a3c8b891dcae909725929473c74e51ab1f1e02997ab8fd1
data/.travis.yml CHANGED
@@ -1,8 +1,15 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - 1.9.3
5
- - 2.0.0
6
- - 2.1.0
4
+ - 2.0
5
+ - 2.1
6
+ - 2.2
7
+ - 2.3.0
8
+
9
+ before_install:
10
+ - gem update bundler
11
+
12
+ before_install:
13
+ - gem update bundler
7
14
 
8
15
  script: bundle exec rake test
data/README.md CHANGED
@@ -220,8 +220,18 @@ data is inserted into tables `accesslog_2014_08`, `accesslog_2014_09` and so on.
220
220
  </match>
221
221
  ```
222
222
 
223
- Note that the timestamp of logs and the date in the table id do not always match,
224
- because there is a time lag between collection and transmission of logs.
223
+ The format can be suffixed with `@` followed by an attribute name.
224
+
225
+ ```apache
226
+ <match dummy>
227
+ ...
228
+ table accesslog_%Y_%m@timestamp
229
+ ...
230
+ </match>
231
+ ```
232
+
233
+ If an attribute name is given, the time used for formatting is taken from that attribute's value in each row, so rows can be routed to different tables.
234
+ The attribute value must be a UNIX timestamp (seconds since the epoch).
225
235
 
226
236
  ### Dynamic table creating
227
237
 
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
25
25
 
26
26
  spec.add_runtime_dependency "google-api-client", "~> 0.8.0"
27
- spec.add_runtime_dependency "googleauth"
27
+ spec.add_runtime_dependency "googleauth", ">= 0.5.0"
28
28
  spec.add_runtime_dependency "fluentd"
29
29
  spec.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
30
30
  spec.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
@@ -1,6 +1,6 @@
1
1
  module Fluent
2
2
  module BigQueryPlugin
3
- VERSION = "0.2.12"
3
+ VERSION = "0.2.13"
4
4
  end
5
5
  end
6
6
 
@@ -83,6 +83,8 @@ module Fluent
83
83
  config_param :replace_record_key, :bool, default: false
84
84
  (1..REGEXP_MAX_NUM).each {|i| config_param :"replace_record_key_regexp#{i}", :string, default: nil }
85
85
 
86
+ config_param :convert_hash_to_json, :bool, default: false
87
+
86
88
  config_param :time_format, :string, default: nil
87
89
  config_param :localtime, :bool, default: nil
88
90
  config_param :utc, :bool, default: nil
@@ -255,11 +257,11 @@ module Fluent
255
257
  when 'json_key'
256
258
  if File.exist?(@json_key)
257
259
  auth = File.open(@json_key) do |f|
258
- Google::Auth::ServiceAccountCredentials.new(json_key_io: f, scope: scope)
260
+ Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
259
261
  end
260
262
  else
261
263
  key = StringIO.new(@json_key)
262
- auth = Google::Auth::ServiceAccountCredentials.new(json_key_io: key, scope: scope)
264
+ auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
263
265
  end
264
266
 
265
267
  when 'application_default'
@@ -276,8 +278,16 @@ module Fluent
276
278
  @cached_client = client
277
279
  end
278
280
 
279
- def generate_table_id(table_id_format, current_time)
280
- current_time.strftime(table_id_format)
281
+ def generate_table_id(table_id_format, current_time, row)
282
+ format, col = table_id_format.split(/@/)
283
+ time = if col && row
284
+ keys = col.split('.')
285
+ t = keys.inject(row['json']) {|obj, attr| obj[attr] }
286
+ Time.at(t)
287
+ else
288
+ current_time
289
+ end
290
+ time.strftime(format)
281
291
  end
282
292
 
283
293
  def create_table(table_id)
@@ -318,8 +328,7 @@ module Fluent
318
328
  end
319
329
  end
320
330
 
321
- def insert(table_id_format, rows)
322
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
331
+ def insert(table_id, rows)
323
332
  res = client().execute(
324
333
  api_method: @bq.tabledata.insert_all,
325
334
  parameters: {
@@ -367,6 +376,15 @@ module Fluent
367
376
  new_record
368
377
  end
369
378
 
379
+ def convert_hash_to_json(record)
380
+ record.each do |key, value|
381
+ if value.class == Hash
382
+ record[key] = value.to_json
383
+ end
384
+ end
385
+ record
386
+ end
387
+
370
388
  def format_stream(tag, es)
371
389
  super
372
390
  buf = ''
@@ -375,6 +393,10 @@ module Fluent
375
393
  record = replace_record_key(record)
376
394
  end
377
395
 
396
+ if @convert_hash_to_json
397
+ record = convert_hash_to_json(record)
398
+ end
399
+
378
400
  row = @fields.format(@add_time_field.call(record, time))
379
401
  unless row.empty?
380
402
  row = {"json" => row}
@@ -394,17 +416,20 @@ module Fluent
394
416
 
395
417
  # TODO: method
396
418
 
397
- insert_table = @tables_mutex.synchronize do
419
+ insert_table_format = @tables_mutex.synchronize do
398
420
  t = @tables_queue.shift
399
421
  @tables_queue.push t
400
422
  t
401
423
  end
402
- insert(insert_table, rows)
424
+
425
+ rows.group_by {|row| generate_table_id(insert_table_format, Time.at(Fluent::Engine.now), row) }.each do |table_id, rows|
426
+ insert(table_id, rows)
427
+ end
403
428
  end
404
429
 
405
430
  def fetch_schema
406
431
  table_id_format = @tablelist[0]
407
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
432
+ table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil)
408
433
  res = client.execute(
409
434
  api_method: @bq.tables.get,
410
435
  parameters: {
@@ -105,7 +105,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
105
105
  json_key_path = 'test/plugin/testdata/json_key.json'
106
106
  authorization = Object.new
107
107
  mock(authorization).fetch_access_token!
108
- mock(Google::Auth::ServiceAccountCredentials).new(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
108
+ mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: File.open(json_key_path), scope: API_SCOPE) { authorization }
109
109
 
110
110
  mock.proxy(Google::APIClient).new.with_any_args {
111
111
  mock!.__send__(:authorization=, authorization) {}
@@ -128,7 +128,7 @@ class BigQueryOutputTest < Test::Unit::TestCase
128
128
  mock(StringIO).new(json_key) { json_key_io }
129
129
  authorization = Object.new
130
130
  mock(authorization).fetch_access_token!
131
- mock(Google::Auth::ServiceAccountCredentials).new(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
131
+ mock(Google::Auth::ServiceAccountCredentials).make_creds(json_key_io: json_key_io, scope: API_SCOPE) { authorization }
132
132
 
133
133
  mock.proxy(Google::APIClient).new.with_any_args {
134
134
  mock!.__send__(:authorization=, authorization) {}
@@ -820,6 +820,60 @@ class BigQueryOutputTest < Test::Unit::TestCase
820
820
  assert_equal expected, MessagePack.unpack(buf)
821
821
  end
822
822
 
823
+ def test_convert_hash_to_json
824
+ now = Time.now
825
+ input = [
826
+ now,
827
+ {
828
+ "vhost" => :bar,
829
+ "referer" => "http://referer.example",
830
+ "bot_access" => true,
831
+ "loginsession" => false,
832
+ "remote" => {
833
+ "host" => "remote.example",
834
+ "ip" => "192.0.2.1",
835
+ "port" => 12345,
836
+ "user" => "tagomoris",
837
+ }
838
+ }
839
+ ]
840
+ expected = {
841
+ "json" => {
842
+ "time" => now.to_i,
843
+ "vhost" => "bar",
844
+ "referer" => "http://referer.example",
845
+ "bot_access" => true,
846
+ "loginsession" => false,
847
+ "remote" => "{\"host\":\"remote.example\",\"ip\":\"192.0.2.1\",\"port\":12345,\"user\":\"tagomoris\"}"
848
+ }
849
+ }
850
+
851
+ driver = create_driver(<<-CONFIG)
852
+ table foo
853
+ email foo@bar.example
854
+ private_key_path /path/to/key
855
+ project yourproject_id
856
+ dataset yourdataset_id
857
+
858
+ convert_hash_to_json true
859
+
860
+ time_format %s
861
+ time_field time
862
+
863
+ field_integer time
864
+ field_string vhost, referer, remote
865
+ field_boolean bot_access, loginsession
866
+ CONFIG
867
+ mock_client(driver) do |expect|
868
+ expect.discovered_api("bigquery", "v2") { stub! }
869
+ end
870
+ driver.instance.start
871
+ buf = driver.instance.format_stream("my.tag", [input])
872
+ driver.instance.shutdown
873
+
874
+ assert_equal expected, MessagePack.unpack(buf)
875
+ end
876
+
823
877
  def test_write
824
878
  entry = {"json" => {"a" => "b"}}, {"json" => {"b" => "c"}}
825
879
  driver = create_driver(CONFIG)
@@ -846,14 +900,90 @@ class BigQueryOutputTest < Test::Unit::TestCase
846
900
  driver.instance.shutdown
847
901
  end
848
902
 
849
- def test_generate_table_id
903
+ def test_write_with_row_based_table_id_formatting
904
+ entry = [
905
+ {"json" => {"a" => "b", "created_at" => Time.local(2014,8,20,9,0,0).to_i}},
906
+ {"json" => {"b" => "c", "created_at" => Time.local(2014,8,21,9,0,0).to_i}}
907
+ ]
908
+ driver = create_driver(<<-CONFIG)
909
+ table foo_%Y_%m_%d@created_at
910
+ email foo@bar.example
911
+ private_key_path /path/to/key
912
+ project yourproject_id
913
+ dataset yourdataset_id
914
+
915
+ time_format %s
916
+ time_field time
917
+
918
+ field_integer time,status,bytes
919
+ field_string vhost,path,method,protocol,agent,referer,remote.host,remote.ip,remote.user
920
+ field_float requesttime
921
+ field_boolean bot_access,loginsession
922
+ CONFIG
923
+ mock_client(driver) do |expect|
924
+ expect.discovered_api("bigquery", "v2") { mock!.tabledata.times(2).mock!.insert_all.times(2) { Object.new } }
925
+
926
+ expect.execute(
927
+ :api_method => anything,
928
+ :parameters => {
929
+ 'projectId' => 'yourproject_id',
930
+ 'datasetId' => 'yourdataset_id',
931
+ 'tableId' => 'foo_2014_08_20',
932
+ },
933
+ :body_object => {
934
+ 'rows' => [entry[0]]
935
+ }
936
+ ) { stub!.success? { true } }
937
+
938
+ expect.execute(
939
+ :api_method => anything,
940
+ :parameters => {
941
+ 'projectId' => 'yourproject_id',
942
+ 'datasetId' => 'yourdataset_id',
943
+ 'tableId' => 'foo_2014_08_21',
944
+ },
945
+ :body_object => {
946
+ 'rows' => [entry[1]]
947
+ }
948
+ ) { stub!.success? { true } }
949
+ end
950
+
951
+ chunk = Fluent::MemoryBufferChunk.new("my.tag")
952
+ entry.each do |object|
953
+ chunk << object.to_msgpack
954
+ end
955
+
956
+ driver.instance.start
957
+ driver.instance.write(chunk)
958
+ driver.instance.shutdown
959
+ end
960
+
961
+ def test_generate_table_id_without_row
850
962
  driver = create_driver
851
963
  table_id_format = 'foo_%Y_%m_%d'
852
964
  time = Time.local(2014, 8, 11, 21, 20, 56)
853
- table_id = driver.instance.generate_table_id(table_id_format, time)
965
+ table_id = driver.instance.generate_table_id(table_id_format, time, nil)
854
966
  assert_equal 'foo_2014_08_11', table_id
855
967
  end
856
968
 
969
+ def test_generate_table_id_with_row
970
+ driver = create_driver
971
+ table_id_format = 'foo_%Y_%m_%d@created_at'
972
+ time = Time.local(2014, 8, 11, 21, 20, 56)
973
+ row = { "json" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } }
974
+ table_id = driver.instance.generate_table_id(table_id_format, time, row)
975
+ assert_equal 'foo_2014_08_10', table_id
976
+ end
977
+
978
+ def test_generate_table_id_with_row_nested_attribute
979
+ driver = create_driver
980
+ table_id_format = 'foo_%Y_%m_%d@foo.bar.created_at'
981
+ time = Time.local(2014, 8, 11, 21, 20, 56)
982
+ row = { "json" => { "foo" => { "bar" => { "created_at" => Time.local(2014,8,10,21,20,57).to_i } } } }
983
+ table_id = driver.instance.generate_table_id(table_id_format, time, row)
984
+ assert_equal 'foo_2014_08_10', table_id
985
+ end
986
+
857
987
  def test_auto_create_table_by_bigquery_api
858
988
  now = Time.now
859
989
  message = {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-bigquery
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naoya Ito
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-20 00:00:00.000000000 Z
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: 0.5.0
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: '0'
96
+ version: 0.5.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: fluentd
99
99
  requirement: !ruby/object:Gem::Requirement