fluent-plugin-bigquery 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/README.md +31 -7
- data/fluent-plugin-bigquery.gemspec +3 -3
- data/lib/fluent/plugin/bigquery/version.rb +1 -1
- data/lib/fluent/plugin/out_bigquery.rb +42 -3
- data/test/plugin/test_out_bigquery.rb +107 -0
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc7d16f7400a52c0cbf655c22b6c919572142182
+  data.tar.gz: 7c83037e999617f73a0fc6a51d41167a979b72a9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a25328702bfad9ddff282cbcd9c8d84d64e2857158c1bc7a20ce388610bb5150e969ac0f07ebd1162b01ffca29b50cf920a28c3806715512cad4a9ff6cbe4d5c
+  data.tar.gz: 9701a7f10f17e0f7dbaf61bca99d6f73e1b769deba6361935cc413c1bbc5039349c626a84309f57919e884228cc3084eb38d1fdad83fde99505a68509d171145
data/README.md
CHANGED
@@ -165,10 +165,11 @@ because there is a time lag between collection and transmission of logs.
 
 ### Table schema
 
-There are
+There are three methods to describe the schema of the target table.
 
 1. List fields in fluent.conf
 2. Load a schema file in JSON.
+3. Fetch a schema using BigQuery API
 
 The examples above use the first method. In this method,
 you can also specify nested fields by prefixing their belonging record fields.
@@ -176,12 +177,12 @@ you can also specify nested fields by prefixing their belonging record fields.
 
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
 
   time_format %s
   time_field  time
-
+
   field_integer time,response.status,response.bytes
   field_string  request.vhost,request.path,request.method,request.protocol,request.agent,request.referer,remote.host,remote.ip,remote.user
   field_float   request.time
@@ -215,20 +216,38 @@ The second method is to specify a path to a BigQuery schema file instead of list
 
 ```apache
 <match dummy>
   type bigquery
-
+
   ...
-
+
   time_format %s
   time_field  time
-
+
   schema_path /path/to/httpd.schema
   field_integer time
 </match>
 ```
 where /path/to/httpd.schema is a path to the JSON-encoded schema file which you used for creating the table on BigQuery.
 
+The third method is to set `fetch_schema` to `true` so that the plugin fetches the schema using the BigQuery API. In this case, your fluent.conf looks like:
+
+```apache
+<match dummy>
+  type bigquery
+
+  ...
+
+  time_format %s
+  time_field  time
+
+  fetch_schema true
+  field_integer time
+</match>
+```
+
+If you specify multiple tables in the configuration file, the plugin fetches the schema data for all of them from BigQuery and merges it.
+
 NOTE: Since JSON does not define how to encode data of TIMESTAMP type,
-you are still recommended to specify JSON types for TIMESTAMP fields as "time" field does in the example.
+you are still recommended to specify JSON types for TIMESTAMP fields as the "time" field does in the example, if you use the second or third method.
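For the multi-table case mentioned in the added text, a minimal configuration sketch (table names here are illustrative; `tables` is the plugin's existing comma-separated option, visible in the out_bigquery.rb hunk further down):

```apache
<match dummy>
  type bigquery

  ...

  tables accesslog1,accesslog2,accesslog3
  fetch_schema true
</match>
```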
@@ -240,3 +259,8 @@ you are still recommended to specify JSON types for TIMESTAMP fields as "time" f
 * Google API discovery expiration
 * Error classes
 * check row size limits
+
+## Authors
+
+* @tagomoris: First author, original version
+* KAIZEN platform Inc.: Maintainer, since 2014.08.19
data/fluent-plugin-bigquery.gemspec
CHANGED
@@ -6,11 +6,11 @@ require 'fluent/plugin/bigquery/version'
 Gem::Specification.new do |spec|
   spec.name          = "fluent-plugin-bigquery"
   spec.version       = Fluent::BigQueryPlugin::VERSION
-  spec.authors       = ["
-  spec.email         = ["
+  spec.authors       = ["Naoya Ito"]
+  spec.email         = ["i.naoya@gmail.com"]
   spec.description   = %q{Fluentd plugin to store data on Google BigQuery, by load, or by stream inserts}
   spec.summary       = %q{Fluentd plugin to store data on Google BigQuery}
-  spec.homepage      = "https://github.com/
+  spec.homepage      = "https://github.com/kaizenplatform/fluent-plugin-bigquery"
   spec.license       = "APLv2"
 
   spec.files         = `git ls-files`.split($/)
data/lib/fluent/plugin/out_bigquery.rb
CHANGED
@@ -62,6 +62,7 @@ module Fluent
     config_param :tables, :string, :default => nil
 
     config_param :schema_path, :string, :default => nil
+    config_param :fetch_schema, :bool, :default => false
     config_param :field_string,  :string, :default => nil
     config_param :field_integer, :string, :default => nil
     config_param :field_float,   :string, :default => nil
@@ -201,6 +202,8 @@ module Fluent
 
       @tables_queue = @tablelist.dup.shuffle
       @tables_mutex = Mutex.new
+
+      fetch_schema() if @fetch_schema
     end
 
     def shutdown
@@ -270,7 +273,7 @@ module Fluent
           log.warn "Parse error: google api error response body", :body => res.body
         end
       end
-      log.error "tabledata.insertAll API", :project_id => @
+      log.error "tabledata.insertAll API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
       raise "failed to insert into bigquery" # TODO: error class
     end
   end
@@ -307,6 +310,40 @@ module Fluent
       insert(insert_table, rows)
     end
 
+    def fetch_schema
+      table_id = @tablelist[0]
+      res = client.execute(
+        :api_method => @bq.tables.get,
+        :parameters => {
+          'projectId' => @project,
+          'datasetId' => @dataset,
+          'tableId' => table_id,
+        }
+      )
+
+      unless res.success?
+        # api_error? -> client cache clear
+        @cached_client = nil
+
+        message = res.body
+        if res.body =~ /^\{/
+          begin
+            res_obj = JSON.parse(res.body)
+            message = res_obj['error']['message'] || res.body
+          rescue => e
+            log.warn "Parse error: google api error response body", :body => res.body
+          end
+        end
+        log.error "tables.get API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => res.status, :message => message
+        raise "failed to fetch schema from bigquery" # TODO: error class
+      end
+
+      res_obj = JSON.parse(res.body)
+      schema = res_obj['schema']['fields']
+      log.debug "Load schema from BigQuery: #{@project}:#{@dataset}.#{table_id} #{schema}"
+      @fields.load_schema(schema, false)
+    end
+
     # def client_oauth # not implemented
     #   raise NotImplementedError, "OAuth needs browser authentication..."
     #
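For orientation, a minimal sketch of the payload the new method consumes: BigQuery's tables.get response carries the table schema as an array of {name, type, mode} hashes under `schema.fields`, which `fetch_schema` extracts and hands to `load_schema`. Field names below are illustrative, not from the gem:

```ruby
require 'json'

# Illustrative tables.get response body: the schema lives under
# schema.fields as an array of field definitions.
res_body = JSON.generate({
  'schema' => {
    'fields' => [
      { 'name' => 'time',   'type' => 'TIMESTAMP', 'mode' => 'REQUIRED' },
      { 'name' => 'status', 'type' => 'INTEGER',   'mode' => 'NULLABLE' }
    ]
  }
})

# The same extraction fetch_schema performs before calling
# @fields.load_schema(schema, false):
schema = JSON.parse(res_body)['schema']['fields']
p schema.map { |f| f['name'] }  # => ["time", "status"]
```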
@@ -434,7 +471,7 @@ module Fluent
       @fields[name]
     end
 
-    def load_schema(schema)
+    def load_schema(schema, allow_overwrite=true)
       schema.each do |field|
         raise ConfigError, 'field must have type' unless field.key?('type')
 
@@ -445,11 +482,13 @@ module Fluent
       field_schema_class = FIELD_TYPES[type]
       raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class
 
+      next if @fields.key?(name) and !allow_overwrite
+
       field_schema = field_schema_class.new(name, mode)
       @fields[name] = field_schema
       if type == :record
         raise ConfigError, "record field must have fields" unless field.key?('fields')
-        field_schema.load_schema(field['fields'])
+        field_schema.load_schema(field['fields'], allow_overwrite)
       end
     end
   end
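The new allow_overwrite flag is what makes fetched schemas defer to fluent.conf: fields declared in the config are registered first, then fetch_schema applies the API schema with allow_overwrite=false, so only fields not yet known are added (the test below asserts exactly this for the "time" field). A standalone sketch of that precedence rule, using a plain Hash in place of the plugin's field table:

```ruby
# Standalone sketch of the allow_overwrite precedence rule; a plain
# Hash stands in for the plugin's RecordSchema field classes.
fields = {}

load_schema = lambda do |schema, allow_overwrite|
  schema.each do |field|
    name = field['name']
    # With allow_overwrite=false, fields already registered win.
    next if fields.key?(name) && !allow_overwrite
    fields[name] = field['type'].downcase.to_sym
  end
end

# 1) Fields listed in fluent.conf, e.g. "field_integer time":
load_schema.call([{ 'name' => 'time', 'type' => 'INTEGER' }], true)
# 2) Schema fetched from BigQuery, applied without overwriting:
load_schema.call([{ 'name' => 'time', 'type' => 'TIMESTAMP' },
                  { 'name' => 'tty',  'type' => 'STRING' }], false)

p fields  # => {"time"=>:integer, "tty"=>:string}
```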
data/test/plugin/test_out_bigquery.rb
CHANGED
@@ -413,6 +413,113 @@ class BigQueryOutputTest < Test::Unit::TestCase
     assert_equal expected, MessagePack.unpack(buf)
   end
 
+  def test_format_fetch_from_bigquery_api
+    now = Time.now
+    input = [
+      now,
+      {
+        "tty" => nil,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    ]
+    expected = {
+      "json" => {
+        "time" => now.to_i,
+        "pwd" => "/home/yugui",
+        "user" => "fluentd",
+        "argv" => %w[ tail -f /var/log/fluentd/fluentd.log ]
+      }
+    }
+
+    driver = create_driver(<<-CONFIG)
+      table foo
+      email foo@bar.example
+      private_key_path /path/to/key
+      project yourproject_id
+      dataset yourdataset_id
+
+      time_format %s
+      time_field  time
+
+      fetch_schema true
+      field_integer time
+    CONFIG
+    mock_client(driver) do |expect|
+      expect.discovered_api("bigquery", "v2") { mock!.tables.mock!.get { Object.new } }
+      expect.execute(
+        :api_method => anything,
+        :parameters => {
+          'projectId' => 'yourproject_id',
+          'datasetId' => 'yourdataset_id',
+          'tableId' => 'foo'
+        }
+      ) {
+        s = stub!
+        s.success? { true }
+        s.body {
+          JSON.generate({
+            schema: {
+              fields: [
+                {
+                  name: "time",
+                  type: "TIMESTAMP",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "tty",
+                  type: "STRING",
+                  mode: "NULLABLE"
+                },
+                {
+                  name: "pwd",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "user",
+                  type: "STRING",
+                  mode: "REQUIRED"
+                },
+                {
+                  name: "argv",
+                  type: "STRING",
+                  mode: "REPEATED"
+                }
+              ]
+            }
+          })
+        }
+        s
+      }
+    end
+    driver.instance.start
+    buf = driver.instance.format_stream("my.tag", [input])
+    driver.instance.shutdown
+
+    fields = driver.instance.instance_eval{ @fields }
+    assert fields["time"]
+    assert_equal :integer,  fields["time"].type  # DO NOT OVERWRITE
+    assert_equal :nullable, fields["time"].mode  # DO NOT OVERWRITE
+
+    assert fields["tty"]
+    assert_equal :string,   fields["tty"].type
+    assert_equal :nullable, fields["tty"].mode
+
+    assert fields["pwd"]
+    assert_equal :string,   fields["pwd"].type
+    assert_equal :required, fields["pwd"].mode
+
+    assert fields["user"]
+    assert_equal :string,   fields["user"].type
+    assert_equal :required, fields["user"].mode
+
+    assert fields["argv"]
+    assert_equal :string,   fields["argv"].type
+    assert_equal :repeated, fields["argv"].mode
+  end
+
   def test_empty_value_in_required
     now = Time.now
     input = [
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-bigquery
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.2.0
 platform: ruby
 authors:
--
+- Naoya Ito
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-08-
+date: 2014-08-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -125,7 +125,7 @@ dependencies:
 description: Fluentd plugin to store data on Google BigQuery, by load, or by stream
   inserts
 email:
--
+- i.naoya@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
@@ -145,7 +145,7 @@ files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-homepage: https://github.com/
+homepage: https://github.com/kaizenplatform/fluent-plugin-bigquery
 licenses:
 - APLv2
 metadata: {}
@@ -175,4 +175,3 @@ test_files:
 - test/plugin/testdata/apache.schema
 - test/plugin/testdata/sudo.schema
 - test/test_load_request_body_wrapper.rb
-has_rdoc: