fluent-plugin-bigquery 0.2.16 → 0.3.0

@@ -5,6 +5,9 @@ require 'fluent/plugin/bigquery/version'
  require 'fluent/mixin/config_placeholders'
  require 'fluent/mixin/plaintextformatter'

+ require 'fluent/plugin/bigquery/schema'
+ require 'fluent/plugin/bigquery/writer'
+
  ## TODO: load implementation
  # require 'fluent/plugin/bigquery/load_request_body_wrapper'

@@ -19,36 +22,39 @@ module Fluent
  # https://developers.google.com/bigquery/browser-tool-quickstart
  # https://developers.google.com/bigquery/bigquery-api-quickstart

- config_set_default :buffer_type, 'lightening'
-
- config_set_default :flush_interval, 0.25
- config_set_default :try_flush_interval, 0.05
+ ### default for insert
+ def configure_for_insert(conf)
+ raise ConfigError unless conf["method"] != "load"

- config_set_default :buffer_chunk_records_limit, 500
- config_set_default :buffer_chunk_limit, 1000000
- config_set_default :buffer_queue_limit, 1024
+ conf["buffer_type"] = "lightening" unless conf["buffer_type"]
+ conf["flush_interval"] = 0.25 unless conf["flush_interval"]
+ conf["try_flush_interval"] = 0.05 unless conf["try_flush_interval"]
+ conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"] # 1MB
+ conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]
+ conf["buffer_chunk_records_limit"] = 500 unless conf["buffer_chunk_records_limit"]
+ end

- ### for loads
- ### TODO: different default values for buffering between 'load' and insert
- # config_set_default :flush_interval, 1800 # 30min => 48 imports/day
- # config_set_default :buffer_chunk_limit, 1000**4 # 1.0*10^12 < 1TB (1024^4)
+ ### default for loads
+ def configure_for_load(conf)
+ raise ConfigError unless conf["method"] == "load"

- ### OAuth credential
- # config_param :client_id, :string
- # config_param :client_secret, :string
+ # buffer_type, flush_interval, try_flush_interval is TimeSlicedOutput default
+ conf["buffer_chunk_limit"] = 1 * 1024 ** 3 unless conf["buffer_chunk_limit"] # 1GB
+ conf["buffer_queue_limit"] = 32 unless conf["buffer_queue_limit"]
+ end

  # Available methods are:
  # * private_key -- Use service account credential from pkcs12 private key file
  # * compute_engine -- Use access token available in instances of ComputeEngine
- # * private_json_key -- Use service account credential from JSON key
+ # * json_key -- Use service account credential from JSON key
  # * application_default -- Use application default credential
- config_param :auth_method, :string, default: 'private_key'
+ config_param :auth_method, :enum, list: [:private_key, :compute_engine, :json_key, :application_default], default: :private_key

  ### Service Account credential
  config_param :email, :string, default: nil
  config_param :private_key_path, :string, default: nil
  config_param :private_key_passphrase, :string, default: 'notasecret', secret: true
- config_param :json_key, default: nil
+ config_param :json_key, default: nil, secret: true

  # see as simple reference
  # https://github.com/abronte/BigQuery/blob/master/lib/bigquery.rb
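
Note: the new configure_for_insert / configure_for_load helpers fill buffer defaults into the raw config hash only when the user has not set a key, so explicit settings always win. A minimal Ruby sketch of that "unless" pattern, with hypothetical values not taken from the diff:

    # The user already set flush_interval, so only the missing keys get defaults.
    conf = { "method" => "insert", "flush_interval" => "1" }
    conf["flush_interval"] = 0.25 unless conf["flush_interval"]                    # stays "1"
    conf["buffer_chunk_limit"] = 1 * 1024 ** 2 unless conf["buffer_chunk_limit"]   # becomes 1048576 (1MB)
    conf["buffer_queue_limit"] = 1024 unless conf["buffer_queue_limit"]            # becomes 1024
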
@@ -62,12 +68,32 @@ module Fluent
  # table_id
  # In Table ID, enter a name for your new table. Naming rules are the same as for your dataset.
  config_param :table, :string, default: nil
- config_param :tables, :string, default: nil
+ config_param :tables, :string, default: nil # TODO: use :array with value_type: :string
+
+ # template_suffix (only insert)
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#template_table_details
+ config_param :template_suffix, :string, default: nil

  config_param :auto_create_table, :bool, default: false

+ # skip_invalid_rows (only insert)
+ # Insert all valid rows of a request, even if invalid rows exist.
+ # The default value is false, which causes the entire request to fail if any invalid rows exist.
+ config_param :skip_invalid_rows, :bool, default: false
+ # max_bad_records (only load)
+ # The maximum number of bad records that BigQuery can ignore when running the job.
+ # If the number of bad records exceeds this value, an invalid error is returned in the job result.
+ # The default value is 0, which requires that all records are valid.
+ config_param :max_bad_records, :integer, default: 0
+ # ignore_unknown_values
+ # Accept rows that contain values that do not match the schema. The unknown values are ignored.
+ # Default is false, which treats unknown values as errors.
+ config_param :ignore_unknown_values, :bool, default: false
+
  config_param :schema_path, :string, default: nil
  config_param :fetch_schema, :bool, default: false
+ config_param :fetch_schema_table, :string, default: nil
+ config_param :schema_cache_expire, :time, default: 600
  config_param :field_string, :string, default: nil
  config_param :field_integer, :string, default: nil
  config_param :field_float, :string, default: nil
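
Note: skip_invalid_rows and ignore_unknown_values address different failure modes of streaming inserts, as the comments above describe. A hypothetical row (invented for illustration, not from the diff) makes the distinction concrete:

    # Suppose the table schema only defines "vhost" and "status".
    row = { "json" => { "vhost" => "www", "status" => 200, "extra" => 1 } }
    # ignore_unknown_values true  -> the row is inserted and "extra" is silently dropped
    # ignore_unknown_values false -> the row is reported as invalid (unknown field)
    # skip_invalid_rows true      -> other rows in the same insertAll request are still inserted
    # skip_invalid_rows false     -> the whole request fails if any row is invalid
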
@@ -90,20 +116,15 @@ module Fluent
  config_param :utc, :bool, default: nil
  config_param :time_field, :string, default: nil

+ # insert_id_field (only insert)
  config_param :insert_id_field, :string, default: nil
+ # prevent_duplicate_load (only load)
+ config_param :prevent_duplicate_load, :bool, default: false

- config_param :method, :string, default: 'insert' # or 'load'
+ config_param :method, :enum, list: [:insert, :load], default: :insert, skip_accessor: true

- config_param :load_size_limit, :integer, default: 1000**4 # < 1TB (1024^4) # TODO: not implemented now
- ### method: 'load'
- # https://developers.google.com/bigquery/loading-data-into-bigquery
- # Maximum File Sizes:
- # File Type   Compressed   Uncompressed
- # CSV         1 GB         With new-lines in strings: 4 GB
- #                          Without new-lines in strings: 1 TB
- # JSON        1 GB         1 TB
-
- config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
+ # TODO
+ # config_param :row_size_limit, :integer, default: 100*1000 # < 100KB # configurable in google ?
  # config_param :insert_size_limit, :integer, default: 1000**2 # < 1MB
  # config_param :rows_per_second_limit, :integer, default: 1000 # spike limit
  ### method: ''Streaming data inserts support
@@ -114,6 +135,14 @@ module Fluent
  # If you exceed 100 rows per second for an extended period of time, throttling might occur.
  ### Toooooooooooooo short/small per inserts and row!

+ ## Timeout
+ # request_timeout_sec
+ #   Bigquery API response timeout
+ # request_open_timeout_sec
+ #   Bigquery API connection, and request timeout
+ config_param :request_timeout_sec, :time, default: nil
+ config_param :request_open_timeout_sec, :time, default: 60
+
  ### Table types
  # https://developers.google.com/bigquery/docs/tables
  #
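
Note: both timeout options are :time parameters, i.e. interpreted as seconds; later in this diff they are handed to writer.create_load_job as timeout_sec / open_timeout_sec. A hypothetical setting (values invented for illustration) and its intent:

    # Fail a hung BigQuery API response after 2 minutes; keep the default
    # 60 second limit for opening the connection and sending the request.
    request_timeout_sec      = 120  # default is nil: no explicit response timeout
    request_open_timeout_sec = 60   # default
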
@@ -142,34 +171,36 @@ module Fluent
  Faraday.default_connection.options.timeout = 60
  end

- # Define `log` method for v0.10.42 or earlier
- unless method_defined?(:log)
- define_method("log") { $log }
- end
-
  def configure(conf)
+ if conf["method"] == "load"
+ configure_for_load(conf)
+ else
+ configure_for_insert(conf)
+ end
  super

- if @method == "insert"
+ case @method
+ when :insert
  extend(InsertImplementation)
- elsif @method == "load"
+ when :load
+ raise Fluent::ConfigError, "'template_suffix' is for only `insert` mode, instead use 'fetch_schema_table' and formatted table name" if @template_suffix
  extend(LoadImplementation)
  else
- raise Fluend::ConfigError "'method' must be 'insert' or 'load'"
+ raise Fluent::ConfigError "'method' must be 'insert' or 'load'"
  end

  case @auth_method
- when 'private_key'
+ when :private_key
  unless @email && @private_key_path
  raise Fluent::ConfigError, "'email' and 'private_key_path' must be specified if auth_method == 'private_key'"
  end
- when 'compute_engine'
+ when :compute_engine
  # Do nothing
- when 'json_key'
+ when :json_key
  unless @json_key
  raise Fluent::ConfigError, "'json_key' must be specified if auth_method == 'json_key'"
  end
- when 'application_default'
+ when :application_default
  # Do nothing
  else
  raise Fluent::ConfigError, "unrecognized 'auth_method': #{@auth_method}"
@@ -181,7 +212,7 @@ module Fluent

  @tablelist = @tables ? @tables.split(',') : [@table]

- @fields = RecordSchema.new('record')
+ @fields = Fluent::BigQuery::RecordSchema.new('record')
  if @schema_path
  @fields.load_schema(MultiJson.load(File.read(@schema_path)))
  end
@@ -232,57 +263,20 @@ module Fluent
  def start
  super

- @cached_client = nil
- @cached_client_expiration = nil
-
  @tables_queue = @tablelist.dup.shuffle
  @tables_mutex = Mutex.new
+ @fetch_schema_mutex = Mutex.new

- fetch_schema() if @fetch_schema
+ @last_fetch_schema_time = 0
+ fetch_schema(false) if @fetch_schema
  end

- def client
- return @cached_client if @cached_client && @cached_client_expiration > Time.now
-
- client = Google::Apis::BigqueryV2::BigqueryService.new
-
- scope = "https://www.googleapis.com/auth/bigquery"
-
- case @auth_method
- when 'private_key'
- require 'google/api_client/auth/key_utils'
- key = Google::APIClient::KeyUtils.load_from_pkcs12(@private_key_path, @private_key_passphrase)
- auth = Signet::OAuth2::Client.new(
- token_credential_uri: "https://accounts.google.com/o/oauth2/token",
- audience: "https://accounts.google.com/o/oauth2/token",
- scope: scope,
- issuer: @email,
- signing_key: key)
-
- when 'compute_engine'
- auth = Google::Auth::GCECredentials.new
-
- when 'json_key'
- if File.exist?(@json_key)
- auth = File.open(@json_key) do |f|
- Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: f, scope: scope)
- end
- else
- key = StringIO.new(@json_key)
- auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
- end
-
- when 'application_default'
- auth = Google::Auth.get_application_default([scope])
-
- else
- raise ConfigError, "Unknown auth method: #{@auth_method}"
- end
-
- client.authorization = auth
-
- @cached_client_expiration = Time.now + 1800
- @cached_client = client
+ def writer
+ @writer ||= Fluent::BigQuery::Writer.new(@log, @auth_method, {
+ private_key_path: @private_key_path, private_key_passphrase: @private_key_passphrase,
+ email: @email,
+ json_key: @json_key,
+ })
  end

  def generate_table_id(table_id_format, current_time, row = nil, chunk = nil)
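
Note: the hand-rolled, time-expiring client cache is gone; authentication and API calls now live in Fluent::BigQuery::Writer (added in fluent/plugin/bigquery/writer.rb, which is not shown in this diff), and the plugin memoizes a single instance with @writer ||= . The surface the plugin relies on can be read off the call sites in this diff; the following is only an inferred sketch of that interface, with signatures guessed from usage rather than copied from the new file:

    # Inferred interface only -- the real class lives in fluent/plugin/bigquery/writer.rb
    # and may differ in detail.
    module Fluent
      module BigQuery
        class Writer
          class Error < StandardError
            def retryable?; end    # plugin retries when true, otherwise flushes to <secondary>
            def status_code; end   # used for the 404 "Not Found: Table" auto-create path
          end

          def initialize(log, auth_method, options = {}); end                        # email, json_key, private_key_path, ...
          def fetch_schema(project, dataset, table_id); end                          # -> array of field hashes, or nil on failure
          def create_table(project, dataset, table_id, fields); end
          def insert_rows(project, dataset, table_id, rows, options = {}); end       # skip_invalid_rows:, ignore_unknown_values:, template_suffix:
          def create_load_job(project, dataset, table_id, upload_source, job_id, fields, options = {}); end
        end
      end
    end
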
@@ -295,7 +289,6 @@ module Fluent
  current_time
  end
  if row && format =~ /\$\{/
- json = row[:json]
  format.gsub!(/\$\{\s*(\w+)\s*\}/) do |m|
  row[:json][$1.to_sym].to_s.gsub(/[^\w]/, '')
  end
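
Note: the unused json local is dropped; ${field} placeholders are still read straight from row[:json] and stripped of non-word characters. A hypothetical expansion (table name and record invented for illustration):

    row_json = { vhost: "www.example.com" }
    "access_${vhost}".gsub(/\$\{\s*(\w+)\s*\}/) do |m|
      row_json[$1.to_sym].to_s.gsub(/[^\w]/, '')
    end
    # => "access_wwwexamplecom"  (the "." characters are removed as non-word characters)
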
@@ -313,28 +306,6 @@ module Fluent
  end
  end

- def create_table(table_id)
- client.insert_table(@project, @dataset, {
- table_reference: {
- table_id: table_id,
- },
- schema: {
- fields: @fields.to_a,
- }
- }, {})
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- # api_error? -> client cache clear
- @cached_client = nil
-
- message = e.message
- if e.status_code == 409 && /Already Exists:/ =~ message
- # ignore 'Already Exists' error
- return
- end
- log.error "tables.insert API", :project_id => @project, :dataset => @dataset, :table => table_id, :code => e.status_code, :message => message
- raise "failed to create table in bigquery" # TODO: error class
- end
-
  def replace_record_key(record)
  new_record = {}
  record.each do |key, _|
@@ -363,28 +334,42 @@ module Fluent
  @tables_queue.push t
  t
  end
- _write(chunk, table_id_format)
+ template_suffix_format = @template_suffix
+ _write(chunk, table_id_format, template_suffix_format)
  end

- def fetch_schema
- table_id_format = @tablelist[0]
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
- res = client.get_table(@project, @dataset, table_id)
-
- schema = res.schema.fields.as_json
- log.debug "Load schema from BigQuery: #{@project}:#{@dataset}.#{table_id} #{schema}"
- @fields.load_schema(schema, false)
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- # api_error? -> client cache clear
- @cached_client = nil
- message = e.message
- log.error "tables.get API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
- raise "failed to fetch schema from bigquery" # TODO: error class
+ def fetch_schema(allow_overwrite = true)
+ table_id = nil
+ @fetch_schema_mutex.synchronize do
+ if Fluent::Engine.now - @last_fetch_schema_time > @schema_cache_expire
+ table_id_format = @fetch_schema_table || @tablelist[0]
+ table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now))
+ schema = writer.fetch_schema(@project, @dataset, table_id)
+
+ if schema
+ if allow_overwrite
+ fields = Fluent::BigQuery::RecordSchema.new("record")
+ fields.load_schema(schema, allow_overwrite)
+ @fields = fields
+ else
+ @fields.load_schema(schema, allow_overwrite)
+ end
+ else
+ if @fields.empty?
+ raise "failed to fetch schema from bigquery"
+ else
+ log.warn "#{table_id} uses previous schema"
+ end
+ end
+
+ @last_fetch_schema_time = Fluent::Engine.now
+ end
+ end
  end

  module InsertImplementation
  def format(tag, time, record)
- buf = ''
+ fetch_schema if @template_suffix

  if @replace_record_key
  record = replace_record_key(record)
@@ -394,6 +379,7 @@ module Fluent
  record = convert_hash_to_json(record)
  end

+ buf = String.new
  row = @fields.format(@add_time_field.call(record, time))
  unless row.empty?
  row = {"json" => row}
@@ -403,44 +389,51 @@ module Fluent
  buf
  end

- def _write(chunk, table_format)
+ def _write(chunk, table_format, template_suffix_format)
  rows = []
  chunk.msgpack_each do |row_object|
  # TODO: row size limit
  rows << row_object.deep_symbolize_keys
  end

- rows.group_by {|row| generate_table_id(table_format, Time.at(Fluent::Engine.now), row, chunk) }.each do |table_id, group|
- insert(table_id, group)
+ now = Time.at(Fluent::Engine.now)
+ group = rows.group_by do |row|
+ [
+ generate_table_id(table_format, now, row, chunk),
+ template_suffix_format ? generate_table_id(template_suffix_format, now, row, chunk) : nil,
+ ]
+ end
+ group.each do |(table_id, template_suffix), group_rows|
+ insert(table_id, group_rows, template_suffix)
  end
  end

- def insert(table_id, rows)
- client.insert_all_table_data(@project, @dataset, table_id, {
- rows: rows
- }, {})
- rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
- # api_error? -> client cache clear
- @cached_client = nil
-
- message = e.message
- if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ message.to_s
+ def insert(table_id, rows, template_suffix)
+ writer.insert_rows(@project, @dataset, table_id, rows, skip_invalid_rows: @skip_invalid_rows, ignore_unknown_values: @ignore_unknown_values, template_suffix: template_suffix)
+ rescue Fluent::BigQuery::Writer::Error => e
+ if @auto_create_table && e.status_code == 404 && /Not Found: Table/i =~ e.message
  # Table Not Found: Auto Create Table
- create_table(table_id)
+ writer.create_table(@project, @dataset, table_id, @fields)
  raise "table created. send rows next time."
  end
- log.error "tabledata.insertAll API", project_id: @project, dataset: @dataset, table: table_id, code: e.status_code, message: message
- raise "failed to insert into bigquery" # TODO: error class
+
+ if e.retryable?
+ raise e # TODO: error class
+ elsif @secondary
+ flush_secondary(@secondary)
+ end
  end
  end

  module LoadImplementation
  def format(tag, time, record)
- buf = ''
+ fetch_schema if @fetch_schema_table

  if @replace_record_key
  record = replace_record_key(record)
  end
+
+ buf = String.new
  row = @fields.format(@add_time_field.call(record, time))
  unless row.empty?
  buf << MultiJson.dump(row) + "\n"
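
Note: _write now groups rows by the pair [table_id, template_suffix] instead of table_id alone, so each insert_rows call targets exactly one table and one template suffix. A small illustration of the grouping key (data invented for illustration):

    rows = [
      { json: { vhost: "a" } },
      { json: { vhost: "b" } },
      { json: { vhost: "a" } },
    ]
    rows.group_by { |row| ["access_log", "_#{row[:json][:vhost]}"] }
    # => the first and third rows share the key ["access_log", "_a"],
    #    the second row gets ["access_log", "_b"], so two insert calls are made
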
@@ -448,53 +441,37 @@ module Fluent
  buf
  end

- def _write(chunk, table_id_format)
- table_id = generate_table_id(table_id_format, Time.at(Fluent::Engine.now), nil, chunk)
+ def _write(chunk, table_id_format, _)
+ now = Time.at(Fluent::Engine.now)
+ table_id = generate_table_id(table_id_format, now, nil, chunk)
  load(chunk, table_id)
  end

  def load(chunk, table_id)
  res = nil
- create_upload_source(chunk) do |upload_source|
- res = client.insert_job(@project, {
- configuration: {
- load: {
- destination_table: {
- project_id: @project,
- dataset_id: @dataset,
- table_id: table_id,
- },
- schema: {
- fields: @fields.to_a,
- },
- write_disposition: "WRITE_APPEND",
- source_format: "NEWLINE_DELIMITED_JSON"
- }
- }
- }, {upload_source: upload_source, content_type: "application/octet-stream"})
- end
- wait_load(res, table_id)
- end

- private
-
- def wait_load(res, table_id)
- wait_interval = 10
- _response = res
- until _response.status.state == "DONE"
- log.debug "wait for load job finish", state: _response.status.state
- sleep wait_interval
- _response = client.get_job(@project, _response.job_reference.job_id)
+ if @prevent_duplicate_load
+ job_id = create_job_id(chunk, @dataset, table_id, @fields.to_a, @max_bad_records, @ignore_unknown_values)
+ else
+ job_id = nil
  end

- if _response.status.error_result
- log.error "job.insert API", project_id: @project, dataset: @dataset, table: table_id, message: _response.status.error_result.message
- raise "failed to load into bigquery"
+ create_upload_source(chunk) do |upload_source|
+ res = writer.create_load_job(@project, @dataset, table_id, upload_source, job_id, @fields, {
+ ignore_unknown_values: @ignore_unknown_values, max_bad_records: @max_bad_records,
+ timeout_sec: @request_timeout_sec, open_timeout_sec: @request_open_timeout_sec,
+ })
+ end
+ rescue Fluent::BigQuery::Writer::Error => e
+ if e.retryable?
+ raise e
+ elsif @secondary
+ flush_secondary(@secondary)
  end
-
- log.debug "finish load job", state: _response.status.state
  end

+ private
+
  def create_upload_source(chunk)
  chunk_is_file = @buffer_type == 'file'
  if chunk_is_file
@@ -511,200 +488,9 @@ module Fluent
  end
  end
  end
- end
-
- class FieldSchema
- def initialize(name, mode = :nullable)
- unless [:nullable, :required, :repeated].include?(mode)
- raise ConfigError, "Unrecognized mode for #{name}: #{mode}"
- end
- ### https://developers.google.com/bigquery/docs/tables
- # Each field has the following properties:
- #
- # name - The name must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
- # and must start with a letter or underscore. The maximum length is 128 characters.
- # https://cloud.google.com/bigquery/docs/reference/v2/tables#schema.fields.name
- unless name =~ /^[_A-Za-z][_A-Za-z0-9]{,127}$/
- raise Fluent::ConfigError, "invalid bigquery field name: '#{name}'"
- end
-
- @name = name
- @mode = mode
- end
-
- attr_reader :name, :mode
-
- def format(value)
- case @mode
- when :nullable
- format_one(value) unless value.nil?
- when :required
- raise "Required field #{name} cannot be null" if value.nil?
- format_one(value)
- when :repeated
- value.nil? ? [] : value.map {|v| format_one(v) }
- end
- end
-
- def format_one(value)
- raise NotImplementedError, "Must implement in a subclass"
- end
-
- def to_h
- {
- :name => name,
- :type => type.to_s.upcase,
- :mode => mode.to_s.upcase,
- }
- end
- end
-
- class StringFieldSchema < FieldSchema
- def type
- :string
- end
-
- def format_one(value)
- value.to_s
- end
- end
-
- class IntegerFieldSchema < FieldSchema
- def type
- :integer
- end
-
- def format_one(value)
- value.to_i
- end
- end
-
- class FloatFieldSchema < FieldSchema
- def type
- :float
- end
-
- def format_one(value)
- value.to_f
- end
- end
-
- class BooleanFieldSchema < FieldSchema
- def type
- :boolean
- end
-
- def format_one(value)
- !!value
- end
- end
-
- class TimestampFieldSchema < FieldSchema
- def type
- :timestamp
- end
-
- def format_one(value)
- value
- end
- end
-
- class RecordSchema < FieldSchema
- FIELD_TYPES = {
- string: StringFieldSchema,
- integer: IntegerFieldSchema,
- float: FloatFieldSchema,
- boolean: BooleanFieldSchema,
- timestamp: TimestampFieldSchema,
- record: RecordSchema
- }.freeze
-
- def initialize(name, mode = :nullable)
- super(name, mode)
- @fields = {}
- end
-
- def type
- :record
- end
-
- def [](name)
- @fields[name]
- end
-
- def to_a
- @fields.map do |_, field_schema|
- field_schema.to_h
- end
- end
-
- def to_h
- {
- :name => name,
- :type => type.to_s.upcase,
- :mode => mode.to_s.upcase,
- :fields => self.to_a,
- }
- end
-
- def load_schema(schema, allow_overwrite=true)
- schema.each do |field|
- raise ConfigError, 'field must have type' unless field.key?('type')
-
- name = field['name']
- mode = (field['mode'] || 'nullable').downcase.to_sym
-
- type = field['type'].downcase.to_sym
- field_schema_class = FIELD_TYPES[type]
- raise ConfigError, "Invalid field type: #{field['type']}" unless field_schema_class
-
- next if @fields.key?(name) and !allow_overwrite
-
- field_schema = field_schema_class.new(name, mode)
- @fields[name] = field_schema
- if type == :record
- raise ConfigError, "record field must have fields" unless field.key?('fields')
- field_schema.load_schema(field['fields'], allow_overwrite)
- end
- end
- end
-
- def register_field(name, type)
- if @fields.key?(name) and @fields[name].type != :timestamp
- raise ConfigError, "field #{name} is registered twice"
- end
- if name[/\./]
- recordname = $`
- fieldname = $'
- register_record_field(recordname)
- @fields[recordname].register_field(fieldname, type)
- else
- schema = FIELD_TYPES[type]
- raise ConfigError, "[Bug] Invalid field type #{type}" unless schema
- @fields[name] = schema.new(name)
- end
- end

- def format_one(record)
- out = {}
- @fields.each do |key, schema|
- value = record[key]
- formatted = schema.format(value)
- next if formatted.nil? # field does not exists, or null value
- out[key] = formatted
- end
- out
- end
-
- private
- def register_record_field(name)
- if !@fields.key?(name)
- @fields[name] = RecordSchema.new(name)
- else
- unless @fields[name].kind_of?(RecordSchema)
- raise ConfigError, "field #{name} is required to be a record but already registered as #{@field[name]}"
- end
- end
+ def create_job_id(chunk, dataset, table, schema, max_bad_records, ignore_unknown_values)
+ "fluentd_job_" + Digest::SHA1.hexdigest("#{chunk.unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
  end
  end
  end
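
Note: with prevent_duplicate_load enabled, the load job id is derived with SHA-1 from the buffer chunk's unique id plus the load settings, so retrying the same chunk re-submits the same job id and BigQuery can reject it as a duplicate job instead of loading the data twice. A quick check of the determinism; the helper name and inputs below are invented for illustration, and 'digest/sha1' is required explicitly here in case it is not already loaded:

    require 'digest/sha1'

    # Mirrors the shape of create_job_id above (illustration only).
    def job_id_for(unique_id, dataset, table, schema, max_bad_records, ignore_unknown_values)
      "fluentd_job_" + Digest::SHA1.hexdigest("#{unique_id}#{dataset}#{table}#{schema.to_s}#{max_bad_records}#{ignore_unknown_values}")
    end

    a = job_id_for("abc123", "mydataset", "mytable", [], 0, false)
    b = job_id_for("abc123", "mydataset", "mytable", [], 0, false)
    a == b  # => true: the same chunk and settings always produce the same job id
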