logstash-output-google_bigquery 0.1.0

checksums.yaml.gz ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     OTk2MDFkYjMwODRkZGUwODE5Yzk3MDhhNDU0MTE5NWNjZjZjYWM2NA==
+   data.tar.gz: !binary |-
+     NTdjMjA0N2IxOGJiNDY5MjFjMzFlMjMwZWE1MDllODI3OGUxOTBhNA==
+ SHA512:
+   metadata.gz: !binary |-
+     ZjdmODMzNGQzMDRmZDJmNDliNDBiNDJlNDM4Mzc2ZDU0OGE2OTczYmM5ZjI0
+     NWJjZjdiOTdhNzkxNjQwZTFlMTEyYWUxZmNmNzU2OWQ4MGNjZjc2NjBlODQ1
+     MTliMzQwNWM5NDk5MjBhMzBlMjE1ZTgyZTNmMTdmYTg3NGQ5MjY=
+   data.tar.gz: !binary |-
+     YjU0MWQ1MTA0OTMzZWQ4YmZlMjU4YWE5MzRiYTAyYTQ2ODA2NzM0NDA1ODhj
+     ODU5NzMyN2FiZmUxNWQzOWYzNjdjNzcyMWYyZmEwYzBmMWMwNTRlNWJhYTY4
+     NzMzN2EzMzUwNTUzZTRkNzlmMmY5YzQ4MWFjNWY5NTdmYjUzZGI=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files=[]
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/outputs/google_bigquery.rb ADDED
@@ -0,0 +1,571 @@
+
+ # Author: Rodrigo De Castro <rdc@google.com>
+ # Date: 2013-09-20
+ #
+ # Copyright 2013 Google Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+
+ # Summary: plugin to upload log events to Google BigQuery (BQ), rolling
+ # files based on the date pattern provided as a configuration setting. Events
+ # are written to files locally and, once a file is closed, this plugin uploads
+ # it to the configured BigQuery dataset.
+ #
+ # VERY IMPORTANT:
+ # 1 - To make good use of BigQuery, your log events should be parsed and
+ # structured. Consider using grok to parse your events into fields that can
+ # be uploaded to BQ.
+ # 2 - You must configure your plugin so it only receives events with the same
+ # structure, so that the BigQuery schema matches them. If you want to upload
+ # log events with different structures, use multiple configuration blocks and
+ # separate the different event types with Logstash conditionals. More details
+ # on Logstash conditionals can be found here:
+ # http://logstash.net/docs/1.2.1/configuration#conditionals
+ #
+ # For more info on Google BigQuery, please go to:
+ # https://developers.google.com/bigquery/
+ #
+ # In order to use this plugin, a Google service account must be used. For
+ # more information, please refer to:
+ # https://developers.google.com/storage/docs/authentication#service_accounts
+ #
+ # Recommendations:
+
+ # a - Experiment with the settings depending on how much log data you generate,
+ # how quickly you need to see "fresh" data, and how much data you could lose in
+ # the event of a crash. For instance, if you want to see recent data in BQ
+ # quickly, you could configure the plugin to upload data every minute or so
+ # (provided you have enough log events to justify that). Note also that if
+ # uploads are too frequent, there is no guarantee that they will be imported in
+ # the same order, so later data may be available before earlier data.
+
+ # b - BigQuery charges for storage and for queries, depending on how much data
+ # it reads to perform a query. These are additional aspects to consider when
+ # choosing the date pattern that will be used to create new tables, and also
+ # when composing queries in BQ. For more info on BigQuery pricing,
+ # please see:
+ # https://developers.google.com/bigquery/pricing
+ #
+ # USAGE:
+ # This is an example of logstash config:
+ #
+ # output {
+ #    google_bigquery {
+ #      project_id => "folkloric-guru-278"                          (required)
+ #      dataset => "logs"                                           (required)
+ #      csv_schema => "path:STRING,status:INTEGER,score:FLOAT"      (required)
+ #      key_path => "/path/to/privatekey.p12"                       (required)
+ #      key_password => "notasecret"                                (optional)
+ #      service_account => "1234@developer.gserviceaccount.com"     (required)
+ #      temp_directory => "/tmp/logstash-bq"                        (optional)
+ #      temp_file_prefix => "logstash_bq"                           (optional)
+ #      date_pattern => "%Y-%m-%dT%H:00"                            (optional)
+ #      flush_interval_secs => 2                                    (optional)
+ #      uploader_interval_secs => 60                                (optional)
+ #      deleter_interval_secs => 60                                 (optional)
+ #    }
+ # }
+ #
+ # Improvements TODO list:
+ # - Refactor common code between Google BQ and GCS plugins.
+ # - Turn Google API code into a Plugin Mixin (like AwsConfig).
+ # - There's no recover method, so if logstash/plugin crashes, files may not
+ # be uploaded to BQ.
+ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
+   config_name "google_bigquery"
+   milestone 1
+
+   # Google Cloud Project ID (number, not Project Name!).
+   config :project_id, :validate => :string, :required => true
+
+   # BigQuery dataset to which these events will be added.
+   config :dataset, :validate => :string, :required => true
+
+   # BigQuery table ID prefix to be used when creating new tables for log data.
+   # Table name will be <table_prefix>_<date>
+   config :table_prefix, :validate => :string, :default => "logstash"
+
+   # Schema for log data. It must follow this format:
+   # <field1-name>:<field1-type>,<field2-name>:<field2-type>,...
+   # Example: path:STRING,status:INTEGER,score:FLOAT
+   config :csv_schema, :validate => :string, :required => true
+
+   # Path to private key file for Google Service Account.
+   config :key_path, :validate => :string, :required => true
+
+   # Private key password for service account private key.
+   config :key_password, :validate => :string, :default => "notasecret"
+
+   # Service account to access Google APIs.
+   config :service_account, :validate => :string, :required => true
+
+   # Directory where temporary files are stored.
+   # Defaults to /tmp/logstash-bq-<random-suffix>
+   config :temp_directory, :validate => :string, :default => ""
+
+   # Temporary local file prefix. Log file will follow the format:
+   # <prefix>_hostname_date.part?.log
+   config :temp_file_prefix, :validate => :string, :default => "logstash_bq"
+
+   # Time pattern for BigQuery table, defaults to hourly tables.
+   # Must be a Time.strftime pattern: www.ruby-doc.org/core-2.0/Time.html#method-i-strftime
+   config :date_pattern, :validate => :string, :default => "%Y-%m-%dT%H:00"
+
+   # Flush interval in seconds for flushing writes to log files. 0 will flush
+   # on every message.
+   config :flush_interval_secs, :validate => :number, :default => 2
+
+   # Uploader interval when uploading new files to BigQuery. Adjust time based
+   # on your time pattern (for example, for hourly files, this interval can be
+   # around one hour).
+   config :uploader_interval_secs, :validate => :number, :default => 60
+
+   # Deleter interval when checking if upload jobs are done for file deletion.
+   # This only affects how long files are on the hard disk after the job is done.
+   config :deleter_interval_secs, :validate => :number, :default => 60
+
+   public
+   def register
+     require 'csv'
+     require "fileutils"
+     require "thread"
+
+     @logger.debug("BQ: register plugin")
+
+     @fields = Array.new
+
+     CSV.parse(@csv_schema.gsub('\"', '""')).flatten.each do |field|
+       temp = field.strip.split(":")
+
+       # Check that the field in the schema follows the format (<name>:<type>)
+       if temp.length != 2
+         raise "BigQuery schema must follow the format <field-name>:<field-type>"
+       end
+
+       @fields << { "name" => temp[0], "type" => temp[1] }
+     end
+
+     # Check that we have at least one field in the schema
+     if @fields.length == 0
+       raise "BigQuery schema must contain at least one field"
+     end
+
+     @json_schema = { "fields" => @fields }
+
+     @upload_queue = Queue.new
+     @delete_queue = Queue.new
+     @last_flush_cycle = Time.now
+     initialize_temp_directory()
+     initialize_current_log()
+     initialize_google_client()
+     initialize_uploader()
+     initialize_deleter()
+   end
+
+   # Method called for each log event. It writes the event to the current output
+   # file, flushing depending on flush interval configuration.
+   public
+   def receive(event)
+     return unless output?(event)
+
+     @logger.debug("BQ: receive method called", :event => event)
+
+     # Message must be written as json
+     message = event.to_json
+     # Remove "@" from property names
+     message = message.gsub(/\"@(\w+)\"/, '"\1"')
+
+     new_base_path = get_base_path()
+
+     # Time to roll file based on the date pattern? Or are we due to upload it to BQ?
+     if (@current_base_path != new_base_path || Time.now - @last_file_time >= @uploader_interval_secs)
+       @logger.debug("BQ: log file will be closed and uploaded",
+                     :filename => File.basename(@temp_file.to_path),
+                     :size => @temp_file.size.to_s,
+                     :uploader_interval_secs => @uploader_interval_secs.to_s)
+       # Close alone does not guarantee that data is physically written to disk,
+       # so flush it first.
+       @temp_file.fsync()
+       @temp_file.close()
+       initialize_next_log()
+     end
+
+     @temp_file.write(message)
+     @temp_file.write("\n")
+
+     sync_log_file()
+
+     @logger.debug("BQ: event appended to log file",
+                   :filename => File.basename(@temp_file.to_path))
+   end
+
+   public
+   def teardown
+     @logger.debug("BQ: teardown method called")
+
+     @temp_file.flush()
+     @temp_file.close()
+   end
+
+   private
+   ##
+   # Flushes temporary log file every flush_interval_secs seconds or so.
+   # This is triggered by events, but if there are no events there's no point
+   # flushing files anyway.
+   #
+   # Inspired by lib/logstash/outputs/file.rb (flush(fd), flush_pending_files)
+   def sync_log_file
+     if flush_interval_secs <= 0
+       @temp_file.fsync
+       return
+     end
+
+     return unless Time.now - @last_flush_cycle >= flush_interval_secs
+     @temp_file.fsync
+     @logger.debug("BQ: flushing file",
+                   :path => @temp_file.to_path,
+                   :fd => @temp_file)
+     @last_flush_cycle = Time.now
+   end
+
+   ##
+   # Creates temporary directory, if it does not exist.
+   #
+   # A random suffix is appended to the temporary directory.
+   def initialize_temp_directory
+     if @temp_directory.empty?
+       require "stud/temporary"
+       @temp_directory = Stud::Temporary.directory("logstash-bq")
+       @logger.info("BQ: temporary directory generated",
+                    :directory => @temp_directory)
+     end
+
+     if !(File.directory? @temp_directory)
+       @logger.debug("BQ: directory doesn't exist. Creating it.",
+                     :directory => @temp_directory)
+       FileUtils.mkdir_p(@temp_directory)
+     end
+   end
+
+   ##
+   # Starts thread to delete uploaded log files once their jobs are done.
+   #
+   # Deletion runs in a separate thread so it does not block the receive method above.
+   def initialize_deleter
+     @uploader = Thread.new do
+       @logger.debug("BQ: starting deleter")
+       while true
+         delete_item = @delete_queue.pop
+         job_id = delete_item["job_id"]
+         filename = delete_item["filename"]
+         job_status = get_job_status(job_id)
+         case job_status["state"]
+         when "DONE"
+           if job_status.has_key?("errorResult")
+             @logger.error("BQ: job failed, please enable debug and check full "\
+                           "response (probably the issue is an incompatible "\
+                           "schema). NOT deleting local file.",
+                           :job_id => job_id,
+                           :filename => filename,
+                           :job_status => job_status)
+           else
+             @logger.debug("BQ: job is done, deleting local temporary file ",
+                           :job_id => job_id,
+                           :filename => filename,
+                           :job_status => job_status)
+             File.delete(filename)
+           end
+         when "PENDING", "RUNNING"
+           @logger.debug("BQ: job is not done, NOT deleting local file yet.",
+                         :job_id => job_id,
+                         :filename => filename,
+                         :job_status => job_status)
+           @delete_queue << delete_item
+         else
+           @logger.error("BQ: unknown job status, please enable debug and "\
+                         "check full response (probably the issue is an "\
+                         "incompatible schema). NOT deleting local file yet.",
+                         :job_id => job_id,
+                         :filename => filename,
+                         :job_status => job_status)
+         end
+
+         sleep @deleter_interval_secs
+       end
+     end
+   end
+
+   ##
+   # Starts thread to upload log files.
+   #
+   # Uploading runs in a separate thread so it does not block the receive method above.
+   def initialize_uploader
+     @uploader = Thread.new do
+       @logger.debug("BQ: starting uploader")
+       while true
+         filename = @upload_queue.pop
+
+         # Reenqueue if it is still the current file.
+         if filename == @temp_file.to_path
+           if @current_base_path == get_base_path()
+             if Time.now - @last_file_time < @uploader_interval_secs
+               @logger.debug("BQ: reenqueue as log file is being currently appended to.",
+                             :filename => filename)
+               @upload_queue << filename
+               # If we got here, it means that older files were uploaded, so let's
+               # wait another interval before checking on this file again.
+               sleep @uploader_interval_secs
+               next
+             else
+               @logger.debug("BQ: flush and close file to be uploaded.",
+                             :filename => filename)
+               @temp_file.flush()
+               @temp_file.close()
+               initialize_next_log()
+             end
+           end
+         end
+
+         if File.size(filename) > 0
+           job_id = upload_object(filename)
+           @delete_queue << { "filename" => filename, "job_id" => job_id }
+         else
+           @logger.debug("BQ: skipping empty file.")
+           @logger.debug("BQ: delete local temporary file ",
+                         :filename => filename)
+           File.delete(filename)
+         end
+
+         sleep @uploader_interval_secs
+       end
+     end
+   end
+
+   ##
+   # Returns undated path used to construct base path and final full path.
+   # This path only includes directory, prefix, and hostname info.
+   def get_undated_path
+     return @temp_directory + File::SEPARATOR + @temp_file_prefix + "_" +
+       Socket.gethostname()
+   end
+
+   ##
+   # Returns the base path of the current log file: the undated path plus the
+   # current date formatted with the configured date pattern.
+   def get_base_path
+     return get_undated_path() + "_" + Time.now.strftime(@date_pattern)
+   end
+
+   ##
+   # Returns full path to the log file, based on the current base path and the
+   # current part number.
+   def get_full_path
+     return @current_base_path + ".part" + ("%03d" % @size_counter) + ".log"
+   end
+
+   ##
+   # Returns date from a temporary log file name.
+   def get_date_pattern(filename)
+     match = /^#{get_undated_path()}_(?<date>.*)\.part(\d+)\.log$/.match(filename)
+     return match[:date]
+   end
+
+   ##
+   # Returns latest part number for a base path. This method checks all existing
+   # log files in order to find the highest part number, so this file can be used
+   # for appending log events.
+   #
+   # The part number grows every time the log is rolled within the same date period.
+   def get_latest_part_number(base_path)
+     part_numbers = Dir.glob(base_path + ".part*.log").map do |item|
+       match = /^.*\.part(?<part_num>\d+).log$/.match(item)
+       next if match.nil?
+       match[:part_num].to_i
+     end
+
+     return part_numbers.max if part_numbers.any?
+     0
+   end
+
+   ##
+   # Opens current log file and updates @temp_file with an instance of IOWriter.
+   # This method also adds the file to the upload queue.
+   def open_current_file()
+     path = get_full_path()
+     stat = File.stat(path) rescue nil
+     if stat and stat.ftype == "fifo" and RUBY_PLATFORM == "java"
+       fd = java.io.FileWriter.new(java.io.File.new(path))
+     else
+       fd = File.new(path, "a")
+     end
+     @temp_file = IOWriter.new(fd)
+     @upload_queue << @temp_file.to_path
+   end
+
+   ##
+   # Opens the log file on plugin initialization, trying to resume from an
+   # existing file: it finds the highest existing part number for the current
+   # base path and appends to it.
+   def initialize_current_log
+     @current_base_path = get_base_path
+     @last_file_time = Time.now
+     @size_counter = get_latest_part_number(@current_base_path)
+     @logger.debug("BQ: resuming from latest part.",
+                   :part => @size_counter)
+     open_current_file()
+   end
+
+   ##
+   # Generates a new log file name based on the configuration options and opens
+   # the log file. The part number is incremented in case the base log file name
+   # is the same (e.g. the log file was not rolled over because the date pattern
+   # did not change).
+   def initialize_next_log
+     new_base_path = get_base_path
+     @size_counter = @current_base_path == new_base_path ? @size_counter + 1 : 0
+     @logger.debug("BQ: opening next log file.",
+                   :filename => @current_base_path,
+                   :part => @size_counter)
+     @current_base_path = new_base_path
+     @last_file_time = Time.now
+     open_current_file()
+   end
+
+   ##
+   # Initializes the Google API client and authorizes access.
+   def initialize_google_client
+     require "google/api_client"
+     require "openssl"
+
+     @client = Google::APIClient.new(:application_name =>
+                                     'Logstash Google BigQuery output plugin',
+                                     :application_version => '0.1')
+     @bq = @client.discovered_api('bigquery', 'v2')
+
+
+     key = Google::APIClient::PKCS12.load_key(@key_path, @key_password)
+     # Authorization scope reference:
+     # https://developers.google.com/bigquery/docs/authorization
+     service_account = Google::APIClient::JWTAsserter.new(@service_account,
+                                                          'https://www.googleapis.com/auth/bigquery',
+                                                          key)
+     @client.authorization = service_account.authorize
+   end
+
+   ##
+   # Returns the status of an upload (load) job in BigQuery.
+   def get_job_status(job_id)
+     begin
+       require 'json'
+       @logger.debug("BQ: check job status.",
+                     :job_id => job_id)
+       get_result = @client.execute(:api_method => @bq.jobs.get,
+                                    :parameters => {
+                                      'jobId' => job_id,
+                                      'projectId' => @project_id
+                                    })
+       response = JSON.parse(get_result.response.body)
+       @logger.debug("BQ: successfully invoked API.",
+                     :response => response)
+
+       if response.has_key?("error")
+         raise response["error"]
+       end
+
+       # Successful invocation
+       contents = response["status"]
+       return contents
+     rescue => e
+       @logger.error("BQ: failed to check status", :exception => e)
+       # TODO(rdc): limit retries?
+       sleep 1
+       retry
+     end
+   end
+
+   ##
+   # Uploads a local file to BigQuery by inserting a load job.
+   def upload_object(filename)
+     begin
+       require 'json'
+       table_id = @table_prefix + "_" + get_date_pattern(filename)
+       # BQ does not accept anything other than alphanumeric and _
+       # Ref: https://developers.google.com/bigquery/browser-tool-quickstart?hl=en
+       table_id = table_id.gsub!(':','_').gsub!('-', '_')
+
+       @logger.debug("BQ: upload object.",
+                     :filename => filename,
+                     :table_id => table_id)
+       media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
+       body = {
+         "configuration" => {
+           "load" => {
+             "sourceFormat" => "NEWLINE_DELIMITED_JSON",
+             "schema" => @json_schema,
+             "destinationTable" => {
+               "projectId" => @project_id,
+               "datasetId" => @dataset,
+               "tableId" => table_id
+             },
+             'createDisposition' => 'CREATE_IF_NEEDED',
+             'writeDisposition' => 'WRITE_APPEND'
+           }
+         }
+       }
+       insert_result = @client.execute(:api_method => @bq.jobs.insert,
+                                       :body_object => body,
+                                       :parameters => {
+                                         'uploadType' => 'multipart',
+                                         'projectId' => @project_id
+                                       },
+                                       :media => media)
+
+       job_id = JSON.parse(insert_result.response.body)["jobReference"]["jobId"]
+       @logger.debug("BQ: multipart insert",
+                     :job_id => job_id)
+       return job_id
+     rescue => e
+       @logger.error("BQ: failed to upload file", :exception => e)
+       # TODO(rdc): limit retries?
+       sleep 1
+       retry
+     end
+   end
+ end
+
+ ##
+ # Wrapper class that abstracts which IO is being used (for instance, regular
+ # files or GzipWriter).
+ #
+ # Inspired by lib/logstash/outputs/file.rb.
+ class IOWriter
+   def initialize(io)
+     @io = io
+   end
+   def write(*args)
+     @io.write(*args)
+   end
+   def flush
+     @io.flush
+   end
+   def method_missing(method_name, *args, &block)
+     if @io.respond_to?(method_name)
+       @io.send(method_name, *args, &block)
+     else
+       super
+     end
+   end
+   attr_accessor :active
+ end
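
The plugin header above recommends parsing events with grok and keeping one BigQuery schema per event structure, using conditionals to separate structures. A minimal pipeline sketch along those lines follows; the grok pattern, field names, type tag, project, key path and service account are placeholders, and in practice the csv_schema has to cover every field the events actually carry once the leading "@" is stripped from property names.

    filter {
      grok {
        match => [ "message", "%{IP:client} %{WORD:verb} %{URIPATHPARAM:request} %{NUMBER:status:int} %{NUMBER:bytes:int}" ]
      }
    }

    output {
      if [type] == "apache_access" {
        google_bigquery {
          project_id => "folkloric-guru-278"
          dataset => "logs"
          table_prefix => "apache_access"
          csv_schema => "timestamp:STRING,version:STRING,message:STRING,host:STRING,type:STRING,client:STRING,verb:STRING,request:STRING,status:INTEGER,bytes:INTEGER"
          key_path => "/path/to/privatekey.p12"
          service_account => "1234@developer.gserviceaccount.com"
          date_pattern => "%Y-%m-%dT%H:00"
        }
      }
    }

With the default hourly date pattern this yields tables named like apache_access_2014_11_06T15_00, since upload_object replaces ':' and '-' with '_' in table IDs.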
data/logstash-output-google_bigquery.gemspec ADDED
@@ -0,0 +1,27 @@
+ Gem::Specification.new do |s|
+
+   s.name = 'logstash-output-google_bigquery'
+   s.version = '0.1.0'
+   s.licenses = ['Apache License (2.0)']
+   s.summary = "Plugin to upload log events to Google BigQuery (BQ)"
+   s.description = "Plugin to upload log events to Google BigQuery (BQ)"
+   s.authors = ["Elasticsearch"]
+   s.email = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "output" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+   s.add_runtime_dependency 'google-api-client'
+
+ end
+
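
The "logstash_plugin" metadata flag above is how the gem identifies itself as a Logstash output plugin. A small sketch (not part of the gem) of reading that metadata back with plain RubyGems, assuming it is run from the root of the plugin checkout:

    require 'rubygems'

    # Evaluates the gemspec and returns a Gem::Specification.
    spec = Gem::Specification.load('logstash-output-google_bigquery.gemspec')
    puts spec.metadata['logstash_plugin']                # => "true"
    puts spec.metadata['group']                          # => "output"
    puts spec.runtime_dependencies.map(&:name).inspect   # => ["logstash", "google-api-client"]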
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename( URI(url).path )
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
+       size = (response["content-length"].to_i || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+           end
+         end
+       end
+       $stdout.write("\r \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exists?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /.tar.gz/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+           out = entry.full_name.split("/").last
+         end
+         File.join('vendor', out)
+       end
+     elsif download =~ /.gz/
+       ungz(download)
+     end
+   end
+
+ end
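
The "vendor" task above iterates over @files, which the Rakefile defines as an empty array, so this plugin vendors nothing. For reference, an entry the task could consume would look roughly like the sketch below; the URL, SHA1 and file list are placeholders only.

    @files = [
      {
        # Archive to download into vendor/ and verify against the SHA1.
        'url'   => 'https://example.com/downloads/some-dependency-1.0.tar.gz',
        'sha1'  => 'da39a3ee5e6b4b0d3255bfef95601890afd80709',
        # Optional: extract only these entries (paths relative to the tarball prefix).
        'files' => ['/lib/some_dependency.rb']
      }
    ]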
data/spec/outputs/google_bigquery_spec.rb ADDED
@@ -0,0 +1,6 @@
+ # encoding: utf-8
+ require 'spec_helper'
+ require 'logstash/outputs/google_bigquery'
+
+ describe LogStash::Outputs::GoogleBigQuery do
+ end
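
The spec file above is an empty placeholder. A minimal sketch of a first test that only exercises configuration handling; it never calls register (which would need a real key file and Google credentials), the settings are placeholders, and it assumes the RSpec expect syntax wired up by the spec_helper of this era:

    describe LogStash::Outputs::GoogleBigQuery do
      let(:config) do
        {
          "project_id"      => "folkloric-guru-278",
          "dataset"         => "logs",
          "csv_schema"      => "path:STRING,status:INTEGER,score:FLOAT",
          "key_path"        => "/path/to/privatekey.p12",
          "service_account" => "1234@developer.gserviceaccount.com"
        }
      end

      it "accepts a valid minimal configuration" do
        expect { described_class.new(config) }.not_to raise_error
      end
    end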
metadata ADDED
@@ -0,0 +1,89 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-output-google_bigquery
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-06 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: google-api-client
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: Plugin to upload log events to Google BigQuery (BQ)
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - Rakefile
+ - lib/logstash/outputs/google_bigquery.rb
+ - logstash-output-google_bigquery.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/outputs/google_bigquery_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: output
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Plugin to upload log events to Google BigQuery (BQ)
+ test_files:
+ - spec/outputs/google_bigquery_spec.rb