logstash-output-google_bigquery 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+ metadata.gz: !binary |-
+ OTk2MDFkYjMwODRkZGUwODE5Yzk3MDhhNDU0MTE5NWNjZjZjYWM2NA==
+ data.tar.gz: !binary |-
+ NTdjMjA0N2IxOGJiNDY5MjFjMzFlMjMwZWE1MDllODI3OGUxOTBhNA==
+ SHA512:
+ metadata.gz: !binary |-
+ ZjdmODMzNGQzMDRmZDJmNDliNDBiNDJlNDM4Mzc2ZDU0OGE2OTczYmM5ZjI0
+ NWJjZjdiOTdhNzkxNjQwZTFlMTEyYWUxZmNmNzU2OWQ4MGNjZjc2NjBlODQ1
+ MTliMzQwNWM5NDk5MjBhMzBlMjE1ZTgyZTNmMTdmYTg3NGQ5MjY=
+ data.tar.gz: !binary |-
+ YjU0MWQ1MTA0OTMzZWQ4YmZlMjU4YWE5MzRiYTAyYTQ2ODA2NzM0NDA1ODhj
+ ODU5NzMyN2FiZmUxNWQzOWYzNjdjNzcyMWYyZmEwYzBmMWMwNTRlNWJhYTY4
+ NzMzN2EzMzUwNTUzZTRkNzlmMmY5YzQ4MWFjNWY5NTdmYjUzZGI=
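The hunk above is the gem's checksums.yaml.gz: base64-wrapped SHA1 and SHA512 hex digests of the package's internal metadata.gz and data.tar.gz. A minimal Ruby sketch of how one of these digests could be recomputed for comparison is shown below; the local file path is hypothetical and the snippet is not part of the package.

require "base64"
require "digest"

# The YAML keys and values are base64: "U0hBMQ==" decodes to "SHA1", and each
# value decodes to a hex digest of the corresponding archive member.
recorded_sha1 = Base64.decode64("OTk2MDFkYjMwODRkZGUwODE5Yzk3MDhhNDU0MTE5NWNjZjZjYWM2NA==")
# "metadata.gz" stands for a locally extracted copy of the gem's metadata.gz
# (hypothetical path); a matching digest means the member is intact.
computed_sha1 = Digest::SHA1.hexdigest(File.binread("metadata.gz"))
puts(recorded_sha1 == computed_sha1 ? "metadata.gz checksum OK" : "checksum mismatch!")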
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
+ gem 'archive-tar-minitar'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files=[]
+
+ task :default do
+ system("rake -T")
+ end
+
data/lib/logstash/outputs/google_bigquery.rb ADDED
@@ -0,0 +1,571 @@
+
+ # Author: Rodrigo De Castro <rdc@google.com>
+ # Date: 2013-09-20
+ #
+ # Copyright 2013 Google Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+
+ # Summary: plugin to upload log events to Google BigQuery (BQ), rolling
+ # files based on the date pattern provided as a configuration setting. Events
+ # are written to files locally and, once a file is closed, this plugin uploads
+ # it to the configured BigQuery dataset.
+ #
+ # VERY IMPORTANT:
+ # 1 - To make good use of BigQuery, your log events should be parsed and
+ # structured. Consider using grok to parse your events into fields that can
+ # be uploaded to BQ.
+ # 2 - You must configure your plugin so it gets events with the same structure,
+ # so the BigQuery schema suits them. In case you want to upload log events
+ # with different structures, you can utilize multiple configuration blocks,
+ # separating different log events with Logstash conditionals. More details on
+ # Logstash conditionals can be found here:
+ # http://logstash.net/docs/1.2.1/configuration#conditionals
+ #
+ # For more info on Google BigQuery, please go to:
+ # https://developers.google.com/bigquery/
+ #
+ # In order to use this plugin, a Google service account must be used. For
+ # more information, please refer to:
+ # https://developers.google.com/storage/docs/authentication#service_accounts
+ #
+ # Recommendations:
+
+ # a - Experiment with the settings depending on how much log data you generate,
+ # your needs to see "fresh" data, and how much data you could lose in the event
+ # of a crash. For instance, if you want to see recent data in BQ quickly, you
+ # could configure the plugin to upload data every minute or so (provided you
+ # have enough log events to justify that). Note also that if uploads are too
+ # frequent, there is no guarantee that they will be imported in the same order,
+ # so later data may be available before earlier data.
+
+ # b - BigQuery charges for storage and for queries, depending on how much data
+ # it reads to perform a query. These are other aspects to consider when
+ # choosing the date pattern that will be used to create new tables, and also
+ # how to compose the queries when using BQ. For more info on BigQuery Pricing,
+ # please see:
+ # https://developers.google.com/bigquery/pricing
+ #
+ # USAGE:
+ # This is an example Logstash config:
+ #
+ # output {
+ # google_bigquery {
+ # project_id => "folkloric-guru-278" (required)
+ # dataset => "logs" (required)
+ # csv_schema => "path:STRING,status:INTEGER,score:FLOAT" (required)
+ # key_path => "/path/to/privatekey.p12" (required)
+ # key_password => "notasecret" (optional)
+ # service_account => "1234@developer.gserviceaccount.com" (required)
+ # temp_directory => "/tmp/logstash-bq" (optional)
+ # temp_file_prefix => "logstash_bq" (optional)
+ # date_pattern => "%Y-%m-%dT%H:00" (optional)
+ # flush_interval_secs => 2 (optional)
+ # uploader_interval_secs => 60 (optional)
+ # deleter_interval_secs => 60 (optional)
+ # }
+ # }
+ #
+ # Improvements TODO list:
+ # - Refactor common code between Google BQ and GCS plugins.
+ # - Turn Google API code into a Plugin Mixin (like AwsConfig).
+ # - There's no recover method, so if logstash/plugin crashes, files may not
+ # be uploaded to BQ.
+ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
+ config_name "google_bigquery"
+ milestone 1
+
+ # Google Cloud Project ID (number, not Project Name!).
+ config :project_id, :validate => :string, :required => true
+
+ # BigQuery dataset to which these events will be added.
+ config :dataset, :validate => :string, :required => true
+
+ # BigQuery table ID prefix to be used when creating new tables for log data.
+ # Table name will be <table_prefix>_<date>
+ config :table_prefix, :validate => :string, :default => "logstash"
+
+ # Schema for log data. It must follow this format:
+ # <field1-name>:<field1-type>,<field2-name>:<field2-type>,...
+ # Example: path:STRING,status:INTEGER,score:FLOAT
+ config :csv_schema, :validate => :string, :required => true
+
+ # Path to private key file for Google Service Account.
+ config :key_path, :validate => :string, :required => true
+
+ # Private key password for service account private key.
+ config :key_password, :validate => :string, :default => "notasecret"
+
+ # Service account to access Google APIs.
+ config :service_account, :validate => :string, :required => true
+
+ # Directory where temporary files are stored.
+ # Defaults to /tmp/logstash-bq-<random-suffix>
+ config :temp_directory, :validate => :string, :default => ""
+
+ # Temporary local file prefix. Log file will follow the format:
+ # <prefix>_hostname_date.part?.log
+ config :temp_file_prefix, :validate => :string, :default => "logstash_bq"
+
+ # Time pattern for BigQuery table, defaults to hourly tables.
+ # Must be a Time.strftime pattern: www.ruby-doc.org/core-2.0/Time.html#method-i-strftime
+ config :date_pattern, :validate => :string, :default => "%Y-%m-%dT%H:00"
+
+ # Flush interval in seconds for flushing writes to log files. 0 will flush
+ # on every message.
+ config :flush_interval_secs, :validate => :number, :default => 2
+
+ # Uploader interval when uploading new files to BigQuery. Adjust time based
+ # on your time pattern (for example, for hourly files, this interval can be
+ # around one hour).
+ config :uploader_interval_secs, :validate => :number, :default => 60
+
+ # Deleter interval when checking if upload jobs are done for file deletion.
+ # This only affects how long files are on the hard disk after the job is done.
+ config :deleter_interval_secs, :validate => :number, :default => 60
+
+ public
+ def register
+ require 'csv'
+ require "fileutils"
+ require "thread"
+
+ @logger.debug("BQ: register plugin")
+
+ @fields = Array.new
+
+ CSV.parse(@csv_schema.gsub('\"', '""')).flatten.each do |field|
+ temp = field.strip.split(":")
+
+ # Check that the field in the schema follows the format (<name>:<value>)
+ if temp.length != 2
+ raise "BigQuery schema must follow the format <field-name>:<field-value>"
+ end
+
+ @fields << { "name" => temp[0], "type" => temp[1] }
+ end
+
+ # Check that we have at least one field in the schema
+ if @fields.length == 0
+ raise "BigQuery schema must contain at least one field"
+ end
+
+ @json_schema = { "fields" => @fields }
+
+ @upload_queue = Queue.new
+ @delete_queue = Queue.new
+ @last_flush_cycle = Time.now
+ initialize_temp_directory()
+ initialize_current_log()
+ initialize_google_client()
+ initialize_uploader()
+ initialize_deleter()
+ end
+
+ # Method called for each log event. It writes the event to the current output
+ # file, flushing depending on flush interval configuration.
+ public
+ def receive(event)
+ return unless output?(event)
+
+ @logger.debug("BQ: receive method called", :event => event)
+
+ # Message must be written as json
+ message = event.to_json
+ # Remove "@" from property names
+ message = message.gsub(/\"@(\w+)\"/, '"\1"')
+
+ new_base_path = get_base_path()
+
+ # Time to roll file based on the date pattern? Or are we due to upload it to BQ?
+ if (@current_base_path != new_base_path || Time.now - @last_file_time >= @uploader_interval_secs)
+ @logger.debug("BQ: log file will be closed and uploaded",
+ :filename => File.basename(@temp_file.to_path),
+ :size => @temp_file.size.to_s,
+ :uploader_interval_secs => @uploader_interval_secs.to_s)
+ # Close alone does not guarantee that data is physically written to disk,
+ # so fsync it first.
+ @temp_file.fsync()
+ @temp_file.close()
+ initialize_next_log()
+ end
+
+ @temp_file.write(message)
+ @temp_file.write("\n")
+
+ sync_log_file()
+
+ @logger.debug("BQ: event appended to log file",
+ :filename => File.basename(@temp_file.to_path))
+ end
+
+ public
+ def teardown
+ @logger.debug("BQ: teardown method called")
+
+ @temp_file.flush()
+ @temp_file.close()
+ end
+
+ private
+ ##
+ # Flushes temporary log file every flush_interval_secs seconds or so.
+ # This is triggered by events, but if there are no events there's no point
+ # flushing files anyway.
+ #
+ # Inspired by lib/logstash/outputs/file.rb (flush(fd), flush_pending_files)
+ def sync_log_file
+ if flush_interval_secs <= 0
+ @temp_file.fsync
+ return
+ end
+
+ return unless Time.now - @last_flush_cycle >= flush_interval_secs
+ @temp_file.fsync
+ @logger.debug("BQ: flushing file",
+ :path => @temp_file.to_path,
+ :fd => @temp_file)
+ @last_flush_cycle = Time.now
+ end
+
+ ##
+ # Creates temporary directory, if it does not exist.
+ #
+ # A random suffix is appended to the temporary directory.
+ def initialize_temp_directory
+ if @temp_directory.empty?
+ require "stud/temporary"
+ @temp_directory = Stud::Temporary.directory("logstash-bq")
+ @logger.info("BQ: temporary directory generated",
+ :directory => @temp_directory)
+ end
+
+ if !(File.directory? @temp_directory)
+ @logger.debug("BQ: directory doesn't exist. Creating it.",
+ :directory => @temp_directory)
+ FileUtils.mkdir_p(@temp_directory)
+ end
+ end
+
+ ##
+ # Starts thread to delete uploaded log files once their jobs are done.
+ #
+ # Deletion runs in a separate thread so it does not block the receive method above.
+ def initialize_deleter
+ @deleter = Thread.new do
+ @logger.debug("BQ: starting deleter")
+ while true
+ delete_item = @delete_queue.pop
+ job_id = delete_item["job_id"]
+ filename = delete_item["filename"]
+ job_status = get_job_status(job_id)
+ case job_status["state"]
+ when "DONE"
+ if job_status.has_key?("errorResult")
+ @logger.error("BQ: job failed, please enable debug and check full "\
+ "response (probably the issue is an incompatible "\
+ "schema). NOT deleting local file.",
+ :job_id => job_id,
+ :filename => filename,
+ :job_status => job_status)
+ else
+ @logger.debug("BQ: job is done, deleting local temporary file ",
+ :job_id => job_id,
+ :filename => filename,
+ :job_status => job_status)
+ File.delete(filename)
+ end
+ when "PENDING", "RUNNING"
+ @logger.debug("BQ: job is not done, NOT deleting local file yet.",
+ :job_id => job_id,
+ :filename => filename,
+ :job_status => job_status)
+ @delete_queue << delete_item
+ else
+ @logger.error("BQ: unknown job status, please enable debug and "\
+ "check full response (probably the issue is an "\
+ "incompatible schema). NOT deleting local file yet.",
+ :job_id => job_id,
+ :filename => filename,
+ :job_status => job_status)
+ end
+
+ sleep @deleter_interval_secs
+ end
+ end
+ end
+
+ ##
+ # Starts thread to upload log files.
+ #
+ # Uploading runs in a separate thread so it does not block the receive method above.
+ def initialize_uploader
+ @uploader = Thread.new do
+ @logger.debug("BQ: starting uploader")
+ while true
+ filename = @upload_queue.pop
+
+ # Reenqueue if it is still the current file.
+ if filename == @temp_file.to_path
+ if @current_base_path == get_base_path()
+ if Time.now - @last_file_time < @uploader_interval_secs
+ @logger.debug("BQ: reenqueue as log file is being currently appended to.",
+ :filename => filename)
+ @upload_queue << filename
+ # If we got here, it means that older files were uploaded, so let's
+ # wait another minute before checking on this file again.
+ sleep @uploader_interval_secs
+ next
+ else
+ @logger.debug("BQ: flush and close file to be uploaded.",
+ :filename => filename)
+ @temp_file.flush()
+ @temp_file.close()
+ initialize_next_log()
+ end
+ end
+ end
+
+ if File.size(filename) > 0
+ job_id = upload_object(filename)
+ @delete_queue << { "filename" => filename, "job_id" => job_id }
+ else
+ @logger.debug("BQ: skipping empty file.")
+ @logger.debug("BQ: delete local temporary file ",
+ :filename => filename)
+ File.delete(filename)
+ end
+
+ sleep @uploader_interval_secs
+ end
+ end
+ end
+
+ ##
+ # Returns undated path used to construct base path and final full path.
+ # This path only includes directory, prefix, and hostname info.
+ def get_undated_path
+ return @temp_directory + File::SEPARATOR + @temp_file_prefix + "_" +
+ Socket.gethostname()
+ end
+
+ ##
+ # Returns base path to log file that is invariant regardless of any
+ # user options.
+ def get_base_path
+ return get_undated_path() + "_" + Time.now.strftime(@date_pattern)
+ end
+
+ ##
+ # Returns full path to the log file based on global variables (like
+ # current_base_path) and configuration options (max file size).
+ def get_full_path
+ return @current_base_path + ".part" + ("%03d" % @size_counter) + ".log"
+ end
+
+ ##
+ # Returns date from a temporary log file name.
+ def get_date_pattern(filename)
+ match = /^#{get_undated_path()}_(?<date>.*)\.part(\d+)\.log$/.match(filename)
+ return match[:date]
+ end
+
+ ##
+ # Returns latest part number for a base path. This method checks all existing
+ # log files in order to find the highest part number, so this file can be used
+ # for appending log events.
+ #
+ # Only applicable if max file size is enabled.
+ def get_latest_part_number(base_path)
+ part_numbers = Dir.glob(base_path + ".part*.log").map do |item|
+ match = /^.*\.part(?<part_num>\d+).log$/.match(item)
+ next if match.nil?
+ match[:part_num].to_i
+ end
+
+ return part_numbers.max if part_numbers.any?
+ 0
+ end
+
+ ##
+ # Opens current log file and updates @temp_file with an instance of IOWriter.
+ # This method also adds file to the upload queue.
+ def open_current_file()
+ path = get_full_path()
+ stat = File.stat(path) rescue nil
+ if stat and stat.ftype == "fifo" and RUBY_PLATFORM == "java"
+ fd = java.io.FileWriter.new(java.io.File.new(path))
+ else
+ fd = File.new(path, "a")
+ end
+ @temp_file = IOWriter.new(fd)
+ @upload_queue << @temp_file.to_path
+ end
+
+ ##
+ # Opens log file on plugin initialization, trying to resume from an existing
+ # file. If max file size is enabled, find the highest part number and resume
+ # from it.
+ def initialize_current_log
+ @current_base_path = get_base_path
+ @last_file_time = Time.now
+ @size_counter = get_latest_part_number(@current_base_path)
+ @logger.debug("BQ: resuming from latest part.",
+ :part => @size_counter)
+ open_current_file()
+ end
+
+ ##
+ # Generates new log file name based on configuration options and opens log
+ # file. If max file size is enabled, the part number is incremented in case
+ # the base log file name is the same (e.g. the log file was not rolled given the
+ # date pattern).
+ def initialize_next_log
+ new_base_path = get_base_path
+ @size_counter = @current_base_path == new_base_path ? @size_counter + 1 : 0
+ @logger.debug("BQ: opening next log file.",
+ :filename => @current_base_path,
+ :part => @size_counter)
+ @current_base_path = new_base_path
+ @last_file_time = Time.now
+ open_current_file()
+ end
+
+ ##
+ # Initializes the Google API client and authorizes access.
+ def initialize_google_client
+ require "google/api_client"
+ require "openssl"
+
+ @client = Google::APIClient.new(:application_name =>
+ 'Logstash Google BigQuery output plugin',
+ :application_version => '0.1')
+ @bq = @client.discovered_api('bigquery', 'v2')
+
+
+ key = Google::APIClient::PKCS12.load_key(@key_path, @key_password)
+ # Authorization scope reference:
+ # https://developers.google.com/bigquery/docs/authorization
+ service_account = Google::APIClient::JWTAsserter.new(@service_account,
+ 'https://www.googleapis.com/auth/bigquery',
+ key)
+ @client.authorization = service_account.authorize
+ end
+
+ ##
+ # Returns the status of a BigQuery load job.
+ def get_job_status(job_id)
+ begin
+ require 'json'
+ @logger.debug("BQ: check job status.",
+ :job_id => job_id)
+ get_result = @client.execute(:api_method => @bq.jobs.get,
+ :parameters => {
+ 'jobId' => job_id,
+ 'projectId' => @project_id
+ })
+ response = JSON.parse(get_result.response.body)
+ @logger.debug("BQ: successfully invoked API.",
+ :response => response)
+
+ if response.has_key?("error")
+ raise response["error"]
+ end
+
+ # Successful invocation
+ contents = response["status"]
+ return contents
+ rescue => e
+ @logger.error("BQ: failed to check status", :exception => e)
+ # TODO(rdc): limit retries?
+ sleep 1
+ retry
+ end
+ end
+
+ ##
+ # Uploads a local file to BigQuery via a load job.
+ def upload_object(filename)
+ begin
+ require 'json'
+ table_id = @table_prefix + "_" + get_date_pattern(filename)
+ # BQ does not accept anything other than alphanumeric and _
+ # Ref: https://developers.google.com/bigquery/browser-tool-quickstart?hl=en
+ table_id = table_id.gsub(':', '_').gsub('-', '_')
+
+ @logger.debug("BQ: upload object.",
+ :filename => filename,
+ :table_id => table_id)
+ media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
+ body = {
+ "configuration" => {
+ "load" => {
+ "sourceFormat" => "NEWLINE_DELIMITED_JSON",
+ "schema" => @json_schema,
+ "destinationTable" => {
+ "projectId" => @project_id,
+ "datasetId" => @dataset,
+ "tableId" => table_id
+ },
+ 'createDisposition' => 'CREATE_IF_NEEDED',
+ 'writeDisposition' => 'WRITE_APPEND'
+ }
+ }
+ }
+ insert_result = @client.execute(:api_method => @bq.jobs.insert,
+ :body_object => body,
+ :parameters => {
+ 'uploadType' => 'multipart',
+ 'projectId' => @project_id
+ },
+ :media => media)
+
+ job_id = JSON.parse(insert_result.response.body)["jobReference"]["jobId"]
+ @logger.debug("BQ: multipart insert",
+ :job_id => job_id)
+ return job_id
+ rescue => e
+ @logger.error("BQ: failed to upload file", :exception => e)
+ # TODO(rdc): limit retries?
+ sleep 1
+ retry
+ end
+ end
+ end
+
+ ##
+ # Wrapper class that abstracts which IO is being used (for instance, regular
+ # files or GzipWriter).
+ #
+ # Inspired by lib/logstash/outputs/file.rb.
+ class IOWriter
+ def initialize(io)
+ @io = io
+ end
+ def write(*args)
+ @io.write(*args)
+ end
+ def flush
+ @io.flush
+ end
+ def method_missing(method_name, *args, &block)
+ if @io.respond_to?(method_name)
+ @io.send(method_name, *args, &block)
+ else
+ super
+ end
+ end
+ attr_accessor :active
+ end
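To make the plugin's data flow concrete, the following standalone Ruby sketch mirrors two steps from the source above: how register parses csv_schema into the JSON schema sent with the BigQuery load job, and how receive serializes an event into one newline-delimited JSON row. The sample schema comes from the usage comment; the sample event is hypothetical and the sketch is not part of the gem.

require "csv"
require "json"

# Mirror of the schema parsing in register: "name:TYPE,..." becomes the
# { "fields" => [...] } structure used as the load job's "schema" value.
csv_schema = "path:STRING,status:INTEGER,score:FLOAT"   # sample value from the docs above
fields = CSV.parse(csv_schema.gsub('\"', '""')).flatten.map do |field|
  parts = field.strip.split(":")
  raise "BigQuery schema must follow the format <field-name>:<field-value>" if parts.length != 2
  { "name" => parts[0], "type" => parts[1] }
end
json_schema = { "fields" => fields }
puts JSON.pretty_generate(json_schema)

# Mirror of receive: the event is serialized as JSON and the leading "@" is
# stripped from property names before the row is appended to the local file.
event = { "@timestamp" => "2014-11-06T00:00:00Z", "path" => "/index.html",
          "status" => 200, "score" => 0.5 }              # hypothetical event
row = event.to_json.gsub(/\"@(\w+)\"/, '"\1"')
puts row   # => one NEWLINE_DELIMITED_JSON line for the BigQuery load job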
data/logstash-output-google_bigquery.gemspec ADDED
@@ -0,0 +1,27 @@
+ Gem::Specification.new do |s|
+
+ s.name = 'logstash-output-google_bigquery'
+ s.version = '0.1.0'
+ s.licenses = ['Apache License (2.0)']
+ s.summary = "Plugin to upload log events to Google BigQuery (BQ)"
+ s.description = "Plugin to upload log events to Google BigQuery (BQ)"
+ s.authors = ["Elasticsearch"]
+ s.email = 'richard.pijnenburg@elasticsearch.com'
+ s.homepage = "http://logstash.net/"
+ s.require_paths = ["lib"]
+
+ # Files
+ s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
+
+ # Tests
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+ # Special flag to let us know this is actually a logstash plugin
+ s.metadata = { "logstash_plugin" => "true", "group" => "output" }
+
+ # Gem dependencies
+ s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+ s.add_runtime_dependency 'google-api-client'
+
+ end
+
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+ puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+ return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+ mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+
+ puts "Downloading #{url}"
+ actual_sha1 = download(url, output)
+
+ if actual_sha1 != sha1
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+ end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+ filename = File.basename( URI(url).path )
+ output = "vendor/#{filename}"
+ task output => [ "vendor/" ] do
+ begin
+ actual_sha1 = file_sha1(output)
+ if actual_sha1 != sha1
+ fetch(url, sha1, output)
+ end
+ rescue Errno::ENOENT
+ fetch(url, sha1, output)
+ end
+ end.invoke
+
+ return output
+ end
+
+ def file_sha1(path)
+ digest = Digest::SHA1.new
+ fd = File.new(path, "r")
+ while true
+ begin
+ digest << fd.sysread(16384)
+ rescue EOFError
+ break
+ end
+ end
+ return digest.hexdigest
+ ensure
+ fd.close if fd
+ end
+
+ def download(url, output)
+ uri = URI(url)
+ digest = Digest::SHA1.new
+ tmp = "#{output}.tmp"
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+ request = Net::HTTP::Get.new(uri.path)
+ http.request(request) do |response|
+ fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+ size = (response["content-length"].to_i || -1).to_f
+ count = 0
+ File.open(tmp, "w") do |fd|
+ response.read_body do |chunk|
+ fd.write(chunk)
+ digest << chunk
+ if size > 0 && $stdout.tty?
+ count += chunk.bytesize
+ $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
+ end
+ end
+ end
+ $stdout.write("\r \r") if $stdout.tty?
+ end
+ end
+
+ File.rename(tmp, output)
+
+ return digest.hexdigest
+ rescue SocketError => e
+ puts "Failure while downloading #{url}: #{e}"
+ raise
+ ensure
+ File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+ require "archive/tar/minitar"
+ tgz = Zlib::GzipReader.new(File.open(tarball))
+ # Pull out typesdb
+ tar = Archive::Tar::Minitar::Input.open(tgz)
+ tar.each do |entry|
+ path = block.call(entry)
+ next if path.nil?
+ parent = File.dirname(path)
+
+ mkdir_p parent unless File.directory?(parent)
+
+ # Skip this file if the output file is the same size
+ if entry.directory?
+ mkdir path unless File.directory?(path)
+ else
+ entry_mode = entry.instance_eval { @mode } & 0777
+ if File.exists?(path)
+ stat = File.stat(path)
+ # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+ # expose headers in the entry.
+ entry_size = entry.instance_eval { @size }
+ # If file sizes are same, skip writing.
+ next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+ end
+ puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+ File.open(path, "w") do |fd|
+ # eof? check lets us skip empty files. Necessary because the API provided by
+ # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+ # IO object. Something about empty files in this EntryStream causes
+ # IO.copy_stream to throw "can't convert nil into String" on JRuby
+ # TODO(sissel): File a bug about this.
+ while !entry.eof?
+ chunk = entry.read(16384)
+ fd.write(chunk)
+ end
+ #IO.copy_stream(entry, fd)
+ end
+ File.chmod(entry_mode, path)
+ end
+ end
+ tar.close
+ File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+
+ outpath = file.gsub('.gz', '')
+ tgz = Zlib::GzipReader.new(File.open(file))
+ begin
+ File.open(outpath, "w") do |out|
+ IO::copy_stream(tgz, out)
+ end
+ File.unlink(file)
+ rescue
+ File.unlink(outpath) if File.file?(outpath)
+ raise
+ end
+ tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+
+ @files.each do |file|
+ download = file_fetch(file['url'], file['sha1'])
+ if download =~ /.tar.gz/
+ prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+ untar(download) do |entry|
+ if !file['files'].nil?
+ next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+ out = entry.full_name.split("/").last
+ end
+ File.join('vendor', out)
+ end
+ elsif download =~ /.gz/
+ ungz(download)
+ end
+ end
+
+ end
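The vendor task above iterates over the @files array declared in the Rakefile, which this gem leaves empty, so `rake vendor` is a no-op here. For illustration only, an entry would plausibly look like the sketch below; the URL, SHA1, and file list are hypothetical and not part of this package.

# Hypothetical @files entry for the vendor task (this gem ships none):
@files = [
  {
    # file_fetch downloads into vendor/ and verifies the SHA1 before use.
    'url'   => 'https://example.com/downloads/sample-library-1.0.0.tar.gz',
    'sha1'  => '0123456789abcdef0123456789abcdef01234567',
    # Optional: only these entries are extracted from the tarball by untar.
    'files' => ['/lib/sample.rb']
  }
]
# Running `rake vendor` would then fetch, checksum, and unpack the archive into
# the vendor/ directory picked up by the gemspec's Dir.glob('vendor/*').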
data/spec/outputs/google_bigquery_spec.rb ADDED
@@ -0,0 +1,6 @@
+ # encoding: utf-8
+ require 'spec_helper'
+ require 'logstash/outputs/google_bigquery'
+
+ describe LogStash::Outputs::GoogleBigQuery do
+ end
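The shipped spec is an empty describe block. A minimal sketch of a first test is shown below; it is not part of the gem, exercises only the csv_schema validation in register (which raises before any Google API or filesystem access), and uses placeholder values for the key path and service account.

# Illustrative spec only; assumes the config names documented in the plugin source above.
require 'spec_helper'
require 'logstash/outputs/google_bigquery'

describe LogStash::Outputs::GoogleBigQuery do
  let(:config) do
    {
      "project_id"      => "my-project",
      "dataset"         => "logs",
      "csv_schema"      => "path:STRING,status",        # malformed on purpose
      "key_path"        => "/path/to/privatekey.p12",   # placeholder
      "service_account" => "1234@developer.gserviceaccount.com"
    }
  end

  it "rejects a csv_schema entry without a type" do
    plugin = described_class.new(config)
    expect { plugin.register }.to raise_error(/BigQuery schema must follow the format/)
  end
end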
metadata ADDED
@@ -0,0 +1,89 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-output-google_bigquery
+ version: !ruby/object:Gem::Version
+ version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-06 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: logstash
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 1.4.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: 2.0.0
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 1.4.0
+ - - <
+ - !ruby/object:Gem::Version
+ version: 2.0.0
+ - !ruby/object:Gem::Dependency
+ name: google-api-client
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ description: Plugin to upload log events to Google BigQuery (BQ)
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - Rakefile
+ - lib/logstash/outputs/google_bigquery.rb
+ - logstash-output-google_bigquery.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/outputs/google_bigquery_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+ logstash_plugin: 'true'
+ group: output
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Plugin to upload log events to Google BigQuery (BQ)
+ test_files:
+ - spec/outputs/google_bigquery_spec.rb