logstash-input-google_cloud_storage 0.9.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +9 -0
  3. data/CONTRIBUTING.md +7 -0
  4. data/CONTRIBUTORS +11 -0
  5. data/Gemfile +10 -0
  6. data/LICENSE +11 -0
  7. data/README.md +95 -0
  8. data/docs/configuration.asciidoc +112 -0
  9. data/docs/examples.asciidoc +84 -0
  10. data/docs/index.asciidoc +43 -0
  11. data/docs/metadata.asciidoc +20 -0
  12. data/docs/resources.asciidoc +6 -0
  13. data/lib/logstash-input-google_cloud_storage_jars.rb +38 -0
  14. data/lib/logstash/inputs/cloud_storage/blob_adapter.rb +92 -0
  15. data/lib/logstash/inputs/cloud_storage/blob_filter.rb +99 -0
  16. data/lib/logstash/inputs/cloud_storage/client.rb +87 -0
  17. data/lib/logstash/inputs/cloud_storage/file_reader.rb +59 -0
  18. data/lib/logstash/inputs/cloud_storage/processed_db.rb +53 -0
  19. data/lib/logstash/inputs/google_cloud_storage.rb +130 -0
  20. data/logstash-input-google_cloud_storage.gemspec +49 -0
  21. data/spec/fixtures/credentials.json +8 -0
  22. data/spec/fixtures/helloworld.log +2 -0
  23. data/spec/fixtures/helloworld.log.gz +0 -0
  24. data/spec/inputs/cloud_storage/blob_adapter_spec.rb +95 -0
  25. data/spec/inputs/cloud_storage/blob_filter_spec.rb +92 -0
  26. data/spec/inputs/cloud_storage/client_spec.rb +33 -0
  27. data/spec/inputs/cloud_storage/file_reader_spec.rb +76 -0
  28. data/spec/inputs/cloud_storage/processed_db_spec.rb +82 -0
  29. data/spec/inputs/google_cloud_storage_spec.rb +127 -0
  30. data/vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar +0 -0
  31. data/vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar +0 -0
  32. data/vendor/jar-dependencies/com/google/api/api-common/1.5.0/api-common-1.5.0.jar +0 -0
  33. data/vendor/jar-dependencies/com/google/api/gax-httpjson/0.40.0/gax-httpjson-0.40.0.jar +0 -0
  34. data/vendor/jar-dependencies/com/google/api/gax/1.23.0/gax-1.23.0.jar +0 -0
  35. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-common-protos/1.7.0/proto-google-common-protos-1.7.0.jar +0 -0
  36. data/vendor/jar-dependencies/com/google/api/grpc/proto-google-iam-v1/0.8.0/proto-google-iam-v1-0.8.0.jar +0 -0
  37. data/vendor/jar-dependencies/com/google/apis/google-api-services-storage/v1-rev114-1.23.0/google-api-services-storage-v1-rev114-1.23.0.jar +0 -0
  38. data/vendor/jar-dependencies/com/google/auth/google-auth-library-credentials/0.9.0/google-auth-library-credentials-0.9.0.jar +0 -0
  39. data/vendor/jar-dependencies/com/google/auth/google-auth-library-oauth2-http/0.9.0/google-auth-library-oauth2-http-0.9.0.jar +0 -0
  40. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core-http/1.25.0/google-cloud-core-http-1.25.0.jar +0 -0
  41. data/vendor/jar-dependencies/com/google/cloud/google-cloud-core/1.25.0/google-cloud-core-1.25.0.jar +0 -0
  42. data/vendor/jar-dependencies/com/google/cloud/google-cloud-storage/1.25.0/google-cloud-storage-1.25.0.jar +0 -0
  43. data/vendor/jar-dependencies/com/google/code/findbugs/jsr305/3.0.1/jsr305-3.0.1.jar +0 -0
  44. data/vendor/jar-dependencies/com/google/code/gson/gson/2.7/gson-2.7.jar +0 -0
  45. data/vendor/jar-dependencies/com/google/errorprone/error_prone_annotations/2.2.0/error_prone_annotations-2.2.0.jar +0 -0
  46. data/vendor/jar-dependencies/com/google/guava/guava-jdk5/17.0/guava-jdk5-17.0.jar +0 -0
  47. data/vendor/jar-dependencies/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
  48. data/vendor/jar-dependencies/com/google/http-client/google-http-client-appengine/1.23.0/google-http-client-appengine-1.23.0.jar +0 -0
  49. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson/1.23.0/google-http-client-jackson-1.23.0.jar +0 -0
  50. data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson2/1.23.0/google-http-client-jackson2-1.23.0.jar +0 -0
  51. data/vendor/jar-dependencies/com/google/http-client/google-http-client/1.23.0/google-http-client-1.23.0.jar +0 -0
  52. data/vendor/jar-dependencies/com/google/oauth-client/google-oauth-client/1.23.0/google-oauth-client-1.23.0.jar +0 -0
  53. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java-util/3.5.1/protobuf-java-util-3.5.1.jar +0 -0
  54. data/vendor/jar-dependencies/com/google/protobuf/protobuf-java/3.5.1/protobuf-java-3.5.1.jar +0 -0
  55. data/vendor/jar-dependencies/commons-codec/commons-codec/1.3/commons-codec-1.3.jar +0 -0
  56. data/vendor/jar-dependencies/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar +0 -0
  57. data/vendor/jar-dependencies/io/grpc/grpc-context/1.9.0/grpc-context-1.9.0.jar +0 -0
  58. data/vendor/jar-dependencies/io/opencensus/opencensus-api/0.11.1/opencensus-api-0.11.1.jar +0 -0
  59. data/vendor/jar-dependencies/io/opencensus/opencensus-contrib-http-util/0.11.1/opencensus-contrib-http-util-0.11.1.jar +0 -0
  60. data/vendor/jar-dependencies/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar +0 -0
  61. data/vendor/jar-dependencies/org/apache/httpcomponents/httpclient/4.0.1/httpclient-4.0.1.jar +0 -0
  62. data/vendor/jar-dependencies/org/apache/httpcomponents/httpcore/4.0.1/httpcore-4.0.1.jar +0 -0
  63. data/vendor/jar-dependencies/org/codehaus/jackson/jackson-core-asl/1.9.11/jackson-core-asl-1.9.11.jar +0 -0
  64. data/vendor/jar-dependencies/org/threeten/threetenbp/1.3.3/threetenbp-1.3.3.jar +0 -0
  65. metadata +191 -0
@@ -0,0 +1,92 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'java'
17
+ require 'logstash-input-google_cloud_storage_jars.rb'
18
+
19
+ module LogStash
20
+ module Inputs
21
+ module CloudStorage
22
# BlobAdapter exposes parts of `com.google.cloud.storage.Blob` for use in
# the plugin for easier mocking and future-proofing.
#
# Every method delegates to the wrapped blob object handed to the constructor.
class BlobAdapter
  # blob - the object to wrap; it must respond to the Java-style getters
  #        used below (getName, getBucket, getMetadata, ...).
  def initialize(blob)
    @blob = blob
  end

  # Returns the wrapped blob's name.
  def name
    @blob.getName()
  end

  # Returns a freshly built Hash describing the blob. The keys are
  # 'bucket', 'metadata', 'name', 'md5', 'crc32c' and 'generation', each
  # read from the matching getter on the wrapped blob.
  def attributes
    {
      'bucket' => @blob.getBucket(),
      'metadata' => @blob.getMetadata(),
      'name' => @blob.getName(),
      'md5' => @blob.getMd5(),
      'crc32c' => @blob.getCrc32c(),
      'generation' => @blob.getGeneration()
    }
  end

  java_import 'com.google.cloud.storage.Blob$BlobSourceOption'

  # Deletes the wrapped blob, passing a generation-match source option.
  def delete!
    @blob.delete(BlobSourceOption.generationMatch())
  end

  # Builds a copy of the blob whose metadata is `{ key => value }` and
  # calls update on it.
  def set_metadata!(key, value)
    new_metadata = { key => value }
    @blob.toBuilder().setMetadata(new_metadata).build().update()
  end

  # Returns the wrapped blob's metadata (may be nil; callers check for it).
  def metadata
    @blob.getMetadata()
  end

  # Returns the wrapped blob's generation.
  def generation
    @blob.getGeneration()
  end

  # Returns `attributes` extended with two per-line entries:
  # 'line' (the given line number) and 'line_id', formatted as
  # "<uri>:<line_number>@<generation>".
  def line_attributes(line_number)
    attrs = attributes

    attrs['line'] = line_number
    attrs['line_id'] = "#{uri}:#{line_number}@#{generation}"

    attrs
  end

  # Returns the blob location as "gs://<bucket>/<name>".
  def uri
    "gs://#{@blob.getBucket()}/#{name}"
  end

  java_import 'java.nio.file.Paths'

  # Downloads the blob to `path` (a String), converting it to a
  # java.nio.file.Path first.
  def download_to(path)
    temp_path = Paths.get(path)
    @blob.downloadTo(temp_path)
  end

  # Downloads the blob to a uniquely named file inside `temp_directory`,
  # yields that file's path, and removes the file afterwards.
  #
  # The removal runs in an `ensure` so the temp file is also cleaned up when
  # the download or the caller's block raises; the exception still
  # propagates. Removal uses the force flag, so a file that was never
  # created does not cause a second error.
  def with_downloaded(temp_directory)
    temp_file = ::File.join(temp_directory, SecureRandom.uuid)

    begin
      download_to(temp_file)
      yield temp_file
    ensure
      FileUtils.remove_entry_secure(temp_file, true)
    end
  end
end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,99 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module LogStash
18
+ module Inputs
19
+ module CloudStorage
20
# BlobFilter acts as a .filter for BlobAdapters.
class BlobFilter
  # Initialize the filter.
  # Throws an exception if the regular expressions could not compile.
  #
  # logger        - object responding to `info` and `debug`
  # include_regex - pattern a blob name must match in full to be processed
  # exclude_regex - pattern that, when matching a blob name in full,
  #                 excludes the blob
  # metadata_key  - metadata key marking a blob as handled; an empty string
  #                 disables the metadata check
  # processed_db  - object responding to `already_processed?(blob)`, or nil
  #                 to disable that check
  def initialize(logger, include_regex, exclude_regex, metadata_key, processed_db)
    @logger = logger
    @include_regex = compile_regex(include_regex)
    @exclude_regex = compile_regex(exclude_regex)
    @metadata_key = metadata_key
    @processed_db = processed_db

    @logger.info('Turn on debugging to explain why blobs are filtered.')
  end

  # should_process? returns true if the blob matches all the
  # user-provided requirements to download and extract events.
  def should_process?(blob)
    @logger.debug("Found blob: #{blob.name}")

    # Evaluate all conditions because the operations are cheap and give the
    # user a complete idea of why a blob was included/excluded.
    conditions = [
      not_already_run?(blob),
      included?(blob),
      not_excluded?(blob),
      metadata_does_not_exist?(blob)
    ]

    conditions.all?
  end

  private

  # Compiles `regex` and returns a version anchored to the whole string.
  #
  # The pattern is compiled on its own first so that an invalid user pattern
  # is always rejected, even one that would become valid once wrapped.
  # Any failure is re-raised as a RuntimeError naming the offending pattern.
  def compile_regex(regex)
    anchor(Regexp.new(regex))
  rescue StandardError => e
    raise "Could not compile regex '#{regex}': #{e}"
  end

  # Wraps a compiled pattern in \A(?:...)\z so a match must cover the entire
  # name. Checking pre_match/post_match of the leftmost match is not enough:
  # with `a\.log|a\.log\.gz` the first alternative matches a prefix of
  # "a.log.gz" and the engine never tries the second one.
  def anchor(compiled)
    Regexp.new("\\A(?:#{compiled.source})\\z", compiled.options)
  rescue RegexpError
    # The raw pattern compiled, so the wrap can only fail when a trailing
    # extended-mode comment swallowed the closing paren; a newline ends
    # that comment and is ignored in extended mode.
    Regexp.new("\\A(?:#{compiled.source}\n)\\z", compiled.options)
  end

  # True when no ProcessedDb is configured or the blob is not recorded in it.
  def not_already_run?(blob)
    result = @processed_db.nil? || !@processed_db.already_processed?(blob)

    explain('Not included in ProcessedDB', result)
  end

  # True when the blob name fully matches the include pattern.
  def included?(blob)
    explain('Matches include regex', name_matches(blob.name, @include_regex))
  end

  # True when the blob name does not fully match the exclude pattern.
  def not_excluded?(blob)
    explain('Does not match exclude regex', !name_matches(blob.name, @exclude_regex))
  end

  # True when the blob's metadata does not contain the configured key.
  def metadata_does_not_exist?(blob)
    # an empty key means the user doesn't want to store/check the metadata
    return true if @metadata_key.empty?

    has_key = !blob.metadata.nil? && blob.metadata.key?(@metadata_key)

    explain('Does not have metadata key', !has_key)
  end

  # Logs the outcome of one condition at debug level and returns `result`
  # unchanged so it can be used inline.
  def explain(message, result)
    pf = result ? 'pass' : 'fail'

    @logger.debug(" - #{message}? (#{pf})")

    result
  end

  # Returns true when `regex` (already anchored by compile_regex) matches
  # `name`, i.e. when the user's pattern matches the whole name.
  def name_matches(name, regex)
    !regex.match(name).nil?
  end
end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,87 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'thread'
17
+ require 'java'
18
+ require 'logstash-input-google_cloud_storage_jars.rb'
19
+ require 'logstash/inputs/cloud_storage/blob_adapter'
20
+
21
+ module LogStash
22
+ module Inputs
23
+ module CloudStorage
24
# Client provides all the required transport and authentication setup for the plugin.
class Client
  # bucket        - name of the bucket passed to the storage service's list call
  # json_key_path - path to a JSON key file; nil or an empty string selects
  #                 the application default credentials
  # logger        - stored for use by the client
  def initialize(bucket, json_key_path, logger)
    @logger = logger
    @bucket = bucket

    # create client
    @storage = initialize_storage json_key_path
  end

  # Lists the bucket and yields one BlobAdapter per blob returned.
  #
  # A StorageException raised while listing is re-raised as a RuntimeError
  # with the message "Error listing bucket contents: ...".
  def list_blobs
    @storage.list(@bucket).iterateAll().each do |blob|
      yield LogStash::Inputs::CloudStorage::BlobAdapter.new(blob)
    end
  rescue Java::ComGoogleCloudStorage::StorageException => e
    raise "Error listing bucket contents: #{e}"
  end

  private

  # Builds the storage service from the credentials, headers and retry
  # settings defined below.
  def initialize_storage(json_key_path)
    com.google.cloud.storage.StorageOptions.newBuilder()
       .setCredentials(credentials(json_key_path))
       .setHeaderProvider(http_headers)
       .setRetrySettings(retry_settings)
       .build()
       .getService()
  end

  java_import 'com.google.auth.oauth2.GoogleCredentials'

  # Returns application default credentials when no key path is configured,
  # otherwise credentials read from the JSON key file.
  #
  # nil is treated like an empty path: the plugin's `json_key_file` setting
  # defaults to nil, and calling `empty?` on it raised NoMethodError.
  def credentials(json_key_path)
    return GoogleCredentials.getApplicationDefault() if json_key_path.nil? || json_key_path.empty?

    key_file = java.io.FileInputStream.new(json_key_path)
    begin
      GoogleCredentials.fromStream(key_file)
    ensure
      # fromStream does not own the stream; close it so the descriptor is
      # released even when parsing the key fails.
      key_file.close
    end
  end

  java_import 'com.google.api.gax.rpc.FixedHeaderProvider'

  # Returns a header provider that sets a fixed User-Agent for requests.
  def http_headers
    gem_name = 'logstash-input-google_cloud_storage'
    # NOTE(review): hard-coded and not tied to the gemspec version (this
    # release is packaged as 0.9.0) - confirm whether that is intended.
    gem_version = '1.0.0'
    user_agent = "Elastic/#{gem_name} version/#{gem_version}"

    FixedHeaderProvider.create({ 'User-Agent' => user_agent })
  end

  java_import 'com.google.api.gax.retrying.RetrySettings'
  java_import 'org.threeten.bp.Duration'

  # Returns the retry/timeout policy used by the storage service:
  # retry delay starts at 500 ms and grows by 1.5x up to 60 s, each RPC
  # times out after 20 s, and the total budget is 15 minutes.
  def retry_settings
    # backoff values taken from com.google.api.client.util.ExponentialBackOff
    RetrySettings.newBuilder()
                 .setInitialRetryDelay(Duration.ofMillis(500))
                 .setRetryDelayMultiplier(1.5)
                 .setMaxRetryDelay(Duration.ofSeconds(60))
                 .setInitialRpcTimeout(Duration.ofSeconds(20))
                 .setRpcTimeoutMultiplier(1.5)
                 .setMaxRpcTimeout(Duration.ofSeconds(20))
                 .setTotalTimeout(Duration.ofMinutes(15))
                 .build()
  end
end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'zlib'
17
+
18
+ module LogStash
19
+ module Inputs
20
+ module CloudStorage
21
# FileReader provides a unified way to read different types of log files
# with predictable callbacks.
class FileReader
  # The two identification bytes that start every gzip stream.
  GZIP_MAGIC_BYTES = [0x1f, 0x8b].freeze

  # read_lines reads lines from a file one at a time, optionally decoding
  # the file as gzip if decode_gzip is true.
  #
  # Handles files with both UNIX and Windows line endings.
  #
  # filename    - path of the file to read
  # decode_gzip - when true, a file recognised by `gzip?` is decompressed
  # block       - called with (line_without_line_ending, line_number);
  #               line numbers start at 1
  def self.read_lines(filename, decode_gzip, &block)
    if decode_gzip && gzip?(filename)
      read_gzip_lines(filename, &block)
    else
      read_plain_lines(filename, &block)
    end
  end

  # gzip? returns true if the given filename has a gzip file extension or,
  # failing that, if the file exists and starts with the gzip magic bytes.
  #
  # The content check matters because a file's name may not carry the
  # original extension (for example a randomly named temporary download),
  # in which case an extension-only test never detects compressed input.
  def self.gzip?(filename)
    filename.end_with?('.gz') || gzip_magic?(filename)
  end

  # Yields every line of a plain-text file with its 1-based line number.
  # The file is opened in block form so the handle is closed when reading
  # finishes or the callback raises.
  def self.read_plain_lines(filename, &block)
    ::File.open(filename) { |file| emit_lines(file, &block) }
  end

  # Yields every line of a gzip-compressed file with its 1-based line
  # number. The reader is opened in block form so it is always closed.
  def self.read_gzip_lines(filename, &block)
    Zlib::GzipReader.open(filename) { |gz| emit_lines(gz, &block) }
  end

  # Returns true when `filename` is a regular file whose first two bytes are
  # the gzip magic number. Missing, empty and one-byte files return false.
  def self.gzip_magic?(filename)
    return false unless ::File.file?(filename)

    header = ::File.open(filename, 'rb') { |file| file.read(2) }
    !header.nil? && header.bytes == GZIP_MAGIC_BYTES
  end
  private_class_method :gzip_magic?

  # Calls `block` with each chomped line of `io` and its 1-based number.
  def self.emit_lines(io, &block)
    io.each_with_index { |line, index| block.call(line.chomp, index + 1) }
  end
  private_class_method :emit_lines
end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'fileutils'
17
+ require 'digest'
18
+
19
+ module LogStash
20
+ module Inputs
21
+ module CloudStorage
22
# ProcessedDb tracks files and generations that have already been processed.
# A blob's generation and name are joined with "|" and SHA1 hashed.
# The directory layout is git-like: the first 3 characters of the hash name a
# top-level directory and the remaining characters name a directory inside it.
# This keeps the directory count manageable.
class ProcessedDb
  # db_directory - root directory under which the marker directories live.
  def initialize(db_directory)
    @db_directory = db_directory
  end

  # Returns true when the marker path for `blob` exists on disk.
  def already_processed?(blob)
    ::File.exist?(encode_path(blob))
  end

  # Records `blob` as processed by creating its marker directory
  # (including any missing parents).
  def mark_processed(blob)
    FileUtils.mkdir_p(encode_path(blob))
  end

  # Returns the marker path for `blob`:
  # <db_directory>/<first 3 hex chars>/<remaining hex chars> of the SHA1 of
  # "<generation>|<name>".
  def encode_path(blob)
    digest = Digest::SHA1.hexdigest(format('%s|%s', blob.generation, blob.name))

    ::File.join(@db_directory, digest[0, 3], digest[3..-1])
  end
end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,130 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ require 'logstash/inputs/base'
17
+ require 'logstash/namespace'
18
+ require 'stud/interval'
19
+ require 'logstash/inputs/cloud_storage/client'
20
+ require 'logstash/inputs/cloud_storage/processed_db'
21
+ require 'logstash/inputs/cloud_storage/blob_filter'
22
+ require 'logstash/inputs/cloud_storage/file_reader'
23
+
24
# GoogleCloudStorage is an input plugin for LogStash that
# reads blobs in Cloud Storage buckets.
class LogStash::Inputs::GoogleCloudStorage < LogStash::Inputs::Base
  config_name 'google_cloud_storage'

  default :codec, 'plain'

  # Connection Settings
  config :bucket_id, validate: :string, required: true
  config :json_key_file, validate: :string, default: nil
  config :interval, validate: :number, default: 60

  # Inclusion/Exclusion Criteria
  config :file_matches, validate: :string, default: '.*\\.log(\\.gz)?'
  config :file_exclude, validate: :string, default: '^$'
  config :metadata_key, validate: :string, default: 'x-goog-meta-ls-gcs-input'
  config :processed_db_path, validate: :string, default: nil

  config :delete, validate: :boolean, default: false
  config :unpack_gzip, validate: :boolean, default: true

  # Other Criteria
  config :temp_directory, validate: :string, default: File.join(Dir.tmpdir, 'ls-in-gcs')

  # Accessors for testing
  attr_accessor :event_output_queue
  attr_reader :processed_db

  # Prepares the plugin: creates the temp directory if needed, then builds
  # the storage client, the ProcessedDb and the blob filter. When no
  # processed_db_path is configured, a location under Logstash's
  # `path.data` setting is used.
  def register
    FileUtils.mkdir_p(@temp_directory) unless Dir.exist?(@temp_directory)

    cloud_storage = LogStash::Inputs::CloudStorage
    @client = cloud_storage::Client.new(@bucket_id, @json_key_file, @logger)

    @processed_db_path = default_processed_db_path if @processed_db_path.nil?
    @logger.info("ProcessedDb created in: #{@processed_db_path}")
    @processed_db = cloud_storage::ProcessedDb.new(@processed_db_path)

    @blob_filter = cloud_storage::BlobFilter.new(
      @logger, @file_matches, @file_exclude, @metadata_key, @processed_db
    )
  end

  # Remembers the output queue and the current thread, then runs
  # list_download_process once per configured interval.
  def run(queue)
    @event_output_queue = queue
    @main_plugin_thread = Thread.current

    Stud.interval(@interval) { list_download_process }
  end

  # Fetches new files ready to be processed, downloads and processes them and finally
  # runs post-processing steps.
  def list_download_process
    list_processable_blobs do |candidate|
      @logger.info("Found matching blob #{candidate.uri}")
      download_and_process(candidate)
      postprocess(candidate)
    end
  end

  def stop
    # Stud events were started on the main plugin thread so stop all events relative to it.
    Stud.stop!(@main_plugin_thread)
  end

  private

  # Default ProcessedDb location, rooted at Logstash's `path.data` setting.
  def default_processed_db_path
    data_root = LogStash::SETTINGS.get_value('path.data')
    File.join(data_root, 'plugins', 'inputs', 'google_cloud_storage', 'db')
  end

  # list_processable_blobs will list blobs in the bucket and yield them if they are not filtered
  def list_processable_blobs
    @logger.info("Fetching blobs from #{@bucket_id}")

    @client.list_blobs do |candidate|
      next unless @blob_filter.should_process?(candidate)

      yield candidate
    end
  end

  # Downloads the blob to a temp file and turns each of its lines into events.
  def download_and_process(blob)
    @logger.info("Downloading blob #{blob.uri}")

    reader = LogStash::Inputs::CloudStorage::FileReader
    blob.with_downloaded(@temp_directory) do |local_path|
      @logger.info("Reading events from #{blob.uri} (temp file: #{local_path})")

      reader.read_lines(local_path, @unpack_gzip) do |text, number|
        extract_event(text, number, blob)
      end
    end
  end

  # Runs the configured after-processing steps, in order: tag the blob with
  # the metadata key, delete it, record it in the ProcessedDb.
  def postprocess(blob)
    unless @metadata_key.empty?
      blob.set_metadata!(@metadata_key, 'processed')
    end

    blob.delete! if @delete

    unless @processed_db_path.empty?
      @processed_db.mark_processed(blob)
    end
  end

  # Decodes one line with the codec; each resulting event is decorated,
  # annotated with the blob's per-line attributes under [@metadata][gcs]
  # and pushed to the output queue.
  def extract_event(line, line_num, blob)
    @codec.decode(line) do |decoded|
      decorate(decoded)
      decoded.set('[@metadata][gcs]', blob.line_attributes(line_num))
      @event_output_queue << decoded
    end
  end
end