logstash-input-google_cloud_storage 0.9.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +9 -0
- data/CONTRIBUTING.md +7 -0
- data/CONTRIBUTORS +11 -0
- data/Gemfile +10 -0
- data/LICENSE +11 -0
- data/README.md +95 -0
- data/docs/configuration.asciidoc +112 -0
- data/docs/examples.asciidoc +84 -0
- data/docs/index.asciidoc +43 -0
- data/docs/metadata.asciidoc +20 -0
- data/docs/resources.asciidoc +6 -0
- data/lib/logstash-input-google_cloud_storage_jars.rb +38 -0
- data/lib/logstash/inputs/cloud_storage/blob_adapter.rb +92 -0
- data/lib/logstash/inputs/cloud_storage/blob_filter.rb +99 -0
- data/lib/logstash/inputs/cloud_storage/client.rb +87 -0
- data/lib/logstash/inputs/cloud_storage/file_reader.rb +59 -0
- data/lib/logstash/inputs/cloud_storage/processed_db.rb +53 -0
- data/lib/logstash/inputs/google_cloud_storage.rb +130 -0
- data/logstash-input-google_cloud_storage.gemspec +49 -0
- data/spec/fixtures/credentials.json +8 -0
- data/spec/fixtures/helloworld.log +2 -0
- data/spec/fixtures/helloworld.log.gz +0 -0
- data/spec/inputs/cloud_storage/blob_adapter_spec.rb +95 -0
- data/spec/inputs/cloud_storage/blob_filter_spec.rb +92 -0
- data/spec/inputs/cloud_storage/client_spec.rb +33 -0
- data/spec/inputs/cloud_storage/file_reader_spec.rb +76 -0
- data/spec/inputs/cloud_storage/processed_db_spec.rb +82 -0
- data/spec/inputs/google_cloud_storage_spec.rb +127 -0
- data/vendor/jar-dependencies/com/fasterxml/jackson/core/jackson-core/2.1.3/jackson-core-2.1.3.jar +0 -0
- data/vendor/jar-dependencies/com/google/api-client/google-api-client/1.23.0/google-api-client-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/api/api-common/1.5.0/api-common-1.5.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/api/gax-httpjson/0.40.0/gax-httpjson-0.40.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/api/gax/1.23.0/gax-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/api/grpc/proto-google-common-protos/1.7.0/proto-google-common-protos-1.7.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/api/grpc/proto-google-iam-v1/0.8.0/proto-google-iam-v1-0.8.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/apis/google-api-services-storage/v1-rev114-1.23.0/google-api-services-storage-v1-rev114-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/auth/google-auth-library-credentials/0.9.0/google-auth-library-credentials-0.9.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/auth/google-auth-library-oauth2-http/0.9.0/google-auth-library-oauth2-http-0.9.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/cloud/google-cloud-core-http/1.25.0/google-cloud-core-http-1.25.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/cloud/google-cloud-core/1.25.0/google-cloud-core-1.25.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/cloud/google-cloud-storage/1.25.0/google-cloud-storage-1.25.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/code/findbugs/jsr305/3.0.1/jsr305-3.0.1.jar +0 -0
- data/vendor/jar-dependencies/com/google/code/gson/gson/2.7/gson-2.7.jar +0 -0
- data/vendor/jar-dependencies/com/google/errorprone/error_prone_annotations/2.2.0/error_prone_annotations-2.2.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/guava/guava-jdk5/17.0/guava-jdk5-17.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/guava/guava/20.0/guava-20.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/http-client/google-http-client-appengine/1.23.0/google-http-client-appengine-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson/1.23.0/google-http-client-jackson-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/http-client/google-http-client-jackson2/1.23.0/google-http-client-jackson2-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/http-client/google-http-client/1.23.0/google-http-client-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/oauth-client/google-oauth-client/1.23.0/google-oauth-client-1.23.0.jar +0 -0
- data/vendor/jar-dependencies/com/google/protobuf/protobuf-java-util/3.5.1/protobuf-java-util-3.5.1.jar +0 -0
- data/vendor/jar-dependencies/com/google/protobuf/protobuf-java/3.5.1/protobuf-java-3.5.1.jar +0 -0
- data/vendor/jar-dependencies/commons-codec/commons-codec/1.3/commons-codec-1.3.jar +0 -0
- data/vendor/jar-dependencies/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar +0 -0
- data/vendor/jar-dependencies/io/grpc/grpc-context/1.9.0/grpc-context-1.9.0.jar +0 -0
- data/vendor/jar-dependencies/io/opencensus/opencensus-api/0.11.1/opencensus-api-0.11.1.jar +0 -0
- data/vendor/jar-dependencies/io/opencensus/opencensus-contrib-http-util/0.11.1/opencensus-contrib-http-util-0.11.1.jar +0 -0
- data/vendor/jar-dependencies/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar +0 -0
- data/vendor/jar-dependencies/org/apache/httpcomponents/httpclient/4.0.1/httpclient-4.0.1.jar +0 -0
- data/vendor/jar-dependencies/org/apache/httpcomponents/httpcore/4.0.1/httpcore-4.0.1.jar +0 -0
- data/vendor/jar-dependencies/org/codehaus/jackson/jackson-core-asl/1.9.11/jackson-core-asl-1.9.11.jar +0 -0
- data/vendor/jar-dependencies/org/threeten/threetenbp/1.3.3/threetenbp-1.3.3.jar +0 -0
- metadata +191 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
require 'java'
|
|
17
|
+
require 'logstash-input-google_cloud_storage_jars.rb'
|
|
18
|
+
|
|
19
|
+
module LogStash
|
|
20
|
+
module Inputs
|
|
21
|
+
module CloudStorage
|
|
22
|
+
# BlobAdapter exposes parts of `com.google.cloud.storage.Blob` for use in
# the plugin for easier mocking and future-proofing.
#
# Every method delegates to the wrapped blob object handed to the constructor.
class BlobAdapter
  # blob - the object to wrap; it must respond to the Java-style getters
  #        used below (getName, getBucket, getMetadata, ...).
  def initialize(blob)
    @blob = blob
  end

  # Returns the blob's name as reported by the wrapped object.
  def name
    @blob.getName()
  end

  # Returns a new Hash (string keys) describing the blob: bucket, metadata,
  # name, md5, crc32c and generation.
  def attributes
    {
      'bucket' => @blob.getBucket(),
      'metadata' => @blob.getMetadata(),
      'name' => @blob.getName(),
      'md5' => @blob.getMd5(),
      'crc32c' => @blob.getCrc32c(),
      'generation' => @blob.getGeneration()
    }
  end

  java_import 'com.google.cloud.storage.Blob$BlobSourceOption'

  # Deletes the wrapped blob, passing the `generationMatch` source option.
  # NOTE(review): presumably this makes the delete conditional on the
  # generation being unchanged - confirm against the storage client docs.
  def delete!
    @blob.delete(BlobSourceOption.generationMatch())
  end

  # Sends an update for the blob whose metadata map holds the single
  # key/value pair given.
  # NOTE(review): whether other existing metadata keys survive depends on
  # the update semantics of the storage client - confirm.
  def set_metadata!(key, value)
    new_metadata = { key => value }
    @blob.toBuilder().setMetadata(new_metadata).build().update()
  end

  # Returns the blob's metadata map as reported by the wrapped object
  # (BlobFilter guards against it being nil).
  def metadata
    @blob.getMetadata()
  end

  # Returns the blob's generation as reported by the wrapped object.
  def generation
    @blob.getGeneration()
  end

  # Returns #attributes extended with the line number and a 'line_id' of
  # the form "<uri>:<line_number>@<generation>".
  def line_attributes(line_number)
    attrs = attributes

    attrs['line'] = line_number
    attrs['line_id'] = "#{uri}:#{line_number}@#{generation}"

    attrs
  end

  # Returns the "gs://<bucket>/<name>" URI of the blob.
  def uri
    "gs://#{@blob.getBucket()}/#{name}"
  end

  java_import 'java.nio.file.Paths'

  # Downloads the blob's content to the given local path.
  def download_to(path)
    temp_path = Paths.get(path)
    @blob.downloadTo(temp_path)
  end

  # Downloads the blob to a uniquely named file inside temp_directory,
  # yields that file's path, and removes the file afterwards.
  #
  # The removal happens in an `ensure` so the temporary file is cleaned up
  # even when the download or the caller's block raises; the exception is
  # still propagated. Returns the value of the block.
  def with_downloaded(temp_directory)
    temp_file = ::File.join(temp_directory, SecureRandom.uuid)

    begin
      download_to(temp_file)

      yield temp_file
    ensure
      # force=true: do not raise if the file was never created.
      FileUtils.remove_entry_secure(temp_file, true)
    end
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
module LogStash
|
|
18
|
+
module Inputs
|
|
19
|
+
module CloudStorage
|
|
20
|
+
# BlobFilter acts as a .filter for BlobAdapters.
#
# A blob is accepted only when every condition holds: it is not recorded in
# the processed DB, its full name matches the include pattern, its full name
# does not match the exclude pattern, and it does not carry the marker
# metadata key.
class BlobFilter
  # Initialize the filter.
  # Throws an exception if the regular expressions could not compile.
  #
  # logger        - object responding to #info and #debug
  # include_regex - pattern a blob name must match in full to be processed
  # exclude_regex - pattern that rejects a blob when its name matches in full
  # metadata_key  - metadata key marking a processed blob; '' disables the check
  # processed_db  - object responding to #already_processed?(blob), or nil
  def initialize(logger, include_regex, exclude_regex, metadata_key, processed_db)
    @logger = logger
    @include_regex = compile_regex(include_regex)
    @exclude_regex = compile_regex(exclude_regex)
    @metadata_key = metadata_key
    @processed_db = processed_db

    @logger.info('Turn on debugging to explain why blobs are filtered.')
  end

  # should_process? returns true if the blob matches all the
  # user-provided requirements to download and extract events.
  def should_process?(blob)
    @logger.debug("Found blob: #{blob.name}")

    # Evaluate all conditions because the operations are cheap and give the
    # user a complete idea of why a blob was included/excluded.
    conditions = [
      not_already_run?(blob),
      included?(blob),
      not_excluded?(blob),
      metadata_does_not_exist?(blob)
    ]

    conditions.all?
  end

  private

  # Compiles the user-supplied pattern and returns a whole-string-anchored
  # version of it. Raises a RuntimeError naming the pattern when it cannot
  # be compiled.
  def compile_regex(regex)
    anchor_regex(Regexp.new(regex))
  rescue StandardError => e
    raise "Could not compile regex '#{regex}': #{e}"
  end

  # Wraps an already compiled pattern in \A(?:...)\z so the regex engine
  # itself searches for a match covering the whole name. Without this, an
  # alternation such as `foo|foobar` stops at the leftmost alternative
  # ("foo") and "foobar" is wrongly reported as a partial match.
  #
  # If the wrapped source does not compile (e.g. the pattern ends in an
  # extended-mode comment that swallows the closing parenthesis) the
  # original pattern is returned; name_matches still enforces a full match.
  def anchor_regex(compiled)
    flags = compiled.options & (Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
    Regexp.new("\\A(?:#{compiled.source})\\z", flags)
  rescue RegexpError
    compiled
  end

  # Passes when there is no processed DB or the DB has no record of the blob.
  def not_already_run?(blob)
    result = @processed_db.nil? || !@processed_db.already_processed?(blob)

    explain('Not included in ProcessedDB', result)
  end

  # Passes when the blob's full name matches the include pattern.
  def included?(blob)
    explain('Matches include regex', name_matches(blob.name, @include_regex))
  end

  # Passes when the blob's full name does not match the exclude pattern.
  def not_excluded?(blob)
    explain('Does not match exclude regex', !name_matches(blob.name, @exclude_regex))
  end

  # Passes when the blob's metadata lacks the marker key.
  def metadata_does_not_exist?(blob)
    # an empty key means the user doesn't want to store/check the metadata
    return true if @metadata_key.empty?

    has_key = !blob.metadata.nil? && blob.metadata.key?(@metadata_key)

    explain('Does not have metadata key', !has_key)
  end

  # Logs the outcome of one condition at debug level and returns the result
  # unchanged so it can be used as the condition's value.
  def explain(message, result)
    pf = result ? 'pass' : 'fail'

    @logger.debug(" - #{message}? (#{pf})")

    result
  end

  # Returns true only when the match covers the entire name, i.e. nothing
  # is left over before or after the matched text.
  def name_matches(name, regex)
    match = regex.match(name)
    return false if match.nil?

    match.pre_match.empty? && match.post_match.empty?
  end
end
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
require 'thread'
|
|
17
|
+
require 'java'
|
|
18
|
+
require 'logstash-input-google_cloud_storage_jars.rb'
|
|
19
|
+
require 'logstash/inputs/cloud_storage/blob_adapter'
|
|
20
|
+
|
|
21
|
+
module LogStash
|
|
22
|
+
module Inputs
|
|
23
|
+
module CloudStorage
|
|
24
|
+
# Client provides all the required transport and authentication setup for the plugin.
class Client
  # bucket        - name of the bucket whose blobs are listed
  # json_key_path - path to a JSON key file; nil or '' selects the
  #                 application default credentials
  # logger        - logger kept for use by the client
  def initialize(bucket, json_key_path, logger)
    @logger = logger
    @bucket = bucket

    # create client
    @storage = initialize_storage json_key_path
  end

  # Lists the bucket and yields a BlobAdapter for every entry returned.
  # Raises a RuntimeError describing the failure when the storage client
  # raises a StorageException.
  def list_blobs
    @storage.list(@bucket).iterateAll().each do |blob|
      yield LogStash::Inputs::CloudStorage::BlobAdapter.new(blob)
    end
  rescue Java::ComGoogleCloudStorage::StorageException => e
    raise "Error listing bucket contents: #{e}"
  end

  private

  # Builds the storage service with the plugin's credentials, User-Agent
  # header and retry settings.
  def initialize_storage(json_key_path)
    com.google.cloud.storage.StorageOptions.newBuilder()
       .setCredentials(credentials(json_key_path))
       .setHeaderProvider(http_headers)
       .setRetrySettings(retry_settings)
       .build()
       .getService()
  end

  java_import 'com.google.auth.oauth2.GoogleCredentials'

  # Returns the credentials to authenticate with: the application default
  # credentials when no key path is given, otherwise the ones read from the
  # JSON key file.
  #
  # nil is accepted as well as '' because the plugin's json_key_file option
  # defaults to nil. The key file stream is closed once the credentials
  # have been read so the file handle is not leaked.
  def credentials(json_key_path)
    return GoogleCredentials.getApplicationDefault() if json_key_path.nil? || json_key_path.empty?

    key_file = java.io.FileInputStream.new(json_key_path)
    begin
      GoogleCredentials.fromStream(key_file)
    ensure
      key_file.close
    end
  end

  java_import 'com.google.api.gax.rpc.FixedHeaderProvider'

  # Returns a header provider carrying the plugin's User-Agent string.
  # NOTE(review): gem_version is hard-coded here - confirm it is meant to
  # track the released gem version.
  def http_headers
    gem_name = 'logstash-input-google_cloud_storage'
    gem_version = '1.0.0'
    user_agent = "Elastic/#{gem_name} version/#{gem_version}"

    FixedHeaderProvider.create({ 'User-Agent' => user_agent })
  end

  java_import 'com.google.api.gax.retrying.RetrySettings'
  java_import 'org.threeten.bp.Duration'

  # Returns the retry/backoff configuration applied to storage calls.
  def retry_settings
    # backoff values taken from com.google.api.client.util.ExponentialBackOff
    RetrySettings.newBuilder()
                 .setInitialRetryDelay(Duration.ofMillis(500))
                 .setRetryDelayMultiplier(1.5)
                 .setMaxRetryDelay(Duration.ofSeconds(60))
                 .setInitialRpcTimeout(Duration.ofSeconds(20))
                 .setRpcTimeoutMultiplier(1.5)
                 .setMaxRpcTimeout(Duration.ofSeconds(20))
                 .setTotalTimeout(Duration.ofMinutes(15))
                 .build()
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
require 'zlib'
|
|
17
|
+
|
|
18
|
+
module LogStash
|
|
19
|
+
module Inputs
|
|
20
|
+
module CloudStorage
|
|
21
|
+
# FileReader provides a unified way to read different types of log files
# with predictable callbacks.
class FileReader
  # First two bytes of every gzip stream (ID1, ID2).
  GZIP_MAGIC = [0x1f, 0x8b].freeze

  # read_lines reads lines from a file one at a time, optionally decoding
  # the file as gzip if decode_gzip is true.
  #
  # Handles files with both UNIX and Windows line endings.
  #
  # filename    - path of the file to read
  # decode_gzip - when true, files recognised by gzip? are decompressed
  # block       - called with (line_without_line_ending, line_number);
  #               line numbers start at 1
  def self.read_lines(filename, decode_gzip, &block)
    if decode_gzip && gzip?(filename)
      read_gzip_lines(filename, &block)
    else
      read_plain_lines(filename, &block)
    end
  end

  # gzip? returns true if the given filename has a gzip file extension, or
  # if it names an existing file whose content starts with the gzip magic
  # bytes.
  #
  # The content check is needed because the plugin downloads blobs to
  # temporary files named by a bare UUID, so the extension of the original
  # blob is not present on the path that gets read.
  def self.gzip?(filename)
    return true if filename.end_with?('.gz')
    return false unless ::File.file?(filename)

    # binread returns nil for an empty file.
    header = ::File.binread(filename, GZIP_MAGIC.length)
    !header.nil? && header.bytes == GZIP_MAGIC
  end

  # Yields every line of a plain text file with its 1-based line number.
  # The block form of File.open closes the handle even if the callback raises.
  def self.read_plain_lines(filename, &block)
    ::File.open(filename) do |file|
      file.each_line.with_index(1) do |line, line_num|
        block.call(line.chomp, line_num)
      end
    end
  end

  # Yields every line of a gzip-compressed file with its 1-based line
  # number. The block form of GzipReader.open closes the reader when done.
  def self.read_gzip_lines(filename, &block)
    Zlib::GzipReader.open(filename) do |gz|
      gz.each_line.with_index(1) do |line, line_num|
        block.call(line.chomp, line_num)
      end
    end
  end
end
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
require 'fileutils'
|
|
17
|
+
require 'digest'
|
|
18
|
+
|
|
19
|
+
module LogStash
|
|
20
|
+
module Inputs
|
|
21
|
+
module CloudStorage
|
|
22
|
+
# ProcessedDb records which blob generations have already been handled.
#
# Each blob is identified by "<generation>|<name>", which is SHA1 hashed.
# The layout on disk is git-like: the first 3 characters of the hex digest
# name a top level directory and the remaining characters name a directory
# inside it. A blob counts as processed when that path exists.
# This keeps the directory count manageable.
class ProcessedDb
  # db_directory - root directory under which the marker directories live
  def initialize(db_directory)
    @db_directory = db_directory
  end

  # Returns true when the marker path for the blob exists.
  def already_processed?(blob)
    ::File.exist?(encode_path(blob))
  end

  # Creates the marker directory (and any missing parents) for the blob.
  def mark_processed(blob)
    FileUtils.mkdir_p(encode_path(blob))
  end

  # Returns the marker path for the blob:
  # <db_directory>/<first 3 hex chars>/<remaining hex chars>.
  def encode_path(blob)
    digest = Digest::SHA1.hexdigest(format('%s|%s', blob.generation, blob.name))

    ::File.join(@db_directory, digest[0, 3], digest[3..-1])
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Copyright 2018 Google LLC
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
require 'logstash/inputs/base'
|
|
17
|
+
require 'logstash/namespace'
|
|
18
|
+
require 'stud/interval'
|
|
19
|
+
require 'logstash/inputs/cloud_storage/client'
|
|
20
|
+
require 'logstash/inputs/cloud_storage/processed_db'
|
|
21
|
+
require 'logstash/inputs/cloud_storage/blob_filter'
|
|
22
|
+
require 'logstash/inputs/cloud_storage/file_reader'
|
|
23
|
+
|
|
24
|
+
# GoogleCloudStorage is an input plugin for LogStash that
# reads blobs in Cloud Storage buckets.
#
# On every interval it lists the bucket, keeps the blobs accepted by the
# BlobFilter, downloads each one to a temporary file, turns its lines into
# events and finally runs the post-processing steps (metadata marker,
# optional delete, processed DB entry).
class LogStash::Inputs::GoogleCloudStorage < LogStash::Inputs::Base
  config_name 'google_cloud_storage'

  default :codec, 'plain'

  # Connection Settings
  config :bucket_id, :validate => :string, :required => true
  config :json_key_file, :validate => :string, :default => nil
  config :interval, :validate => :number, :default => 60

  # Inclusion/Exclusion Criteria
  config :file_matches, :validate => :string, :default => '.*\\.log(\\.gz)?'
  config :file_exclude, :validate => :string, :default => '^$'
  config :metadata_key, :validate => :string, :default => 'x-goog-meta-ls-gcs-input'
  config :processed_db_path, :validate => :string, :default => nil

  config :delete, :validate => :boolean, :default => false
  config :unpack_gzip, :validate => :boolean, :default => true

  # Other Criteria
  config :temp_directory, :validate => :string, :default => File.join(Dir.tmpdir, 'ls-in-gcs')

  # Accessors for testing
  attr_accessor :event_output_queue
  attr_reader :processed_db

  # Prepares the temp directory, the storage client, the processed DB and
  # the blob filter.
  def register
    FileUtils.mkdir_p(@temp_directory) unless Dir.exist?(@temp_directory)

    # json_key_file defaults to nil, while the client treats an empty path
    # as "use the application default credentials" and tests it with
    # String#empty?. Normalise nil to '' so an unset option does not raise.
    json_key_path = @json_key_file.nil? ? '' : @json_key_file
    @client = LogStash::Inputs::CloudStorage::Client.new(@bucket_id, json_key_path, @logger)

    # Without an explicit path the DB lives under Logstash's data directory.
    if @processed_db_path.nil?
      ls_data = LogStash::SETTINGS.get_value('path.data')
      @processed_db_path = File.join(ls_data, 'plugins', 'inputs', 'google_cloud_storage', 'db')
    end

    @logger.info("ProcessedDb created in: #{@processed_db_path}")

    @processed_db = LogStash::Inputs::CloudStorage::ProcessedDb.new(@processed_db_path)

    @blob_filter = LogStash::Inputs::CloudStorage::BlobFilter.new(@logger, @file_matches, @file_exclude, @metadata_key, @processed_db)
  end

  # Runs list_download_process every @interval seconds, pushing events to
  # the given queue.
  def run(queue)
    @event_output_queue = queue

    # Remembered so that #stop can target the thread the interval runs on.
    @main_plugin_thread = Thread.current
    Stud.interval(@interval) do
      list_download_process
    end
  end

  # Fetches new files ready to be processed, downloads and processes them and finally
  # runs post-processing steps.
  def list_download_process
    list_processable_blobs do |blob|
      @logger.info("Found matching blob #{blob.uri}")
      download_and_process(blob)
      postprocess(blob)
    end
  end

  def stop
    # Stud events were started on the main plugin thread so stop all events relative to it.
    Stud.stop!(@main_plugin_thread)
  end

  private

  # list_processable_blobs will list blobs in the bucket and yield them if they are not filtered
  def list_processable_blobs
    @logger.info("Fetching blobs from #{@bucket_id}")
    @client.list_blobs do |blob|
      yield blob if @blob_filter.should_process?(blob)
    end
  end

  # Downloads the blob to a temporary file and extracts one or more events
  # from every line of it.
  def download_and_process(blob)
    @logger.info("Downloading blob #{blob.uri}")

    blob.with_downloaded(@temp_directory) do |path|
      @logger.info("Reading events from #{blob.uri} (temp file: #{path})")

      LogStash::Inputs::CloudStorage::FileReader.read_lines(path, @unpack_gzip) do |line, num|
        extract_event(line, num, blob)
      end
    end
  end

  # Marks the blob as handled: sets the metadata marker (unless the key is
  # empty), deletes the blob when configured to, and records it in the
  # processed DB (unless the DB path is empty).
  def postprocess(blob)
    blob.set_metadata!(@metadata_key, 'processed') unless @metadata_key.empty?

    blob.delete! if @delete

    @processed_db.mark_processed(blob) unless @processed_db_path.empty?
  end

  # Decodes a line with the configured codec and enqueues every resulting
  # event, tagged with the blob's attributes under [@metadata][gcs].
  def extract_event(line, line_num, blob)
    @codec.decode(line) do |event|
      decorate(event)
      event.set('[@metadata][gcs]', blob.line_attributes(line_num))
      @event_output_queue << event
    end
  end
end
end
|