logstash-filter-fingerprint 3.3.1 → 3.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/docs/index.asciidoc +8 -9
- data/lib/logstash/filters/fingerprint.rb +55 -4
- data/logstash-filter-fingerprint.gemspec +2 -3
- data/spec/filters/fingerprint_spec.rb +92 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 314340e6e62bfa1dc34740e1f44af4313668c797dc99cb15b8fc37519f601c75
|
4
|
+
data.tar.gz: bba012a2b02065879152ac51fe926e8ba8beef8ed96d577b3aecb4555a255dce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb420df3e75d292c83cf6e7f6488ff6bad9b194d45787ee43c1e2b2fadb9fc038b42b6ebb8d2403b852d111799ebc139819109a559abd59692693ca8d7fed42b
|
7
|
+
data.tar.gz: 451164868428c63860da386b856c4ea0adff10ce290ae24dc4a5d45a7b8ab12a6c90c74bf352bcab0af702cec820d9cba1fcf04889ed02bac63a45b93037d03a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 3.4.1
|
2
|
+
- Added backward compatibility of timestamp format to provide consistent fingerprint [#67](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/67)
|
3
|
+
|
4
|
+
## 3.4.0
|
5
|
+
- Added support for 128bit murmur variant [#66](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/66).
|
6
|
+
|
7
|
+
## 3.3.2
|
8
|
+
- [DOC] Clarify behavior when key is set [#65](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/65).
|
9
|
+
|
1
10
|
## 3.3.1
|
2
11
|
- Force encoding to UTF-8 when concatenating sources to generate fingerprint [#64](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/64)
|
3
12
|
|
data/docs/index.asciidoc
CHANGED
@@ -23,7 +23,7 @@ include::{include_path}/plugin_header.asciidoc[]
|
|
23
23
|
Create consistent hashes (fingerprints) of one or more fields and store
|
24
24
|
the result in a new field.
|
25
25
|
|
26
|
-
You can use this plugin to create consistent document ids when
|
26
|
+
You can use this plugin to create consistent document ids when events are
|
27
27
|
inserted into Elasticsearch. This approach means that existing documents can be
|
28
28
|
updated instead of creating new documents.
|
29
29
|
|
@@ -60,7 +60,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
60
60
|
| <<plugins-{type}s-{plugin}-concatenate_all_fields>> |<<boolean,boolean>>|No
|
61
61
|
| <<plugins-{type}s-{plugin}-ecs_compatibility>> | <<string,string>>|No
|
62
62
|
| <<plugins-{type}s-{plugin}-key>> |<<string,string>>|No
|
63
|
-
| <<plugins-{type}s-{plugin}-method>> |<<string,string>>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes
|
63
|
+
| <<plugins-{type}s-{plugin}-method>> |<<string,string>>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes
|
64
64
|
| <<plugins-{type}s-{plugin}-source>> |<<array,array>>|No
|
65
65
|
| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
|
66
66
|
|=======================================================================
|
@@ -76,7 +76,7 @@ filter plugins.
|
|
76
76
|
* Value type is <<boolean,boolean>>
|
77
77
|
* Default value is `false`
|
78
78
|
|
79
|
-
When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `MD5` fingerprint methods will produce
|
79
|
+
When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint methods will produce
|
80
80
|
base64 encoded rather than hex encoded strings.
|
81
81
|
|
82
82
|
[id="plugins-{type}s-{plugin}-concatenate_sources"]
|
@@ -174,17 +174,16 @@ With other methods, optionally fill in the HMAC key.
|
|
174
174
|
===== `method`
|
175
175
|
|
176
176
|
* This is a required setting.
|
177
|
-
* Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION`
|
177
|
+
* Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `MURMUR3_128`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION`
|
178
178
|
* Default value is `"SHA1"`
|
179
179
|
|
180
180
|
The fingerprint method to use.
|
181
181
|
|
182
|
-
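If set to `SHA1`, `SHA256`, `SHA384`, `SHA512`, or `MD5` and a key is set,
|
183
|
-
the cryptographic hash function with the same name will be used to generate
|
184
|
-
the fingerprint. When a key is set, the keyed-hash (HMAC) digest function will
|
185
|
-
be used.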
|
182
|
+
If set to `SHA1`, `SHA256`, `SHA384`, `SHA512`, or `MD5` and a key is set, the
|
183
|
+
corresponding cryptographic hash function and the keyed-hash (HMAC) digest function
|
184
|
+
are used to generate the fingerprint.
|
186
185
|
|
187
|
-
If set to `MURMUR3` the non-cryptographic MurmurHash function will be used.
|
186
|
+
If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash function (either the 32-bit or 128-bit implementation, respectively) will be used.
|
188
187
|
|
189
188
|
If set to `IPV4_NETWORK` the input data needs to be an IPv4 address and
|
190
189
|
the hash value will be the masked-out address using the number of bits
|
@@ -23,6 +23,29 @@ require "logstash/plugin_mixins/ecs_compatibility_support"
|
|
23
23
|
# https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID].
|
24
24
|
# To generate UUIDs, prefer the <<plugins-filters-uuid,uuid filter>>.
|
25
25
|
class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
26
|
+
|
27
|
+
##
|
28
|
+
# Logstash 8+ has variable-length serialization of timestamps
|
29
|
+
# that does not include subsecond info for whole-second timestamps.
|
30
|
+
# For backward-compatibility we refine the implementation to use
|
31
|
+
# our own three-decimal-place formatter for whole-second
|
32
|
+
# timestamps.
|
33
|
+
if LOGSTASH_VERSION.split('.').first.to_i >= 8
|
34
|
+
module MinimumSerializationLengthTimestamp
|
35
|
+
THREE_DECIMAL_INSTANT_FORMATTER = java.time.format.DateTimeFormatterBuilder.new.appendInstant(3).toFormatter
|
36
|
+
refine LogStash::Timestamp do
|
37
|
+
def to_s
|
38
|
+
return super unless nsec == 0
|
39
|
+
THREE_DECIMAL_INSTANT_FORMATTER.format(to_java.toInstant)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
using MinimumSerializationLengthTimestamp
|
44
|
+
end
|
45
|
+
|
46
|
+
INTEGER_MAX_32BIT = (1 << 31) - 1
|
47
|
+
INTEGER_MIN_32BIT = -(1 << 31)
|
48
|
+
|
26
49
|
include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
|
27
50
|
|
28
51
|
config_name "fingerprint"
|
@@ -40,8 +63,8 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
40
63
|
# With other methods, optionally fill in the HMAC key.
|
41
64
|
config :key, :validate => :string
|
42
65
|
|
43
|
-
# When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `MD5` fingerprint methods will produce
|
44
|
-
# base64 encoded rather than hex encoded strings.
|
66
|
+
# When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint
|
67
|
+
# methods will produce base64 encoded rather than hex encoded strings.
|
45
68
|
config :base64encode, :validate => :boolean, :default => false
|
46
69
|
|
47
70
|
# The fingerprint method to use.
|
@@ -51,7 +74,9 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
51
74
|
# the fingerprint. When a key is set, the keyed-hash (HMAC) digest function will
|
52
75
|
# be used.
|
53
76
|
#
|
54
|
-
# If set to `MURMUR3` the non-cryptographic MurmurHash function will be used.
|
77
|
+
# If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash
|
78
|
+
# function (either the 32-bit or 128-bit implementation, respectively)
|
79
|
+
# will be used.
|
55
80
|
#
|
56
81
|
# If set to `IPV4_NETWORK` the input data needs to be an IPv4 address and
|
57
82
|
# the hash value will be the masked-out address using the number of bits
|
@@ -64,7 +89,7 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
64
89
|
# If set to `UUID`, a
|
65
90
|
# https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID] will
|
66
91
|
# be generated. The result will be random and thus not a consistent hash.
|
67
|
-
config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1'
|
92
|
+
config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1'
|
68
93
|
|
69
94
|
# When set to `true` and `method` isn't `UUID` or `PUNCTUATION`, the
|
70
95
|
# plugin concatenates the names and values of all fields given in the
|
@@ -102,6 +127,8 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
102
127
|
class << self; alias_method :fingerprint, :fingerprint_ipv4_network; end
|
103
128
|
when :MURMUR3
|
104
129
|
class << self; alias_method :fingerprint, :fingerprint_murmur3; end
|
130
|
+
when :MURMUR3_128
|
131
|
+
class << self; alias_method :fingerprint, :fingerprint_murmur3_128; end
|
105
132
|
when :UUID
|
106
133
|
# nothing
|
107
134
|
when :PUNCTUATION
|
@@ -210,6 +237,30 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
210
237
|
end
|
211
238
|
end
|
212
239
|
|
240
|
+
def fingerprint_murmur3_128(value)
|
241
|
+
if value.is_a?(Integer)
|
242
|
+
if (INTEGER_MIN_32BIT <= value) && (value <= INTEGER_MAX_32BIT)
|
243
|
+
if @base64encode
|
244
|
+
[MurmurHash3::V128.int32_hash(value, 2).pack("L*")].pack("m").chomp!
|
245
|
+
else
|
246
|
+
MurmurHash3::V128.int32_hash(value, 2).pack("L*").unpack("H*")[0]
|
247
|
+
end
|
248
|
+
else
|
249
|
+
if @base64encode
|
250
|
+
[MurmurHash3::V128.int64_hash(value, 2).pack("L*")].pack("m").chomp!
|
251
|
+
else
|
252
|
+
MurmurHash3::V128.int64_hash(value, 2).pack("L*").unpack("H*")[0]
|
253
|
+
end
|
254
|
+
end
|
255
|
+
else
|
256
|
+
if @base64encode
|
257
|
+
MurmurHash3::V128.str_base64digest(value.to_s, 2)
|
258
|
+
else
|
259
|
+
MurmurHash3::V128.str_hexdigest(value.to_s, 2)
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
213
264
|
def select_digest(method)
|
214
265
|
case method
|
215
266
|
when :SHA1
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-fingerprint'
|
4
|
-
s.version = '3.3.1'
|
4
|
+
s.version = '3.4.1'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = "Fingerprints fields by replacing values with a consistent hash"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
@@ -21,8 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
|
22
22
|
# Gem dependencies
|
23
23
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
24
|
-
s.add_runtime_dependency "murmurhash3"
|
24
|
+
s.add_runtime_dependency "murmurhash3" #(MIT license)
|
25
25
|
s.add_development_dependency 'logstash-devutils'
|
26
26
|
s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~>1.2'
|
27
27
|
end
|
28
|
-
|
@@ -19,6 +19,10 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
19
19
|
plugin.filter(event)
|
20
20
|
end
|
21
21
|
|
22
|
+
def ge_version_8
|
23
|
+
LOGSTASH_VERSION.split('.').first.to_i >= 8
|
24
|
+
end
|
25
|
+
|
22
26
|
context "with a string field" do
|
23
27
|
let(:data) { {"clientip" => "123.123.123.123" } }
|
24
28
|
let(:config) { super().merge("source" => ["clientip" ]) }
|
@@ -50,6 +54,59 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
50
54
|
end
|
51
55
|
end
|
52
56
|
|
57
|
+
describe "the MURMUR3_128 method" do
|
58
|
+
let(:fingerprint_method) { "MURMUR3_128" }
|
59
|
+
|
60
|
+
context "string hex encoding" do
|
61
|
+
it "fingerprints the value" do
|
62
|
+
expect(fingerprint).to eq("41cbc4056eed401d091dfbeabf7ea9e0")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context "string base64 encoding" do
|
67
|
+
let(:config) { super().merge("base64encode" => true) }
|
68
|
+
it "fingerprints the value" do
|
69
|
+
expect(fingerprint).to eq("QcvEBW7tQB0JHfvqv36p4A==")
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
context "int32 hex encoding" do
|
74
|
+
let(:config) { super().merge("base64encode" => false) }
|
75
|
+
let(:data) { {"clientip" => 123 } }
|
76
|
+
|
77
|
+
it "fingerprints the value" do
|
78
|
+
expect(fingerprint).to eq("286816c693ac410ed63e1430dcd6f6fe")
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
context "int32 base64 encoding" do
|
83
|
+
let(:config) { super().merge("base64encode" => true) }
|
84
|
+
let(:data) { {"clientip" => 123 } }
|
85
|
+
|
86
|
+
it "fingerprints the value" do
|
87
|
+
expect(fingerprint).to eq("KGgWxpOsQQ7WPhQw3Nb2/g==")
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context "int64 hex encoding" do
|
92
|
+
let(:config) { super().merge("base64encode" => false) }
|
93
|
+
let(:data) { {"clientip" => 2148483647 } }
|
94
|
+
|
95
|
+
it "fingerprints the value" do
|
96
|
+
expect(fingerprint).to eq("fdc7699a82556c8c584131f0133ee989")
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context "int64 base64 encoding" do
|
101
|
+
let(:config) { super().merge("base64encode" => true) }
|
102
|
+
let(:data) { {"clientip" => 2148483647 } }
|
103
|
+
|
104
|
+
it "fingerprints the value" do
|
105
|
+
expect(fingerprint).to eq("/cdpmoJVbIxYQTHwEz7piQ==")
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
53
110
|
describe "the SHA1 method" do
|
54
111
|
let(:fingerprint_method) { "SHA1" }
|
55
112
|
|
@@ -220,7 +277,7 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
220
277
|
end
|
221
278
|
|
222
279
|
context 'Timestamps' do
|
223
|
-
epoch_time
|
280
|
+
let(:epoch_time) { Time.at(0).gmtime }
|
224
281
|
let(:config) { super().merge("source" => ['@timestamp']) }
|
225
282
|
|
226
283
|
describe 'OpenSSL Fingerprinting' do
|
@@ -239,6 +296,40 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
239
296
|
expect(fingerprint).to eq(743372282)
|
240
297
|
end
|
241
298
|
end
|
299
|
+
|
300
|
+
describe 'MURMUR3_128 Fingerprinting' do
|
301
|
+
let(:fingerprint_method) { "MURMUR3_128" }
|
302
|
+
let(:data) { { "@timestamp" => epoch_time } }
|
303
|
+
it "fingerprints the timestamp correctly" do
|
304
|
+
expect(fingerprint).to eq('37785b62a8cae473acc315d39b66d86e')
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
describe "fractional seconds" do
|
309
|
+
let(:fingerprint_method) { "MURMUR3" }
|
310
|
+
let(:data) { { "@timestamp" => epoch_time } }
|
311
|
+
|
312
|
+
describe "millisecond" do
|
313
|
+
let(:epoch_time) { LogStash::Timestamp.new('2000-01-01T05:00:00.12Z') }
|
314
|
+
it "fingerprints the timestamp correctly" do
|
315
|
+
expect(fingerprint).to eq(4263087275)
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
describe "microsecond" do
|
320
|
+
let(:epoch_time) { LogStash::Timestamp.new('2000-01-01T05:00:00.123456Z') }
|
321
|
+
it "fingerprints the timestamp correctly" do
|
322
|
+
expect(fingerprint).to eq(4188855160)
|
323
|
+
end
|
324
|
+
end if ge_version_8
|
325
|
+
|
326
|
+
describe "nanosecond" do
|
327
|
+
let(:epoch_time) { LogStash::Timestamp.new('2000-01-01T05:00:00.123456789Z') }
|
328
|
+
it "fingerprints the timestamp correctly" do
|
329
|
+
expect(fingerprint).to eq(3520111535)
|
330
|
+
end
|
331
|
+
end if ge_version_8
|
332
|
+
end
|
242
333
|
end
|
243
334
|
|
244
335
|
describe "post fingerprint execution triggers" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-fingerprint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1
|
4
|
+
version: 3.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,8 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
111
|
- !ruby/object:Gem::Version
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
|
-
|
115
|
-
rubygems_version: 2.6.13
|
114
|
+
rubygems_version: 3.1.6
|
116
115
|
signing_key:
|
117
116
|
specification_version: 4
|
118
117
|
summary: Fingerprints fields by replacing values with a consistent hash
|