logstash-filter-fingerprint 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/docs/index.asciidoc +8 -9
- data/lib/logstash/filters/fingerprint.rb +40 -6
- data/logstash-filter-fingerprint.gemspec +2 -3
- data/spec/filters/fingerprint_spec.rb +78 -0
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25ac9ded3114af17962469d77481c6992d65778a9dcf67012aaa3c7fb63c6ff7
|
4
|
+
data.tar.gz: d42a1fc7ff16d940f5634cb5f14ee2b4f2216176bdf7b90a8e7e6ed23cc95909
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc8e8df09287df6f5f0649397f0b6b326396508afb101b73b7b51ca95802c314719a74440657059114ddab7b5d47cd65b7a48babea76e2b341cbdf46753f4da6
|
7
|
+
data.tar.gz: 605eec24d6a4587e565acfe35c405beb6ff782410ed23c955e3d7e938e9a8fb3f633f40bc282f7ccaec77be07dfe233843ce2564dceb6cbe87dbe7fec0759668
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 3.4.0
|
2
|
+
- Added support for 128-bit murmur variant [#66](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/66).
|
3
|
+
|
4
|
+
## 3.3.2
|
5
|
+
- [DOC] Clarify behavior when key is set [#65](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/65).
|
6
|
+
|
7
|
+
## 3.3.1
|
8
|
+
- Force encoding to UTF-8 when concatenating sources to generate fingerprint [#64](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/64)
|
9
|
+
|
1
10
|
## 3.3.0
|
2
11
|
- Add ECS compatibility [#62](https://github.com/logstash-plugins/logstash-filter-fingerprint/pull/62)
|
3
12
|
|
data/docs/index.asciidoc
CHANGED
@@ -23,7 +23,7 @@ include::{include_path}/plugin_header.asciidoc[]
|
|
23
23
|
Create consistent hashes (fingerprints) of one or more fields and store
|
24
24
|
the result in a new field.
|
25
25
|
|
26
|
-
You can use this plugin to create consistent document ids when
|
26
|
+
You can use this plugin to create consistent document ids when events are
|
27
27
|
inserted into Elasticsearch. This approach means that existing documents can be
|
28
28
|
updated instead of creating new documents.
|
29
29
|
|
@@ -60,7 +60,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
60
60
|
| <<plugins-{type}s-{plugin}-concatenate_all_fields>> |<<boolean,boolean>>|No
|
61
61
|
| <<plugins-{type}s-{plugin}-ecs_compatibility>> | <<string,string>>|No
|
62
62
|
| <<plugins-{type}s-{plugin}-key>> |<<string,string>>|No
|
63
|
-
| <<plugins-{type}s-{plugin}-method>> |<<string,string>>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes
|
63
|
+
| <<plugins-{type}s-{plugin}-method>> |<<string,string>>, one of `["SHA1", "SHA256", "SHA384", "SHA512", "MD5", "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"]`|Yes
|
64
64
|
| <<plugins-{type}s-{plugin}-source>> |<<array,array>>|No
|
65
65
|
| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
|
66
66
|
|=======================================================================
|
@@ -76,7 +76,7 @@ filter plugins.
|
|
76
76
|
* Value type is <<boolean,boolean>>
|
77
77
|
* Default value is `false`
|
78
78
|
|
79
|
-
When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `
|
79
|
+
When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint methods will produce
|
80
80
|
base64 encoded rather than hex encoded strings.
|
81
81
|
|
82
82
|
[id="plugins-{type}s-{plugin}-concatenate_sources"]
|
@@ -174,17 +174,16 @@ With other methods, optionally fill in the HMAC key.
|
|
174
174
|
===== `method`
|
175
175
|
|
176
176
|
* This is a required setting.
|
177
|
-
* Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION`
|
177
|
+
* Value can be any of: `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5`, `MURMUR3`, `MURMUR3_128`, `IPV4_NETWORK`, `UUID`, `PUNCTUATION`
|
178
178
|
* Default value is `"SHA1"`
|
179
179
|
|
180
180
|
The fingerprint method to use.
|
181
181
|
|
182
|
-
If set to `SHA1`, `SHA256`, `SHA384`, `SHA512`, or `MD5` and a key is set,
|
183
|
-
|
184
|
-
|
185
|
-
be used.
|
182
|
+
If set to `SHA1`, `SHA256`, `SHA384`, `SHA512`, or `MD5` and a key is set, the
|
183
|
+
corresponding cryptographic hash function and the keyed-hash (HMAC) digest function
|
184
|
+
are used to generate the fingerprint.
|
186
185
|
|
187
|
-
If set to `MURMUR3` the non-cryptographic
|
186
|
+
If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash function (either the 32-bit or 128-bit implementation, respectively) will be used.
|
188
187
|
|
189
188
|
If set to `IPV4_NETWORK` the input data needs to be an IPv4 address and
|
190
189
|
the hash value will be the masked-out address using the number of bits
|
@@ -23,6 +23,10 @@ require "logstash/plugin_mixins/ecs_compatibility_support"
|
|
23
23
|
# https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID].
|
24
24
|
# To generate UUIDs, prefer the <<plugins-filters-uuid,uuid filter>>.
|
25
25
|
class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
26
|
+
|
27
|
+
INTEGER_MAX_32BIT = (1 << 31) - 1
|
28
|
+
INTEGER_MIN_32BIT = -(1 << 31)
|
29
|
+
|
26
30
|
include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
|
27
31
|
|
28
32
|
config_name "fingerprint"
|
@@ -40,8 +44,8 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
40
44
|
# With other methods, optionally fill in the HMAC key.
|
41
45
|
config :key, :validate => :string
|
42
46
|
|
43
|
-
# When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512` and `
|
44
|
-
# base64 encoded rather than hex encoded strings.
|
47
|
+
# When set to `true`, the `SHA1`, `SHA256`, `SHA384`, `SHA512`, `MD5` and `MURMUR3_128` fingerprint
|
48
|
+
# methods will produce base64 encoded rather than hex encoded strings.
|
45
49
|
config :base64encode, :validate => :boolean, :default => false
|
46
50
|
|
47
51
|
# The fingerprint method to use.
|
@@ -51,7 +55,9 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
51
55
|
# the fingerprint. When a key is set, the keyed-hash (HMAC) digest function will
|
52
56
|
# be used.
|
53
57
|
#
|
54
|
-
# If set to `MURMUR3` the non-cryptographic MurmurHash
|
58
|
+
# If set to `MURMUR3` or `MURMUR3_128` the non-cryptographic MurmurHash
|
59
|
+
# function (either the 32-bit or 128-bit implementation, respectively)
|
60
|
+
# will be used.
|
55
61
|
#
|
56
62
|
# If set to `IPV4_NETWORK` the input data needs to be an IPv4 address and
|
57
63
|
# the hash value will be the masked-out address using the number of bits
|
@@ -64,7 +70,7 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
64
70
|
# If set to `UUID`, a
|
65
71
|
# https://en.wikipedia.org/wiki/Universally_unique_identifier[UUID] will
|
66
72
|
# be generated. The result will be random and thus not a consistent hash.
|
67
|
-
config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1'
|
73
|
+
config :method, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "MURMUR3_128", "IPV4_NETWORK", "UUID", "PUNCTUATION"], :required => true, :default => 'SHA1'
|
68
74
|
|
69
75
|
# When set to `true` and `method` isn't `UUID` or `PUNCTUATION`, the
|
70
76
|
# plugin concatenates the names and values of all fields given in the
|
@@ -102,6 +108,8 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
102
108
|
class << self; alias_method :fingerprint, :fingerprint_ipv4_network; end
|
103
109
|
when :MURMUR3
|
104
110
|
class << self; alias_method :fingerprint, :fingerprint_murmur3; end
|
111
|
+
when :MURMUR3_128
|
112
|
+
class << self; alias_method :fingerprint, :fingerprint_murmur3_128; end
|
105
113
|
when :UUID
|
106
114
|
# nothing
|
107
115
|
when :PUNCTUATION
|
@@ -128,11 +136,13 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
128
136
|
to_string = ""
|
129
137
|
if @concatenate_all_fields
|
130
138
|
deep_sort_hashes(event.to_hash).each do |k,v|
|
131
|
-
|
139
|
+
# Force encoding to UTF-8 to get around https://github.com/jruby/jruby/issues/6748
|
140
|
+
to_string << "|#{k}|#{v}".force_encoding("UTF-8")
|
132
141
|
end
|
133
142
|
else
|
134
143
|
@source.sort.each do |k|
|
135
|
-
|
144
|
+
# Force encoding to UTF-8 to get around https://github.com/jruby/jruby/issues/6748
|
145
|
+
to_string << "|#{k}|#{deep_sort_hashes(event.get(k))}".force_encoding("UTF-8")
|
136
146
|
end
|
137
147
|
end
|
138
148
|
to_string << "|"
|
@@ -208,6 +218,30 @@ class LogStash::Filters::Fingerprint < LogStash::Filters::Base
|
|
208
218
|
end
|
209
219
|
end
|
210
220
|
|
221
|
+
def fingerprint_murmur3_128(value)
|
222
|
+
if value.is_a?(Integer)
|
223
|
+
if (INTEGER_MIN_32BIT <= value) && (value <= INTEGER_MAX_32BIT)
|
224
|
+
if @base64encode
|
225
|
+
[MurmurHash3::V128.int32_hash(value, 2).pack("L*")].pack("m").chomp!
|
226
|
+
else
|
227
|
+
MurmurHash3::V128.int32_hash(value, 2).pack("L*").unpack("H*")[0]
|
228
|
+
end
|
229
|
+
else
|
230
|
+
if @base64encode
|
231
|
+
[MurmurHash3::V128.int64_hash(value, 2).pack("L*")].pack("m").chomp!
|
232
|
+
else
|
233
|
+
MurmurHash3::V128.int64_hash(value, 2).pack("L*").unpack("H*")[0]
|
234
|
+
end
|
235
|
+
end
|
236
|
+
else
|
237
|
+
if @base64encode
|
238
|
+
MurmurHash3::V128.str_base64digest(value.to_s, 2)
|
239
|
+
else
|
240
|
+
MurmurHash3::V128.str_hexdigest(value.to_s, 2)
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
211
245
|
def select_digest(method)
|
212
246
|
case method
|
213
247
|
when :SHA1
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-fingerprint'
|
4
|
-
s.version = '3.
|
4
|
+
s.version = '3.4.0'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = "Fingerprints fields by replacing values with a consistent hash"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
@@ -21,8 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
|
22
22
|
# Gem dependencies
|
23
23
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
24
|
-
s.add_runtime_dependency "murmurhash3"
|
24
|
+
s.add_runtime_dependency "murmurhash3" #(MIT license)
|
25
25
|
s.add_development_dependency 'logstash-devutils'
|
26
26
|
s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~>1.2'
|
27
27
|
end
|
28
|
-
|
@@ -50,6 +50,59 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
+
describe "the MURMUR3_128 method" do
|
54
|
+
let(:fingerprint_method) { "MURMUR3_128" }
|
55
|
+
|
56
|
+
context "string hex encoding" do
|
57
|
+
it "fingerprints the value" do
|
58
|
+
expect(fingerprint).to eq("41cbc4056eed401d091dfbeabf7ea9e0")
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
context "string base64 encoding" do
|
63
|
+
let(:config) { super().merge("base64encode" => true) }
|
64
|
+
it "fingerprints the value" do
|
65
|
+
expect(fingerprint).to eq("QcvEBW7tQB0JHfvqv36p4A==")
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
context "int32 hex encoding" do
|
70
|
+
let(:config) { super().merge("base64encode" => false) }
|
71
|
+
let(:data) { {"clientip" => 123 } }
|
72
|
+
|
73
|
+
it "fingerprints the value" do
|
74
|
+
expect(fingerprint).to eq("286816c693ac410ed63e1430dcd6f6fe")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
context "int32 base64 encoding" do
|
79
|
+
let(:config) { super().merge("base64encode" => true) }
|
80
|
+
let(:data) { {"clientip" => 123 } }
|
81
|
+
|
82
|
+
it "fingerprints the value" do
|
83
|
+
expect(fingerprint).to eq("KGgWxpOsQQ7WPhQw3Nb2/g==")
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "int64 hex encoding" do
|
88
|
+
let(:config) { super().merge("base64encode" => false) }
|
89
|
+
let(:data) { {"clientip" => 2148483647 } }
|
90
|
+
|
91
|
+
it "fingerprints the value" do
|
92
|
+
expect(fingerprint).to eq("fdc7699a82556c8c584131f0133ee989")
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
context "int64 base64 encoding" do
|
97
|
+
let(:config) { super().merge("base64encode" => true) }
|
98
|
+
let(:data) { {"clientip" => 2148483647 } }
|
99
|
+
|
100
|
+
it "fingerprints the value" do
|
101
|
+
expect(fingerprint).to eq("/cdpmoJVbIxYQTHwEz7piQ==")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
53
106
|
describe "the SHA1 method" do
|
54
107
|
let(:fingerprint_method) { "SHA1" }
|
55
108
|
|
@@ -192,6 +245,23 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
192
245
|
end
|
193
246
|
end
|
194
247
|
|
248
|
+
context "when utf-8 chars used" do
|
249
|
+
let(:config) { super().merge("source" => ['field1', 'field2']) }
|
250
|
+
let(:data) { {"field1"=>[{"inner_key"=>"🂡"}, {"1"=>"2"}], "field2"=>"🂡"} }
|
251
|
+
it "fingerprints the value of the last value" do
|
252
|
+
# SHA1 of "|field1|inner_key|🂡|1|2|field2|🂡|"
|
253
|
+
expect(fingerprint).to eq("58fa9e0e60c9f0d24b51d84cddb26732a39eeb3d")
|
254
|
+
end
|
255
|
+
|
256
|
+
describe "with concatenate_sources" do
|
257
|
+
let(:config) { super().merge("concatenate_sources" => true) }
|
258
|
+
it "fingerprints the value of concatenated key/pairs" do
|
259
|
+
# SHA1 of "|field1|inner_key|🂡|1|2|field2|🂡|"
|
260
|
+
expect(fingerprint).to eq("d74f41841c7cdc793a97c218d2ff18064a5f1950")
|
261
|
+
end
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
195
265
|
describe "PUNCTUATION method" do
|
196
266
|
let(:fingerprint_method) { 'PUNCTUATION' }
|
197
267
|
let(:config) { super().merge("source" => 'field1') }
|
@@ -222,6 +292,14 @@ describe LogStash::Filters::Fingerprint, :ecs_compatibility_support, :aggregate_
|
|
222
292
|
expect(fingerprint).to eq(743372282)
|
223
293
|
end
|
224
294
|
end
|
295
|
+
|
296
|
+
describe 'MURMUR3_128 Fingerprinting' do
|
297
|
+
let(:fingerprint_method) { "MURMUR3_128" }
|
298
|
+
let(:data) { { "@timestamp" => epoch_time } }
|
299
|
+
it "fingerprints the timestamp correctly" do
|
300
|
+
expect(fingerprint).to eq("37785b62a8cae473acc315d39b66d86e")
|
301
|
+
end
|
302
|
+
end
|
225
303
|
end
|
226
304
|
|
227
305
|
describe "post fingerprint execution triggers" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-fingerprint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,8 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
111
|
- !ruby/object:Gem::Version
|
112
112
|
version: '0'
|
113
113
|
requirements: []
|
114
|
-
|
115
|
-
rubygems_version: 2.6.13
|
114
|
+
rubygems_version: 3.1.6
|
116
115
|
signing_key:
|
117
116
|
specification_version: 4
|
118
117
|
summary: Fingerprints fields by replacing values with a consistent hash
|