fluent-plugin-influxdb-deduplication 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +4 -0
- data/README.md +125 -25
- data/VERSION +1 -1
- data/lib/fluent/plugin/filter_influxdb_deduplication.rb +73 -21
- data/test/test_filter_influxdb_deduplication.rb +242 -17
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 79e774ac8c57b020efdd13befa8820f8cb89c32eb2b42fc94a181a5aef462a65
|
4
|
+
data.tar.gz: 3dce1619fd1fa37e508d8e61e849c4ebcad9b1f635c05024411e19832c8daf46
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d97b6a3ba2c9676c3d34d9893ed82d842c5a59448e42a438d132343f56325a2b3d6fffe3f2b7494d1d71e211c4ff1b8891c7bf04371660680105320d973075cc
|
7
|
+
data.tar.gz: 023dbdb42131021fa610e0cf8e6ef29707497cc2714c5b1e7efd5c3dd1e26a8952abafae69dcae348462c8998c1e6b92487f8237457a14187dc6462d0dc272e8
|
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# [Fluentd](https://www.fluentd.org/) filter plugin to deduplicate records for InfluxDB
|
2
2
|
|
3
|
-
A filter plugin that implements the deduplication techniques described in
|
4
|
-
|
3
|
+
A filter plugin that implements the deduplication techniques described in
|
4
|
+
the [InfluxDB doc](https://docs.influxdata.com/influxdb/v2.0/write-data/best-practices/duplicate-points/).
|
5
5
|
|
6
6
|
## Installation
|
7
7
|
|
@@ -11,43 +11,52 @@ Using RubyGems:
|
|
11
11
|
fluent-gem install fluent-plugin-influxdb-deduplication
|
12
12
|
```
|
13
13
|
|
14
|
-
|
15
14
|
## Configuration
|
16
15
|
|
17
16
|
### Deduplicate by incrementing the timestamp
|
18
17
|
|
19
|
-
The filter plugin reads the fluentd record event time with a precision
|
20
|
-
|
18
|
+
Each data point is assigned a unique timestamp. The filter plugin reads the fluentd record event time with a precision
|
19
|
+
to the second, and stores it in a field with a precision to the nanosecond. Any sequence of records with the same
|
20
|
+
timestamp has each subsequent timestamp incremented by 1 nanosecond.
|
21
21
|
|
22
22
|
<filter pattern>
|
23
23
|
@type influxdb_deduplication
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
|
25
|
+
<time>
|
26
|
+
# field to store the deduplicated timestamp
|
27
|
+
key my_key_field
|
28
|
+
</time>
|
27
29
|
</filter>
|
28
30
|
|
29
31
|
For example, the following input records:
|
30
32
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
33
|
+
| Fluentd Event Time | Record |
|
34
|
+
|---|---|
|
35
|
+
| 1613910640 | { "k1" => 0, "k2" => "value0" } |
|
36
|
+
| 1613910640 | { "k1" => 1, "k2" => "value1" } |
|
37
|
+
| 1613910640 | { "k1" => 2, "k2" => "value2" } |
|
38
|
+
| 1613910641 | { "k1" => 3, "k3" => "value3" } |
|
35
39
|
|
36
|
-
Would
|
40
|
+
Would become on output:
|
37
41
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
+
| Fluentd Event Time | Record |
|
43
|
+
|---|---|
|
44
|
+
| 1613910640 | { "k1" => 0, "k2" => "value0", "my_key_field" => 1613910640000000000 } |
|
45
|
+
| 1613910640 | { "k1" => 1, "k2" => "value1", "my_key_field" => 1613910640000000001 } |
|
46
|
+
| 1613910640 | { "k1" => 2, "k2" => "value2", "my_key_field" => 1613910640000000002 } |
|
47
|
+
| 1613910641 | { "k1" => 3, "k3" => "value3", "my_key_field" => 1613910641000000000 } |
|
42
48
|
|
43
|
-
The time key field can then be passed as is to
|
44
|
-
Example configuration on nginx
|
49
|
+
The time key field can then be passed as is to
|
50
|
+
the [fluent-plugin-influxdb-v2](https://github.com/influxdata/influxdb-plugin-fluent). Example configuration on nginx
|
51
|
+
logs:
|
45
52
|
|
46
53
|
<filter nginx.access>
|
47
54
|
@type influxdb_deduplication
|
48
55
|
|
49
|
-
|
50
|
-
|
56
|
+
<time>
|
57
|
+
# field to store the deduplicated timestamp
|
58
|
+
key my_key_field
|
59
|
+
</time>
|
51
60
|
</filter>
|
52
61
|
|
53
62
|
<match nginx.access>
|
@@ -59,7 +68,7 @@ Example configuration on nginx logs:
|
|
59
68
|
bucket my-bucket
|
60
69
|
org my-org
|
61
70
|
|
62
|
-
# the influxdb2
|
71
|
+
# the influxdb2 time_key must be set to the same value as the influxdb_deduplication time.key
|
63
72
|
time_key my_key_field
|
64
73
|
|
65
74
|
# the timestamp precision must be set to ns
|
@@ -74,13 +83,104 @@ The data can then be queried as a table and viewed in [Grafana](https://grafana.
|
|
74
83
|
from(bucket: "my-bucket")
|
75
84
|
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|
76
85
|
|> pivot(
|
77
|
-
rowKey:["_time"],
|
86
|
+
rowKey: ["_time"],
|
78
87
|
columnKey: ["_field"],
|
79
88
|
valueColumn: "_value"
|
80
89
|
)
|
81
90
|
|> keep(columns: ["_time", "request_method", "status", "remote_addr", "request_uri"])
|
82
91
|
|
83
|
-
|
84
92
|
### Deduplicate by adding a sequence tag
|
85
93
|
|
86
|
-
|
94
|
+
Each record is assigned a sequence number; the output record can then be uniquely identified by the pair (fluentd_event_time,
|
95
|
+
sequence_number). The event time is untouched so no precision is lost for time.
|
96
|
+
|
97
|
+
<filter pattern>
|
98
|
+
@type influxdb_deduplication
|
99
|
+
|
100
|
+
<tag>
|
101
|
+
# field to store the sequence number
|
102
|
+
key my_key_field
|
103
|
+
</tag>
|
104
|
+
</filter>
|
105
|
+
|
106
|
+
For example, the following input records:
|
107
|
+
|
108
|
+
| Fluentd Event Time | Record |
|
109
|
+
|---|---|
|
110
|
+
| 1613910640 | { "k1" => 0, "k2" => "value0" } |
|
111
|
+
| 1613910640 | { "k1" => 1, "k2" => "value1" } |
|
112
|
+
| 1613910640 | { "k1" => 2, "k2" => "value2" } |
|
113
|
+
| 1613910641 | { "k1" => 3, "k3" => "value3" } |
|
114
|
+
|
115
|
+
Would become on output:
|
116
|
+
|
117
|
+
| Fluentd Event Time | Record |
|
118
|
+
|---|---|
|
119
|
+
| 1613910640 | { "k1" => 0, "k2" => "value0", "my_key_field" => 0 } |
|
120
|
+
| 1613910640 | { "k1" => 1, "k2" => "value1", "my_key_field" => 1 } |
|
121
|
+
| 1613910640 | { "k1" => 2, "k2" => "value2", "my_key_field" => 2 } |
|
122
|
+
| 1613910641 | { "k1" => 3, "k3" => "value3", "my_key_field" => 0 } |
|
123
|
+
|
124
|
+
The sequence tag should be passed in the tag parameters
|
125
|
+
of [fluent-plugin-influxdb-v2](https://github.com/influxdata/influxdb-plugin-fluent). Example configuration on nginx
|
126
|
+
logs:
|
127
|
+
|
128
|
+
<filter nginx.access>
|
129
|
+
@type influxdb_deduplication
|
130
|
+
|
131
|
+
<tag>
|
132
|
+
# field to store the sequence number
|
133
|
+
key my_key_field
|
134
|
+
</tag>
|
135
|
+
</filter>
|
136
|
+
|
137
|
+
<match nginx.access>
|
138
|
+
@type influxdb2
|
139
|
+
|
140
|
+
# setup the access to your InfluxDB v2 instance
|
141
|
+
url https://localhost:8086
|
142
|
+
token my-token
|
143
|
+
bucket my-bucket
|
144
|
+
org my-org
|
145
|
+
|
146
|
+
# the influxdb2 time_key is not specified so the fluentd event time is used
|
147
|
+
# time_key
|
148
|
+
|
149
|
+
# there are no requirements on the time_precision value this time
|
150
|
+
# time_precision ns
|
151
|
+
|
152
|
+
# "my_key_field" must be passed to influxdb's tag_keys
|
153
|
+
tag_keys ["request_method", "status", "my_key_field"]
|
154
|
+
field_keys ["remote_addr", "request_uri"]
|
155
|
+
</match>
|
156
|
+
|
157
|
+
The data can then be queried as a table and viewed in [Grafana](https://grafana.com/) for example with the flux query:
|
158
|
+
|
159
|
+
from(bucket: "my-bucket")
|
160
|
+
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|
161
|
+
|> pivot(
|
162
|
+
rowKey: ["_time", "my_key_field"],
|
163
|
+
columnKey: ["_field"],
|
164
|
+
valueColumn: "_value"
|
165
|
+
)
|
166
|
+
|> keep(columns: ["_time", "request_method", "status", "remote_addr", "request_uri"])
|
167
|
+
|
168
|
+
### Detecting out of order records
|
169
|
+
|
170
|
+
This filter plugin expects the fluentd event timestamps of the incoming records to increase and never decrease.
|
171
|
+
Optionally, an order key can be added to indicate whether the record arrived in order or not. For example, with this config:
|
172
|
+
|
173
|
+
<filter pattern>
|
174
|
+
@type influxdb_deduplication
|
175
|
+
|
176
|
+
order_key order_field
|
177
|
+
|
178
|
+
<time>
|
179
|
+
# field to store the deduplicated timestamp
|
180
|
+
key my_key_field
|
181
|
+
</time>
|
182
|
+
</filter>
|
183
|
+
|
184
|
+
Without an order key, out of order records are dropped to avoid previous data points being overridden. With an order key,
|
185
|
+
out of order records will still be pushed but with `order_field = false`. Out of order records are not deduplicated but
|
186
|
+
they will be apparent in influxdb.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
@@ -4,21 +4,30 @@ module Fluent
|
|
4
4
|
class Plugin::InfluxdbDeduplicationFilter < Plugin::Filter
|
5
5
|
Fluent::Plugin.register_filter('influxdb_deduplication', self)
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
The output time key to use.
|
10
|
-
DESC
|
7
|
+
desc "If not nil, the corresponding field takes the value true if the record arrived in order."
|
8
|
+
config_param :order_key, :string, default: nil
|
11
9
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
config_section :time, param_name: :time, multi: false, required: false do
|
11
|
+
desc "The output time key to use."
|
12
|
+
config_param :key, :string
|
13
|
+
end
|
14
|
+
|
15
|
+
config_section :tag, param_name: :tag, multi: false, required: false do
|
16
|
+
desc "The output sequence tag to use."
|
17
|
+
config_param :key, :string
|
18
|
+
end
|
16
19
|
|
17
20
|
def configure(conf)
|
18
21
|
super
|
19
22
|
|
20
|
-
|
21
|
-
raise Fluent::ConfigError, "
|
23
|
+
if @time == nil and @tag == nil
|
24
|
+
raise Fluent::ConfigError, "one of tag or time deduplication needs to be set."
|
25
|
+
elsif @time != nil and @tag != nil
|
26
|
+
raise Fluent::ConfigError, "tag and time deduplication are mutually exclusive."
|
27
|
+
elsif @time != nil and (@time.key == nil or @time.key == "")
|
28
|
+
raise Fluent::ConfigError, "an output 'key' field is required for time deduplication"
|
29
|
+
elsif @tag != nil and (@tag == nil or @tag.key == "")
|
30
|
+
raise Fluent::ConfigError, "an output 'key' field is required for tag deduplication"
|
22
31
|
end
|
23
32
|
end
|
24
33
|
|
@@ -30,6 +39,14 @@ If not nil, the field takes the value true if the record arrives in order and fa
|
|
30
39
|
end
|
31
40
|
|
32
41
|
def filter(tag, time, record)
|
42
|
+
if @time
|
43
|
+
time_deduplication(time, record)
|
44
|
+
else
|
45
|
+
tag_deduplication(time, record)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def time_deduplication(time, record)
|
33
50
|
if time.is_a?(Integer)
|
34
51
|
input_time = Fluent::EventTime.new(time)
|
35
52
|
elsif time.is_a?(Fluent::EventTime)
|
@@ -43,33 +60,68 @@ If not nil, the field takes the value true if the record arrives in order and fa
|
|
43
60
|
|
44
61
|
if input_time.sec < @last_timestamp
|
45
62
|
@log.debug("out of sequence timestamp")
|
46
|
-
if @
|
47
|
-
record[@
|
48
|
-
record[@
|
63
|
+
if @order_key
|
64
|
+
record[@order_key] = false
|
65
|
+
record[@time.key] = nano_time
|
49
66
|
else
|
50
67
|
@log.debug("out of order record dropped")
|
51
68
|
return nil
|
52
69
|
end
|
53
|
-
elsif input_time.sec == @last_timestamp
|
70
|
+
elsif input_time.sec == @last_timestamp and @sequence < 999999999
|
54
71
|
@sequence = @sequence + 1
|
55
|
-
record[@
|
56
|
-
if @
|
57
|
-
record[@
|
72
|
+
record[@time.key] = nano_time + @sequence
|
73
|
+
if @order_key
|
74
|
+
record[@order_key] = true
|
58
75
|
end
|
59
|
-
elsif input_time.sec == @last_timestamp
|
76
|
+
elsif input_time.sec == @last_timestamp and @sequence == 999999999
|
60
77
|
@log.error("received more then 999999999 records in a second")
|
61
78
|
return nil
|
62
79
|
else
|
63
80
|
@sequence = 0
|
64
81
|
@last_timestamp = input_time.sec
|
65
|
-
record[@
|
66
|
-
if @
|
67
|
-
record[@
|
82
|
+
record[@time.key] = nano_time
|
83
|
+
if @order_key
|
84
|
+
record[@order_key] = true
|
68
85
|
end
|
69
86
|
end
|
70
87
|
|
71
88
|
record
|
72
89
|
end
|
73
90
|
|
91
|
+
def tag_deduplication(time, record)
|
92
|
+
if time.is_a?(Integer)
|
93
|
+
input_time = time
|
94
|
+
elsif time.is_a?(Fluent::EventTime)
|
95
|
+
input_time = time.sec * 1000000000 + time.nsec
|
96
|
+
else
|
97
|
+
@log.error("unreadable time")
|
98
|
+
return nil
|
99
|
+
end
|
100
|
+
|
101
|
+
if input_time < @last_timestamp
|
102
|
+
@log.debug("out of sequence timestamp")
|
103
|
+
if @order_key
|
104
|
+
record[@order_key] = false
|
105
|
+
else
|
106
|
+
@log.debug("out of order record dropped")
|
107
|
+
return nil
|
108
|
+
end
|
109
|
+
elsif input_time == @last_timestamp
|
110
|
+
@sequence = @sequence + 1
|
111
|
+
record[@tag.key] = @sequence
|
112
|
+
if @order_key
|
113
|
+
record[@order_key] = true
|
114
|
+
end
|
115
|
+
else
|
116
|
+
@sequence = 0
|
117
|
+
@last_timestamp = input_time
|
118
|
+
record[@tag.key] = 0
|
119
|
+
if @order_key
|
120
|
+
record[@order_key] = true
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
record
|
125
|
+
end
|
74
126
|
end
|
75
127
|
end
|
@@ -14,22 +14,76 @@ class InfluxdbDeduplicationFilterTest < Test::Unit::TestCase
|
|
14
14
|
Fluent::Test::Driver::Filter.new(Fluent::Plugin::InfluxdbDeduplicationFilter).configure(conf)
|
15
15
|
end
|
16
16
|
|
17
|
-
def
|
18
|
-
|
19
|
-
|
17
|
+
def test_configure_time
|
18
|
+
create_driver %[
|
19
|
+
<time>
|
20
|
+
key my_time_key
|
21
|
+
</time>
|
20
22
|
]
|
21
23
|
|
22
|
-
|
23
|
-
|
24
|
+
assert_raises Fluent::ConfigError do
|
25
|
+
create_driver %[
|
26
|
+
<time>
|
27
|
+
</time>
|
28
|
+
]
|
29
|
+
end
|
30
|
+
|
31
|
+
assert_raises Fluent::ConfigError do
|
32
|
+
create_driver %[
|
33
|
+
<time>
|
34
|
+
key
|
35
|
+
</time>
|
36
|
+
]
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_configure_tag
|
41
|
+
create_driver %[
|
42
|
+
<tag>
|
43
|
+
key my_tag_key
|
44
|
+
</tag>
|
45
|
+
]
|
24
46
|
|
47
|
+
assert_raises Fluent::ConfigError do
|
48
|
+
create_driver %[
|
49
|
+
<tag>
|
50
|
+
</tag>
|
51
|
+
]
|
52
|
+
end
|
53
|
+
|
54
|
+
assert_raises Fluent::ConfigError do
|
55
|
+
create_driver %[
|
56
|
+
<tag>
|
57
|
+
key
|
58
|
+
</tag>
|
59
|
+
]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_configuration_needed
|
25
64
|
assert_raises Fluent::ConfigError do
|
26
65
|
create_driver ""
|
27
66
|
end
|
28
67
|
end
|
29
68
|
|
30
|
-
def
|
69
|
+
def test_time_and_tag_exclusivity
|
70
|
+
assert_raises Fluent::ConfigError do
|
71
|
+
create_driver %[
|
72
|
+
<time>
|
73
|
+
key my_time_key
|
74
|
+
</time>
|
75
|
+
<tag>
|
76
|
+
key my_tag_key
|
77
|
+
</tag>
|
78
|
+
]
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_time_in_sequence
|
31
83
|
d = create_driver %[
|
32
|
-
|
84
|
+
<time>
|
85
|
+
key time_key
|
86
|
+
</time>
|
33
87
|
]
|
34
88
|
|
35
89
|
time0 = Fluent::EventTime.new(1613910640)
|
@@ -52,9 +106,40 @@ class InfluxdbDeduplicationFilterTest < Test::Unit::TestCase
|
|
52
106
|
], d.filtered
|
53
107
|
end
|
54
108
|
|
55
|
-
def
|
109
|
+
def test_time_in_sequence_integer_time
|
110
|
+
d = create_driver %[
|
111
|
+
<time>
|
112
|
+
key time_key
|
113
|
+
</time>
|
114
|
+
]
|
115
|
+
|
116
|
+
time0 = 1613910640
|
117
|
+
time1 = 1613910643
|
118
|
+
|
119
|
+
d.run(default_tag: @tag) do
|
120
|
+
d.feed(time0, { "k1" => 0 })
|
121
|
+
d.feed(time0, { "k1" => 1 })
|
122
|
+
d.feed(time0, { "k1" => 2 })
|
123
|
+
d.feed(time1, { "k1" => 3 })
|
124
|
+
d.feed(time1, { "k1" => 4 })
|
125
|
+
end
|
126
|
+
|
127
|
+
assert_equal d.instance.instance_variable_get(:@last_timestamp), 1613910643
|
128
|
+
|
129
|
+
assert_equal [
|
130
|
+
[time0, { "k1" => 0, "time_key" => 1613910640000000000 }],
|
131
|
+
[time0, { "k1" => 1, "time_key" => 1613910640000000001 }],
|
132
|
+
[time0, { "k1" => 2, "time_key" => 1613910640000000002 }],
|
133
|
+
[time1, { "k1" => 3, "time_key" => 1613910643000000000 }],
|
134
|
+
[time1, { "k1" => 4, "time_key" => 1613910643000000001 }]
|
135
|
+
], d.filtered
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_time_out_of_sequence_dropped
|
56
139
|
d = create_driver %[
|
57
|
-
|
140
|
+
<time>
|
141
|
+
key time_key
|
142
|
+
</time>
|
58
143
|
]
|
59
144
|
|
60
145
|
time0 = Fluent::EventTime.new(1613910640)
|
@@ -76,10 +161,12 @@ class InfluxdbDeduplicationFilterTest < Test::Unit::TestCase
|
|
76
161
|
], d.filtered
|
77
162
|
end
|
78
163
|
|
79
|
-
def
|
164
|
+
def test_time_order_field
|
80
165
|
d = create_driver %[
|
81
|
-
|
82
|
-
|
166
|
+
order_key order_field
|
167
|
+
<time>
|
168
|
+
key time_key
|
169
|
+
</time>
|
83
170
|
]
|
84
171
|
|
85
172
|
time0 = Fluent::EventTime.new(1613910640)
|
@@ -94,12 +181,150 @@ class InfluxdbDeduplicationFilterTest < Test::Unit::TestCase
|
|
94
181
|
end
|
95
182
|
|
96
183
|
assert_equal [
|
97
|
-
[time0, { "k1" => 0, "time_key" => 1613910640000000000, "
|
98
|
-
[time1, { "k1" => 1, "time_key" => 1613910643000000000, "
|
99
|
-
[time0, { "k1" => 2, "time_key" => 1613910640000000000, "
|
100
|
-
[time1, { "k1" => 3, "time_key" => 1613910643000000001, "
|
101
|
-
[time1, { "k1" => 4, "time_key" => 1613910643000000002, "
|
184
|
+
[time0, { "k1" => 0, "time_key" => 1613910640000000000, "order_field" => true }],
|
185
|
+
[time1, { "k1" => 1, "time_key" => 1613910643000000000, "order_field" => true }],
|
186
|
+
[time0, { "k1" => 2, "time_key" => 1613910640000000000, "order_field" => false }],
|
187
|
+
[time1, { "k1" => 3, "time_key" => 1613910643000000001, "order_field" => true }],
|
188
|
+
[time1, { "k1" => 4, "time_key" => 1613910643000000002, "order_field" => true }]
|
102
189
|
], d.filtered
|
103
190
|
end
|
104
191
|
|
192
|
+
def test_time_max_sequence
|
193
|
+
d = create_driver %[
|
194
|
+
<time>
|
195
|
+
key time_key
|
196
|
+
</time>
|
197
|
+
]
|
198
|
+
|
199
|
+
time0 = Fluent::EventTime.new(1613910640)
|
200
|
+
time1 = Fluent::EventTime.new(1613910641)
|
201
|
+
|
202
|
+
d.run(default_tag: @tag) do
|
203
|
+
d.feed(time0, { "k1" => 0 })
|
204
|
+
d.instance.instance_variable_set(:@sequence, 999999998)
|
205
|
+
d.feed(time0, { "k1" => 1 })
|
206
|
+
d.feed(time0, { "k1" => 2 })
|
207
|
+
d.feed(time1, { "k1" => 3 })
|
208
|
+
d.feed(time1, { "k1" => 4 })
|
209
|
+
end
|
210
|
+
|
211
|
+
assert_equal [
|
212
|
+
[time0, { "k1" => 0, "time_key" => 1613910640000000000 }],
|
213
|
+
[time0, { "k1" => 1, "time_key" => 1613910640999999999 }],
|
214
|
+
[time1, { "k1" => 3, "time_key" => 1613910641000000000 }],
|
215
|
+
[time1, { "k1" => 4, "time_key" => 1613910641000000001 }]
|
216
|
+
], d.filtered
|
217
|
+
end
|
218
|
+
|
219
|
+
def test_tag_in_sequence
|
220
|
+
d = create_driver %[
|
221
|
+
<tag>
|
222
|
+
key tag_key
|
223
|
+
</tag>
|
224
|
+
]
|
225
|
+
|
226
|
+
time0 = Fluent::EventTime.new(1613910640)
|
227
|
+
time1 = Fluent::EventTime.new(1613910643)
|
228
|
+
|
229
|
+
d.run(default_tag: @tag) do
|
230
|
+
d.feed(time0, { "k1" => 0 })
|
231
|
+
d.feed(time0, { "k1" => 1 })
|
232
|
+
d.feed(time0, { "k1" => 2 })
|
233
|
+
d.feed(time1, { "k1" => 3 })
|
234
|
+
d.feed(time1, { "k1" => 4 })
|
235
|
+
end
|
236
|
+
|
237
|
+
assert_equal d.instance.instance_variable_get(:@last_timestamp), 1613910643000000000
|
238
|
+
|
239
|
+
assert_equal [
|
240
|
+
[time0, { "k1" => 0, "tag_key" => 0 }],
|
241
|
+
[time0, { "k1" => 1, "tag_key" => 1 }],
|
242
|
+
[time0, { "k1" => 2, "tag_key" => 2 }],
|
243
|
+
[time1, { "k1" => 3, "tag_key" => 0 }],
|
244
|
+
[time1, { "k1" => 4, "tag_key" => 1 }]
|
245
|
+
], d.filtered
|
246
|
+
end
|
247
|
+
|
248
|
+
def test_tag_in_sequence_integer_time
|
249
|
+
d = create_driver %[
|
250
|
+
<tag>
|
251
|
+
key tag_key
|
252
|
+
</tag>
|
253
|
+
]
|
254
|
+
|
255
|
+
time0 = 1613910640
|
256
|
+
time1 = 1613910643
|
257
|
+
|
258
|
+
d.run(default_tag: @tag) do
|
259
|
+
d.feed(time0, { "k1" => 0 })
|
260
|
+
d.feed(time0, { "k1" => 1 })
|
261
|
+
d.feed(time0, { "k1" => 2 })
|
262
|
+
d.feed(time1, { "k1" => 3 })
|
263
|
+
d.feed(time1, { "k1" => 4 })
|
264
|
+
end
|
265
|
+
|
266
|
+
assert_equal d.instance.instance_variable_get(:@last_timestamp), 1613910643
|
267
|
+
|
268
|
+
assert_equal [
|
269
|
+
[time0, { "k1" => 0, "tag_key" => 0 }],
|
270
|
+
[time0, { "k1" => 1, "tag_key" => 1 }],
|
271
|
+
[time0, { "k1" => 2, "tag_key" => 2 }],
|
272
|
+
[time1, { "k1" => 3, "tag_key" => 0 }],
|
273
|
+
[time1, { "k1" => 4, "tag_key" => 1 }]
|
274
|
+
], d.filtered
|
275
|
+
end
|
276
|
+
|
277
|
+
def test_tag_out_of_sequence_dropped
|
278
|
+
d = create_driver %[
|
279
|
+
<tag>
|
280
|
+
key tag_key
|
281
|
+
</tag>
|
282
|
+
]
|
283
|
+
|
284
|
+
time0 = Fluent::EventTime.new(1613910640)
|
285
|
+
time1 = Fluent::EventTime.new(1613910643)
|
286
|
+
|
287
|
+
d.run(default_tag: @tag) do
|
288
|
+
d.feed(time0, { "k1" => 0 })
|
289
|
+
d.feed(time1, { "k1" => 1 })
|
290
|
+
d.feed(time0, { "k1" => 2 })
|
291
|
+
d.feed(time1, { "k1" => 3 })
|
292
|
+
d.feed(time1, { "k1" => 4 })
|
293
|
+
end
|
294
|
+
|
295
|
+
assert_equal [
|
296
|
+
[time0, { "k1" => 0, "tag_key" => 0 }],
|
297
|
+
[time1, { "k1" => 1, "tag_key" => 0 }],
|
298
|
+
[time1, { "k1" => 3, "tag_key" => 1 }],
|
299
|
+
[time1, { "k1" => 4, "tag_key" => 2 }]
|
300
|
+
], d.filtered
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_tag_order_field
|
304
|
+
d = create_driver %[
|
305
|
+
order_key order_field
|
306
|
+
<tag>
|
307
|
+
key tag_key
|
308
|
+
</tag>
|
309
|
+
]
|
310
|
+
|
311
|
+
time0 = Fluent::EventTime.new(1613910640)
|
312
|
+
time1 = Fluent::EventTime.new(1613910643)
|
313
|
+
|
314
|
+
d.run(default_tag: @tag) do
|
315
|
+
d.feed(time0, { "k1" => 0 })
|
316
|
+
d.feed(time1, { "k1" => 1 })
|
317
|
+
d.feed(time0, { "k1" => 2 })
|
318
|
+
d.feed(time1, { "k1" => 3 })
|
319
|
+
d.feed(time1, { "k1" => 4 })
|
320
|
+
end
|
321
|
+
|
322
|
+
assert_equal [
|
323
|
+
[time0, { "k1" => 0, "tag_key" => 0, "order_field" => true }],
|
324
|
+
[time1, { "k1" => 1, "tag_key" => 0, "order_field" => true }],
|
325
|
+
[time0, { "k1" => 2, "order_field" => false }],
|
326
|
+
[time1, { "k1" => 3, "tag_key" => 1, "order_field" => true }],
|
327
|
+
[time1, { "k1" => 4, "tag_key" => 2, "order_field" => true }]
|
328
|
+
], d.filtered
|
329
|
+
end
|
105
330
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-influxdb-deduplication
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marc Adams
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-02
|
11
|
+
date: 2021-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|