fluent-plugin-filter-list 0.7.4 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/Gemfile +1 -1
- data/lib/fluent/plugin/out_filter_list/version.rb +1 -1
- data/lib/matcher.rb +185 -53
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e248391649cea2b7fa1ef396a5ae5bcdae45959775296b58dd845d334709993
|
4
|
+
data.tar.gz: 7ca0d518e0aae01a31a218f20613952202ec07eace82152393f3f186d896d9cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 386303ab0eee0b66080c20db661f8498baafd99fd5f1c76fab156c001dd32228a096bbe6e3caa74d0ece4889f2a23390b4bb407c2850ba6c48d0118871cfeff9
|
7
|
+
data.tar.gz: af54b9cb60bfa64207e8281a4b8c2219dc6c63f8e83413b9707f68555036165fe17eb4aa43fa0c92994faf8ed8022808c9674a1994f12a3aed15693e6c8700b0
|
data/.rubocop.yml
CHANGED
data/Gemfile
CHANGED
data/lib/matcher.rb
CHANGED
@@ -7,27 +7,19 @@ module Matchers
|
|
7
7
|
|
8
8
|
def initialize(patterns)
|
9
9
|
patterns = (patterns || []).compact.reject(&:empty?)
|
10
|
-
@
|
10
|
+
@machine = ACAutomaton.new patterns
|
11
11
|
end
|
12
12
|
|
13
13
|
def matches?(text)
|
14
|
-
|
15
|
-
text.to_s.chars.each do |char|
|
16
|
-
failure = node.failure
|
17
|
-
node = node.children[char]
|
18
|
-
|
19
|
-
return true unless node.nil? || node.output.nil?
|
20
|
-
return true unless failure.nil? || failure.output.nil?
|
14
|
+
return false if text.nil? || text == ''
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
node = failure if node.nil?
|
25
|
-
end
|
16
|
+
@machine.matches? text.to_s
|
17
|
+
end
|
26
18
|
|
27
|
-
|
28
|
-
return
|
19
|
+
def find(text)
|
20
|
+
return false if text.nil? || text == ''
|
29
21
|
|
30
|
-
|
22
|
+
@machine.find(text)
|
31
23
|
end
|
32
24
|
end
|
33
25
|
|
@@ -50,6 +42,30 @@ module Matchers
|
|
50
42
|
end
|
51
43
|
|
52
44
|
class Trie
|
45
|
+
class Node
|
46
|
+
attr_reader :children
|
47
|
+
|
48
|
+
def initialize
|
49
|
+
@children = {}
|
50
|
+
@children.default = nil
|
51
|
+
end
|
52
|
+
|
53
|
+
def insert(char)
|
54
|
+
@children[char] = Node.new unless @children.key?(char)
|
55
|
+
@children[char]
|
56
|
+
end
|
57
|
+
|
58
|
+
def forward(str)
|
59
|
+
children = @children
|
60
|
+
child = nil
|
61
|
+
str.chars.each do |char|
|
62
|
+
child = children[char]
|
63
|
+
children = child.children
|
64
|
+
end
|
65
|
+
child
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
53
69
|
attr_reader :root
|
54
70
|
|
55
71
|
def initialize(patterns)
|
@@ -58,38 +74,12 @@ module Matchers
|
|
58
74
|
patterns.each do |pattern|
|
59
75
|
insert(pattern)
|
60
76
|
end
|
61
|
-
build
|
62
77
|
end
|
63
78
|
|
64
79
|
def insert(pattern = '')
|
65
80
|
current_node = @root
|
66
|
-
pattern.chars.
|
81
|
+
pattern.chars.each do |char|
|
67
82
|
current_node = current_node.insert(char)
|
68
|
-
current_node.output = pattern if i == pattern.length - 1
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
def new_queue
|
73
|
-
q = Queue.new
|
74
|
-
@root.children.each_value do |child|
|
75
|
-
q.push(child)
|
76
|
-
child.failure = @root # set root on root's children's failure
|
77
|
-
end
|
78
|
-
q
|
79
|
-
end
|
80
|
-
|
81
|
-
def build
|
82
|
-
# Update failure on each node.
|
83
|
-
# Search longest matching suffix (which becomes failure) by BFS. In case no matching suffix, root becomes failure.
|
84
|
-
q = new_queue
|
85
|
-
until q.empty?
|
86
|
-
cur_node = q.pop
|
87
|
-
cur_node.children.each do |char, child|
|
88
|
-
q.push(child)
|
89
|
-
detect_node = cur_node.failure || @root
|
90
|
-
detect_node = detect_node.failure while detect_node.children[char].nil?
|
91
|
-
child.failure = detect_node.children[char]
|
92
|
-
end
|
93
83
|
end
|
94
84
|
end
|
95
85
|
|
@@ -107,20 +97,162 @@ module Matchers
|
|
107
97
|
end
|
108
98
|
end
|
109
99
|
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
# An AC automaton.
|
101
|
+
# Based on https://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf.
|
102
|
+
class ACAutomaton
|
103
|
+
class Node
|
104
|
+
# Manages the goto as a character -> node ID mapping.
|
105
|
+
attr_reader :goto
|
106
|
+
# Uniquely (in an automaton) assigned ID of the Node.
|
107
|
+
attr_reader :id
|
108
|
+
attr_accessor :failure
|
109
|
+
# Stores the output set of the AC automaton, that is, the indices of the matched patterns.
|
110
|
+
attr_accessor :out
|
111
|
+
|
112
|
+
def initialize(id: 0, goto: {}, failure: 0, out: [])
|
113
|
+
@id = id
|
114
|
+
@goto = goto
|
115
|
+
@failure = failure
|
116
|
+
@out = out
|
117
|
+
end
|
118
|
+
|
119
|
+
def root?
|
120
|
+
@id.zero?
|
121
|
+
end
|
122
|
+
|
123
|
+
def g(char)
|
124
|
+
if (next_node = @goto[char])
|
125
|
+
return next_node
|
126
|
+
end
|
127
|
+
return 0 if root?
|
128
|
+
|
129
|
+
nil
|
130
|
+
end
|
113
131
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
132
|
+
def to_s
|
133
|
+
"id: #{@id}, goto: #{@goto}, failure: #{@failure}, out: #{@out}"
|
134
|
+
end
|
135
|
+
|
136
|
+
def ==(other)
|
137
|
+
@id == other.id && @goto == other.goto && @failure == other.failure && @out == other.out
|
138
|
+
end
|
119
139
|
end
|
140
|
+
# Nodes are managed in an array. The indices are the node IDs.
|
141
|
+
attr_reader :nodes, :patterns
|
142
|
+
|
143
|
+
def initialize(patterns)
|
144
|
+
@nodes = []
|
145
|
+
@patterns = patterns || []
|
146
|
+
build(@patterns)
|
147
|
+
end
|
148
|
+
|
149
|
+
# Creates a new node and returns the id.
|
150
|
+
# This method is not thread safe.
|
151
|
+
def new_node
|
152
|
+
id = @nodes.size
|
153
|
+
node = Node.new(id: id)
|
154
|
+
@nodes.push(node)
|
155
|
+
id
|
156
|
+
end
|
157
|
+
|
158
|
+
def build(patterns)
|
159
|
+
build_goto(patterns)
|
160
|
+
build_failure
|
161
|
+
end
|
162
|
+
|
163
|
+
def build_goto(patterns)
|
164
|
+
root = new_node
|
165
|
+
patterns.each_with_index do |pattern, i|
|
166
|
+
q = root
|
167
|
+
pattern.chars.each do |char|
|
168
|
+
next_q = @nodes[q].goto[char]
|
169
|
+
if next_q
|
170
|
+
q = next_q
|
171
|
+
else
|
172
|
+
new_q = new_node
|
173
|
+
@nodes[q].goto[char] = new_q
|
174
|
+
q = new_q
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
@nodes[q].out.push(i)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def build_failure
|
183
|
+
queue = [0]
|
184
|
+
until queue.empty?
|
185
|
+
n = queue.shift
|
186
|
+
node = @nodes[n]
|
187
|
+
@nodes[n].goto.each do |c, next_node|
|
188
|
+
queue.push(next_node)
|
189
|
+
|
190
|
+
next if n.zero?
|
191
|
+
|
192
|
+
failure = node.failure
|
193
|
+
failure = @nodes[failure].failure while @nodes[failure].g(c).nil?
|
194
|
+
@nodes[next_node].failure = @nodes[failure].g(c)
|
195
|
+
@nodes[next_node].out.concat(@nodes[@nodes[next_node].failure].out)
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def find(text)
|
201
|
+
return [] if text.nil? || text == ''
|
202
|
+
|
203
|
+
find_id(text).map do |id|
|
204
|
+
@patterns[id]
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
# Finds and returns matched patterns' indices.
|
209
|
+
def find_id(text)
|
210
|
+
return [] if text.nil? || text == ''
|
211
|
+
|
212
|
+
q = 0
|
213
|
+
result = []
|
214
|
+
text.chars.each_with_index do |c, _i|
|
215
|
+
loop do
|
216
|
+
node = @nodes[q]
|
217
|
+
if (to_go_next = node.goto[c])
|
218
|
+
q = to_go_next
|
219
|
+
break
|
220
|
+
end
|
221
|
+
break if q.zero?
|
222
|
+
|
223
|
+
q = node.failure
|
224
|
+
end
|
225
|
+
|
226
|
+
out = @nodes[q].out
|
227
|
+
result.concat(out) unless out.empty?
|
228
|
+
end
|
229
|
+
|
230
|
+
result
|
231
|
+
end
|
232
|
+
|
233
|
+
# Returns true if the text matches any pattern, otherwise false.
|
234
|
+
def matches?(text)
|
235
|
+
return false if text.nil?
|
236
|
+
|
237
|
+
q = 0
|
238
|
+
text.chars.each do |c|
|
239
|
+
loop do
|
240
|
+
node = @nodes[q]
|
241
|
+
if (to_go_next = node.goto[c])
|
242
|
+
q = to_go_next
|
243
|
+
break
|
244
|
+
end
|
245
|
+
break if q.zero?
|
246
|
+
|
247
|
+
q = node.failure
|
248
|
+
end
|
249
|
+
|
250
|
+
out = @nodes[q].out
|
251
|
+
|
252
|
+
return true unless out.empty?
|
253
|
+
end
|
120
254
|
|
121
|
-
|
122
|
-
@children[char] = Node.new unless @children.key?(char)
|
123
|
-
@children[char]
|
255
|
+
false
|
124
256
|
end
|
125
257
|
end
|
126
258
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-filter-list
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.4
|
4
|
+
version: 0.7.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shun Yanaura
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
132
|
- !ruby/object:Gem::Version
|
133
133
|
version: '0'
|
134
134
|
requirements: []
|
135
|
-
rubygems_version: 3.1.
|
135
|
+
rubygems_version: 3.1.6
|
136
136
|
signing_key:
|
137
137
|
specification_version: 4
|
138
138
|
summary: A fluentd output plugin to filter keywords from messages
|