fluent-plugin-filter-list 0.7.4 → 0.7.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6025312b2ef8ab49eba3c259f54a37d835ccac19e74bb781210cfc18e661791a
4
- data.tar.gz: e646c57736caf246e975543f5f0dafdeda23b4a62c5e3053e0316cc73477d3a4
3
+ metadata.gz: 2e248391649cea2b7fa1ef396a5ae5bcdae45959775296b58dd845d334709993
4
+ data.tar.gz: 7ca0d518e0aae01a31a218f20613952202ec07eace82152393f3f186d896d9cc
5
5
  SHA512:
6
- metadata.gz: 28746a246fc18ab753febab8b7af0647b6e7e3fe661401d10b783668c6341cd42ecca70ac3522091c447c26af8c0862de56f5bf9567c316fd25d7d9755435df8
7
- data.tar.gz: 757804606fb9536d34a9f96fe003af127b850c8bcb48b2680e93ed5acaa1cc843d8545bbbf7d6d5e3d0263ecf5396231be7a82a457f20a4c95446cb03ae82565
6
+ metadata.gz: 386303ab0eee0b66080c20db661f8498baafd99fd5f1c76fab156c001dd32228a096bbe6e3caa74d0ece4889f2a23390b4bb407c2850ba6c48d0118871cfeff9
7
+ data.tar.gz: af54b9cb60bfa64207e8281a4b8c2219dc6c63f8e83413b9707f68555036165fe17eb4aa43fa0c92994faf8ed8022808c9674a1994f12a3aed15693e6c8700b0
data/.rubocop.yml CHANGED
@@ -14,7 +14,7 @@ Style/FormatStringToken:
14
14
  Style/StringLiterals:
15
15
  Enabled: false
16
16
  Metrics/MethodLength:
17
- Max: 15
17
+ Max: 25
18
18
  Style/Documentation:
19
19
  Enabled: false
20
20
  Style/FrozenStringLiteralComment:
data/Gemfile CHANGED
@@ -4,4 +4,4 @@ source 'https://rubygems.org'
4
4
  gemspec
5
5
 
6
6
  # Added at 2017-08-14 19:47:59 +0900 by 5hun:
7
- gem 'rubocop', '~> 1.16.1', require: false, group: [:development]
7
+ gem 'rubocop', '~> 1.17.0', require: false, group: [:development]
@@ -1,7 +1,7 @@
1
1
  module Fluent
2
2
  module Plugin
3
3
  module FilterList
4
- VERSION = "0.7.4"
4
+ VERSION = "0.7.5"
5
5
  end
6
6
  end
7
7
  end
data/lib/matcher.rb CHANGED
@@ -7,27 +7,19 @@ module Matchers
7
7
 
8
8
  def initialize(patterns)
9
9
  patterns = (patterns || []).compact.reject(&:empty?)
10
- @trie = Trie.new patterns
10
+ @machine = ACAutomaton.new patterns
11
11
  end
12
12
 
13
13
  def matches?(text)
14
- node = @trie.root
15
- text.to_s.chars.each do |char|
16
- failure = node.failure
17
- node = node.children[char]
18
-
19
- return true unless node.nil? || node.output.nil?
20
- return true unless failure.nil? || failure.output.nil?
14
+ return false if text.nil? || text == ''
21
15
 
22
- # Follow failure if it exists in case pattern doesn't match
23
- node = failure.children[char] if node.nil?
24
- node = failure if node.nil?
25
- end
16
+ @machine.matches? text.to_s
17
+ end
26
18
 
27
- return false if node.nil?
28
- return true unless node.failure.nil? || node.failure.output.nil?
19
+ def find(text)
20
+ return false if text.nil? || text == ''
29
21
 
30
- !node.output.nil?
22
+ @machine.find(text)
31
23
  end
32
24
  end
33
25
 
@@ -50,6 +42,30 @@ module Matchers
50
42
  end
51
43
 
52
44
  class Trie
45
+ class Node
46
+ attr_reader :children
47
+
48
+ def initialize
49
+ @children = {}
50
+ @children.default = nil
51
+ end
52
+
53
+ def insert(char)
54
+ @children[char] = Node.new unless @children.key?(char)
55
+ @children[char]
56
+ end
57
+
58
+ def forward(str)
59
+ children = @children
60
+ child = nil
61
+ str.chars.each do |char|
62
+ child = children[char]
63
+ children = child.children
64
+ end
65
+ child
66
+ end
67
+ end
68
+
53
69
  attr_reader :root
54
70
 
55
71
  def initialize(patterns)
@@ -58,38 +74,12 @@ module Matchers
58
74
  patterns.each do |pattern|
59
75
  insert(pattern)
60
76
  end
61
- build
62
77
  end
63
78
 
64
79
  def insert(pattern = '')
65
80
  current_node = @root
66
- pattern.chars.each_with_index do |char, i|
81
+ pattern.chars.each do |char|
67
82
  current_node = current_node.insert(char)
68
- current_node.output = pattern if i == pattern.length - 1
69
- end
70
- end
71
-
72
- def new_queue
73
- q = Queue.new
74
- @root.children.each_value do |child|
75
- q.push(child)
76
- child.failure = @root # set root on root's children's failure
77
- end
78
- q
79
- end
80
-
81
- def build
82
- # Update failure on each node.
83
- # Search longest matching suffix (which becomes failure) by BFS. In case no matching suffix, root becomes failure.
84
- q = new_queue
85
- until q.empty?
86
- cur_node = q.pop
87
- cur_node.children.each do |char, child|
88
- q.push(child)
89
- detect_node = cur_node.failure || @root
90
- detect_node = detect_node.failure while detect_node.children[char].nil?
91
- child.failure = detect_node.children[char]
92
- end
93
83
  end
94
84
  end
95
85
 
@@ -107,20 +97,162 @@ module Matchers
107
97
  end
108
98
  end
109
99
 
110
- class Node
111
- attr_reader :children
112
- attr_accessor :failure, :output
100
+ # An AC automaton.
101
+ # Based on https://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf.
102
+ class ACAutomaton
103
+ class Node
104
+ # Manages the goto as a character -> node ID mapping.
105
+ attr_reader :goto
106
+ # Uniquely (in an automaton) assigned ID of the Node.
107
+ attr_reader :id
108
+ attr_accessor :failure
109
+ # Stores the Aho-Corasick output set for this node: the indices (into the automaton's patterns) of the patterns recognized here.
110
+ attr_accessor :out
111
+
112
+ def initialize(id: 0, goto: {}, failure: 0, out: [])
113
+ @id = id
114
+ @goto = goto
115
+ @failure = failure
116
+ @out = out
117
+ end
118
+
119
+ def root?
120
+ @id.zero?
121
+ end
122
+
123
+ def g(char)
124
+ if (next_node = @goto[char])
125
+ return next_node
126
+ end
127
+ return 0 if root?
128
+
129
+ nil
130
+ end
113
131
 
114
- def initialize
115
- @children = {}
116
- @children.default = nil
117
- @output = nil
118
- @failure = nil
132
+ def to_s
133
+ "id: #{@id}, goto: #{@goto}, failure: #{@failure}, out: #{@out}"
134
+ end
135
+
136
+ def ==(other)
137
+ @id == other.id && @goto == other.goto && @failure == other.failure && @out == other.out
138
+ end
119
139
  end
140
+ # Nodes are managed in an array. The indices are the node IDs.
141
+ attr_reader :nodes, :patterns
142
+
143
+ def initialize(patterns)
144
+ @nodes = []
145
+ @patterns = patterns || []
146
+ build(@patterns)
147
+ end
148
+
149
+ # Creates a new node and returns the id.
150
+ # This method is not thread safe.
151
+ def new_node
152
+ id = @nodes.size
153
+ node = Node.new(id: id)
154
+ @nodes.push(node)
155
+ id
156
+ end
157
+
158
+ def build(patterns)
159
+ build_goto(patterns)
160
+ build_failure
161
+ end
162
+
163
+ def build_goto(patterns)
164
+ root = new_node
165
+ patterns.each_with_index do |pattern, i|
166
+ q = root
167
+ pattern.chars.each do |char|
168
+ next_q = @nodes[q].goto[char]
169
+ if next_q
170
+ q = next_q
171
+ else
172
+ new_q = new_node
173
+ @nodes[q].goto[char] = new_q
174
+ q = new_q
175
+ end
176
+ end
177
+
178
+ @nodes[q].out.push(i)
179
+ end
180
+ end
181
+
182
+ def build_failure
183
+ queue = [0]
184
+ until queue.empty?
185
+ n = queue.shift
186
+ node = @nodes[n]
187
+ @nodes[n].goto.each do |c, next_node|
188
+ queue.push(next_node)
189
+
190
+ next if n.zero?
191
+
192
+ failure = node.failure
193
+ failure = @nodes[failure].failure while @nodes[failure].g(c).nil?
194
+ @nodes[next_node].failure = @nodes[failure].g(c)
195
+ @nodes[next_node].out.concat(@nodes[@nodes[next_node].failure].out)
196
+ end
197
+ end
198
+ end
199
+
200
+ def find(text)
201
+ return [] if text.nil? || text == ''
202
+
203
+ find_id(text).map do |id|
204
+ @patterns[id]
205
+ end
206
+ end
207
+
208
+ # Finds and returns matched patterns' indices.
209
+ def find_id(text)
210
+ return [] if text.nil? || text == ''
211
+
212
+ q = 0
213
+ result = []
214
+ text.chars.each_with_index do |c, _i|
215
+ loop do
216
+ node = @nodes[q]
217
+ if (to_go_next = node.goto[c])
218
+ q = to_go_next
219
+ break
220
+ end
221
+ break if q.zero?
222
+
223
+ q = node.failure
224
+ end
225
+
226
+ out = @nodes[q].out
227
+ result.concat(out) unless out.empty?
228
+ end
229
+
230
+ result
231
+ end
232
+
233
+ # Returns true if the text matches any pattern, otherwise false.
234
+ def matches?(text)
235
+ return false if text.nil?
236
+
237
+ q = 0
238
+ text.chars.each do |c|
239
+ loop do
240
+ node = @nodes[q]
241
+ if (to_go_next = node.goto[c])
242
+ q = to_go_next
243
+ break
244
+ end
245
+ break if q.zero?
246
+
247
+ q = node.failure
248
+ end
249
+
250
+ out = @nodes[q].out
251
+
252
+ return true unless out.empty?
253
+ end
120
254
 
121
- def insert(char)
122
- @children[char] = Node.new unless @children.key?(char)
123
- @children[char]
255
+ false
124
256
  end
125
257
  end
126
258
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-filter-list
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.4
4
+ version: 0.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shun Yanaura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-21 00:00:00.000000000 Z
11
+ date: 2021-09-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  requirements: []
135
- rubygems_version: 3.1.4
135
+ rubygems_version: 3.1.6
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: A fluentd output plugin to filter keywords from messages