fluent-plugin-filter-list 0.7.4 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6025312b2ef8ab49eba3c259f54a37d835ccac19e74bb781210cfc18e661791a
4
- data.tar.gz: e646c57736caf246e975543f5f0dafdeda23b4a62c5e3053e0316cc73477d3a4
3
+ metadata.gz: 2e248391649cea2b7fa1ef396a5ae5bcdae45959775296b58dd845d334709993
4
+ data.tar.gz: 7ca0d518e0aae01a31a218f20613952202ec07eace82152393f3f186d896d9cc
5
5
  SHA512:
6
- metadata.gz: 28746a246fc18ab753febab8b7af0647b6e7e3fe661401d10b783668c6341cd42ecca70ac3522091c447c26af8c0862de56f5bf9567c316fd25d7d9755435df8
7
- data.tar.gz: 757804606fb9536d34a9f96fe003af127b850c8bcb48b2680e93ed5acaa1cc843d8545bbbf7d6d5e3d0263ecf5396231be7a82a457f20a4c95446cb03ae82565
6
+ metadata.gz: 386303ab0eee0b66080c20db661f8498baafd99fd5f1c76fab156c001dd32228a096bbe6e3caa74d0ece4889f2a23390b4bb407c2850ba6c48d0118871cfeff9
7
+ data.tar.gz: af54b9cb60bfa64207e8281a4b8c2219dc6c63f8e83413b9707f68555036165fe17eb4aa43fa0c92994faf8ed8022808c9674a1994f12a3aed15693e6c8700b0
data/.rubocop.yml CHANGED
@@ -14,7 +14,7 @@ Style/FormatStringToken:
14
14
  Style/StringLiterals:
15
15
  Enabled: false
16
16
  Metrics/MethodLength:
17
- Max: 15
17
+ Max: 25
18
18
  Style/Documentation:
19
19
  Enabled: false
20
20
  Style/FrozenStringLiteralComment:
data/Gemfile CHANGED
@@ -4,4 +4,4 @@ source 'https://rubygems.org'
4
4
  gemspec
5
5
 
6
6
  # Added at 2017-08-14 19:47:59 +0900 by 5hun:
7
- gem 'rubocop', '~> 1.16.1', require: false, group: [:development]
7
+ gem 'rubocop', '~> 1.17.0', require: false, group: [:development]
@@ -1,7 +1,7 @@
1
1
  module Fluent
2
2
  module Plugin
3
3
  module FilterList
4
- VERSION = "0.7.4"
4
+ VERSION = "0.7.5"
5
5
  end
6
6
  end
7
7
  end
data/lib/matcher.rb CHANGED
@@ -7,27 +7,19 @@ module Matchers
7
7
 
8
8
  def initialize(patterns)
9
9
  patterns = (patterns || []).compact.reject(&:empty?)
10
- @trie = Trie.new patterns
10
+ @machine = ACAutomaton.new patterns
11
11
  end
12
12
 
13
13
  def matches?(text)
14
- node = @trie.root
15
- text.to_s.chars.each do |char|
16
- failure = node.failure
17
- node = node.children[char]
18
-
19
- return true unless node.nil? || node.output.nil?
20
- return true unless failure.nil? || failure.output.nil?
14
+ return false if text.nil? || text == ''
21
15
 
22
- # Follow failure if it exists in case pattern doesn't match
23
- node = failure.children[char] if node.nil?
24
- node = failure if node.nil?
25
- end
16
+ @machine.matches? text.to_s
17
+ end
26
18
 
27
- return false if node.nil?
28
- return true unless node.failure.nil? || node.failure.output.nil?
19
+ def find(text)
20
+ return false if text.nil? || text == ''
29
21
 
30
- !node.output.nil?
22
+ @machine.find(text)
31
23
  end
32
24
  end
33
25
 
@@ -50,6 +42,30 @@ module Matchers
50
42
  end
51
43
 
52
44
  class Trie
45
+ class Node
46
+ attr_reader :children
47
+
48
+ def initialize
49
+ @children = {}
50
+ @children.default = nil
51
+ end
52
+
53
+ def insert(char)
54
+ @children[char] = Node.new unless @children.key?(char)
55
+ @children[char]
56
+ end
57
+
58
+ def forward(str)
59
+ children = @children
60
+ child = nil
61
+ str.chars.each do |char|
62
+ child = children[char]
63
+ children = child.children
64
+ end
65
+ child
66
+ end
67
+ end
68
+
53
69
  attr_reader :root
54
70
 
55
71
  def initialize(patterns)
@@ -58,38 +74,12 @@ module Matchers
58
74
  patterns.each do |pattern|
59
75
  insert(pattern)
60
76
  end
61
- build
62
77
  end
63
78
 
64
79
  def insert(pattern = '')
65
80
  current_node = @root
66
- pattern.chars.each_with_index do |char, i|
81
+ pattern.chars.each do |char|
67
82
  current_node = current_node.insert(char)
68
- current_node.output = pattern if i == pattern.length - 1
69
- end
70
- end
71
-
72
- def new_queue
73
- q = Queue.new
74
- @root.children.each_value do |child|
75
- q.push(child)
76
- child.failure = @root # set root on root's children's failure
77
- end
78
- q
79
- end
80
-
81
- def build
82
- # Update failure on each node.
83
- # Search longest matching suffix (which becomes failure) by BFS. In case no matching suffix, root becomes failure.
84
- q = new_queue
85
- until q.empty?
86
- cur_node = q.pop
87
- cur_node.children.each do |char, child|
88
- q.push(child)
89
- detect_node = cur_node.failure || @root
90
- detect_node = detect_node.failure while detect_node.children[char].nil?
91
- child.failure = detect_node.children[char]
92
- end
93
83
  end
94
84
  end
95
85
 
@@ -107,20 +97,162 @@ module Matchers
107
97
  end
108
98
  end
109
99
 
110
- class Node
111
- attr_reader :children
112
- attr_accessor :failure, :output
100
+ # An AC automaton.
101
+ # Based on https://www.cs.uku.fi/~kilpelai/BSA05/lectures/slides04.pdf.
102
+ class ACAutomaton
103
+ class Node
104
+ # Manages the goto as a character -> node ID mapping.
105
+ attr_reader :goto
106
+ # Uniquely (in an automaton) assigned ID of the Node.
107
+ attr_reader :id
108
+ attr_accessor :failure
109
+ # Stores the output of the AC automaton for this node, that is, the indices of the patterns.
110
+ attr_accessor :out
111
+
112
+ def initialize(id: 0, goto: {}, failure: 0, out: [])
113
+ @id = id
114
+ @goto = goto
115
+ @failure = failure
116
+ @out = out
117
+ end
118
+
119
+ def root?
120
+ @id.zero?
121
+ end
122
+
123
+ def g(char)
124
+ if (next_node = @goto[char])
125
+ return next_node
126
+ end
127
+ return 0 if root?
128
+
129
+ nil
130
+ end
113
131
 
114
- def initialize
115
- @children = {}
116
- @children.default = nil
117
- @output = nil
118
- @failure = nil
132
+ def to_s
133
+ "id: #{@id}, goto: #{@goto}, failure: #{@failure}, out: #{@out}"
134
+ end
135
+
136
+ def ==(other)
137
+ @id == other.id && @goto == other.goto && @failure == other.failure && @out == other.out
138
+ end
119
139
  end
140
+ # Nodes are managed in an array. The indices are the node IDs.
141
+ attr_reader :nodes, :patterns
142
+
143
+ def initialize(patterns)
144
+ @nodes = []
145
+ @patterns = patterns || []
146
+ build(@patterns)
147
+ end
148
+
149
+ # Creates a new node and returns the id.
150
+ # This method is not thread safe.
151
+ def new_node
152
+ id = @nodes.size
153
+ node = Node.new(id: id)
154
+ @nodes.push(node)
155
+ id
156
+ end
157
+
158
+ def build(patterns)
159
+ build_goto(patterns)
160
+ build_failure
161
+ end
162
+
163
+ def build_goto(patterns)
164
+ root = new_node
165
+ patterns.each_with_index do |pattern, i|
166
+ q = root
167
+ pattern.chars.each do |char|
168
+ next_q = @nodes[q].goto[char]
169
+ if next_q
170
+ q = next_q
171
+ else
172
+ new_q = new_node
173
+ @nodes[q].goto[char] = new_q
174
+ q = new_q
175
+ end
176
+ end
177
+
178
+ @nodes[q].out.push(i)
179
+ end
180
+ end
181
+
182
+ def build_failure
183
+ queue = [0]
184
+ until queue.empty?
185
+ n = queue.shift
186
+ node = @nodes[n]
187
+ @nodes[n].goto.each do |c, next_node|
188
+ queue.push(next_node)
189
+
190
+ next if n.zero?
191
+
192
+ failure = node.failure
193
+ failure = @nodes[failure].failure while @nodes[failure].g(c).nil?
194
+ @nodes[next_node].failure = @nodes[failure].g(c)
195
+ @nodes[next_node].out.concat(@nodes[@nodes[next_node].failure].out)
196
+ end
197
+ end
198
+ end
199
+
200
+ def find(text)
201
+ return [] if text.nil? || text == ''
202
+
203
+ find_id(text).map do |id|
204
+ @patterns[id]
205
+ end
206
+ end
207
+
208
+ # Finds and returns matched patterns' indices.
209
+ def find_id(text)
210
+ return [] if text.nil? || text == ''
211
+
212
+ q = 0
213
+ result = []
214
+ text.chars.each_with_index do |c, _i|
215
+ loop do
216
+ node = @nodes[q]
217
+ if (to_go_next = node.goto[c])
218
+ q = to_go_next
219
+ break
220
+ end
221
+ break if q.zero?
222
+
223
+ q = node.failure
224
+ end
225
+
226
+ out = @nodes[q].out
227
+ result.concat(out) unless out.empty?
228
+ end
229
+
230
+ result
231
+ end
232
+
233
+ # Returns true if the text matches any pattern, otherwise false.
234
+ def matches?(text)
235
+ return false if text.nil?
236
+
237
+ q = 0
238
+ text.chars.each do |c|
239
+ loop do
240
+ node = @nodes[q]
241
+ if (to_go_next = node.goto[c])
242
+ q = to_go_next
243
+ break
244
+ end
245
+ break if q.zero?
246
+
247
+ q = node.failure
248
+ end
249
+
250
+ out = @nodes[q].out
251
+
252
+ return true unless out.empty?
253
+ end
120
254
 
121
- def insert(char)
122
- @children[char] = Node.new unless @children.key?(char)
123
- @children[char]
255
+ false
124
256
  end
125
257
  end
126
258
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-filter-list
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.4
4
+ version: 0.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shun Yanaura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-21 00:00:00.000000000 Z
11
+ date: 2021-09-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  requirements: []
135
- rubygems_version: 3.1.4
135
+ rubygems_version: 3.1.6
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: A fluentd output plugin to filter keywords from messages