msfl_visitors 1.2.1.dev1 → 1.2.1.dev2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91f283c200ab6615d376fd0bba263230f4734a4b
|
4
|
+
data.tar.gz: e5a9d85eafd0585f93314e53f5c1198628fbd833
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df7283c6be4eb3c4e0e5a98d3321449828c734cfa861189da908a698ab54353c78afd7452b99383ef6cbdd13c64a5532376b6607776102adfbe19abc5ea38a59
|
7
|
+
data.tar.gz: 4dce7a8e50ecde1219743ef727da96ded10eda3bc959cf06450c319390e2eadc9fbc6db2f906ca2db6032117337ba5257e8246bfd2b99ab64b2c95cc04a16dc1
|
@@ -1,5 +1,25 @@
|
|
1
1
|
require 'forwardable'
|
2
2
|
module MSFLVisitors
|
3
|
+
|
4
|
+
module VisitorHelpers
|
5
|
+
# Note that the ES documentation also indicates that # is a special character that requires
|
6
|
+
# escaping and that this behavior is not part of the PERL regex; however Ruby automatically escapes
|
7
|
+
# literal hashes when constructing regices
|
8
|
+
def escape_es_special_regex_chars(str)
|
9
|
+
str.gsub(/([@&<>~])/) { |m| "\\#{m}" }
|
10
|
+
end
|
11
|
+
|
12
|
+
def escaped_regex_helper(escaped_str)
|
13
|
+
exp = escape_es_special_regex_chars "#{escaped_str}"
|
14
|
+
# why you must use #inspect, not #to_s. @link http://ruby-doc.org/core-1.9.3/Regexp.html#method-i-3D-7E
|
15
|
+
%r[.*#{exp}.*]
|
16
|
+
end
|
17
|
+
|
18
|
+
def composable_expr_for(regex_as_literal_string)
|
19
|
+
regex_as_literal_string[3..-4]
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
3
23
|
class Visitor
|
4
24
|
|
5
25
|
attr_accessor :clauses, :current_clause
|
@@ -49,18 +69,13 @@ module MSFLVisitors
|
|
49
69
|
[{clause: root.accept(self)}].concat(clauses).reject { |c| c[:clause] == "" }
|
50
70
|
end
|
51
71
|
|
52
|
-
# Note that the ES documentation also indicates that # is a special character that requires
|
53
|
-
# escaping and that this behavior is not part of the PERL regex; however Ruby automatically escapes
|
54
|
-
# literal hashes when constructing regices
|
55
|
-
def escape_es_special_regex_chars(str)
|
56
|
-
str.gsub(/([@&<>~])/) { |m| "\\#{m}" }
|
57
|
-
end
|
58
|
-
|
59
72
|
private
|
60
73
|
|
61
74
|
attr_reader :mode
|
62
75
|
|
63
76
|
class TermFilterVisitor
|
77
|
+
include VisitorHelpers
|
78
|
+
|
64
79
|
def initialize(visitor)
|
65
80
|
@visitor = visitor
|
66
81
|
end
|
@@ -75,11 +90,7 @@ module MSFLVisitors
|
|
75
90
|
Nodes::Match => '=~',
|
76
91
|
}
|
77
92
|
|
78
|
-
|
79
|
-
exp = visitor.escape_es_special_regex_chars "#{escaped_str}"
|
80
|
-
# why you must use #inspect, not #to_s. @link http://ruby-doc.org/core-1.9.3/Regexp.html#method-i-3D-7E
|
81
|
-
%r[.*#{exp}.*]
|
82
|
-
end
|
93
|
+
|
83
94
|
|
84
95
|
def visit(node)
|
85
96
|
case node
|
@@ -100,7 +111,8 @@ module MSFLVisitors
|
|
100
111
|
|
101
112
|
when Nodes::Match
|
102
113
|
if node.right.is_a? Nodes::Set
|
103
|
-
|
114
|
+
escaped_str_frags = node.right.contents.map { |right_child| composable_expr_for(MSFLVisitors::Nodes::Regex.new(right_child.value.to_s).accept(visitor).inspect) }
|
115
|
+
escaped_str = escaped_str_frags.join('|')
|
104
116
|
"#{node.left.accept(visitor)} #{BINARY_OPERATORS[node.class]} " + %r[.*#{escaped_str}.*].inspect
|
105
117
|
else
|
106
118
|
"#{node.left.accept(visitor)} #{BINARY_OPERATORS[node.class]} " + MSFLVisitors::Nodes::Regex.new(node.right.value.to_s).accept(visitor).inspect
|
@@ -145,6 +157,7 @@ module MSFLVisitors
|
|
145
157
|
end
|
146
158
|
|
147
159
|
class AggregationsVisitor
|
160
|
+
include VisitorHelpers
|
148
161
|
def initialize(visitor)
|
149
162
|
@visitor = visitor
|
150
163
|
end
|
@@ -192,7 +205,8 @@ module MSFLVisitors
|
|
192
205
|
Nodes::Dataset
|
193
206
|
node.value
|
194
207
|
when Nodes::Regex
|
195
|
-
Regexp.escape(node.value.to_s)
|
208
|
+
esc = Regexp.escape("#{node.value.to_s}")
|
209
|
+
composable_expr_for(escaped_regex_helper(esc).inspect)
|
196
210
|
|
197
211
|
when Nodes::GreaterThan,
|
198
212
|
Nodes::GreaterThanEqual,
|
data/msfl_visitors.gemspec
CHANGED
@@ -14,6 +14,7 @@ describe MSFLVisitors::Visitor do
|
|
14
14
|
|
15
15
|
describe "#escape_es_special_regex_chars" do
|
16
16
|
|
17
|
+
include MSFLVisitors::VisitorHelpers
|
17
18
|
{
|
18
19
|
'ab@cd' => 'ab\@cd',
|
19
20
|
'ab&cd' => 'ab\&cd',
|
@@ -23,7 +24,7 @@ describe MSFLVisitors::Visitor do
|
|
23
24
|
}.each do |str, expected|
|
24
25
|
|
25
26
|
it "escapes '#{str}' as '#{expected}'" do
|
26
|
-
expect(
|
27
|
+
expect(escape_es_special_regex_chars str).to eq expected
|
27
28
|
end
|
28
29
|
end
|
29
30
|
end
|
@@ -235,6 +235,15 @@ describe MSFLVisitors::Visitor do
|
|
235
235
|
expected = /.*a\ \#sentence\@\ contain\&ing\ \<lucene\>\ cha\~rs.*/
|
236
236
|
expect(result).to eq expected
|
237
237
|
end
|
238
|
+
|
239
|
+
context "when using the Aggregations visitor" do
|
240
|
+
|
241
|
+
before { visitor.mode = :aggregations }
|
242
|
+
|
243
|
+
it "escapes lucene specific special characters" do
|
244
|
+
expect(result).to eq "a\\ \\#sentence\\@\\ contain\\&ing\\ \\<lucene\\>\\ cha\\~rs"
|
245
|
+
end
|
246
|
+
end
|
238
247
|
end
|
239
248
|
|
240
249
|
context "when the regex contains characters that require escaping" do
|
@@ -251,8 +260,8 @@ describe MSFLVisitors::Visitor do
|
|
251
260
|
|
252
261
|
before { visitor.mode = :aggregations }
|
253
262
|
|
254
|
-
it "returns: 'this\\
|
255
|
-
expect(result).to eq "this\\
|
263
|
+
it "returns: 'this\\ \\/\\ needs\\ to\\ %\\ \\{be,escaped\\}\\ \\*\\.\\ \\^\\[or\\]\\ \\|\\ \\\\else'" do
|
264
|
+
expect(result).to eq "this\\ \\/\\ needs\\ to\\ %\\ \\{be,escaped\\}\\ \\*\\.\\ \\^\\[or\\]\\ \\|\\ \\\\else"
|
256
265
|
end
|
257
266
|
end
|
258
267
|
end
|
@@ -289,6 +298,15 @@ describe MSFLVisitors::Visitor do
|
|
289
298
|
end
|
290
299
|
end
|
291
300
|
|
301
|
+
context "when using the Aggregations visitor" do
|
302
|
+
|
303
|
+
before { visitor.mode = :aggregations }
|
304
|
+
|
305
|
+
it %(results in: { agg_field_name: :lhs, operator: :match, test_value: "this\\ \\(ne\\&eds\\)\\ to\\ be\\*\\ escaped" }) do
|
306
|
+
expected = { agg_field_name: :lhs, operator: :match, test_value: "this\\ \\(ne\\&eds\\)\\ to\\ be\\*\\ escaped" }
|
307
|
+
expect(result).to eq expected
|
308
|
+
end
|
309
|
+
end
|
292
310
|
end
|
293
311
|
|
294
312
|
context "when the right hand side is a Set node containing Value nodes" do
|
@@ -324,6 +342,15 @@ describe MSFLVisitors::Visitor do
|
|
324
342
|
it "results in: { agg_field_name: :lhs, operator: :match, test_value: \"foo|bar|baz\" }" do
|
325
343
|
expect(result).to eq({agg_field_name: :lhs, operator: :match, test_value: "foo|bar|baz"})
|
326
344
|
end
|
345
|
+
|
346
|
+
context "when one of the members of the Set requires escaping" do
|
347
|
+
|
348
|
+
let(:foo_node) { MSFLVisitors::Nodes::Word.new "please&*escape me" }
|
349
|
+
|
350
|
+
it "results in { agg_field_name: :lhs, operator: :match, test_value: \"please\\&\\*escape\\ me }" do
|
351
|
+
expected = { agg_field_name: :lhs, operator: :match, test_value: "please\\&\\*escape\\ me" }
|
352
|
+
end
|
353
|
+
end
|
327
354
|
end
|
328
355
|
end
|
329
356
|
end
|