human-ql 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,226 @@
1
+ #--
2
+ # Copyright (c) 2016 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
module HumanQL

  # Normalizes and imposes various limitations on query abstract
  # syntax trees (ASTs).
  #
  # As handled here, a node is either a non-Array leaf (for example a
  # term String) or an Array of the form `[ op, *args ]`, where op is
  # :and, :or, :not, :phrase, or a String naming a scope.
  class TreeNormalizer

    # Shared, frozen operator stack used when starting at the root.
    EMPTY_STACK = [].freeze

    # Allow nested scopes?
    # Default: false -> nested scope nodes are removed.
    attr_accessor :nested_scope

    # Allow nested :not (in other words, double negatives)?
    # Default: false -> nested :not nodes are removed.
    attr_accessor :nested_not

    # Allow unconstrained :not?
    # Queries containing an unconstrained :not may be costly to
    # execute. If false the unconstrained :not will be removed.
    #
    # A :not node is considered "constrained" if it has an :and
    # ancestor with at least one constraint argument. A constraint
    # argument is a term, phrase, or :and node matching this same
    # criteria, or an :or node where *all* arguments match this
    # criteria. See also #scope_can_constrain.
    # Default: true
    attr_accessor :unconstrained_not

    # Does a scope count as a constraint?
    # Default: true -> a scope is a constraint if its argument is a
    # constraint.
    # If it depends on the scope, you can override
    # #scope_can_constrain? with this logic.
    attr_accessor :scope_can_constrain

    # Allow SCOPE within :not?
    # * If set to `:invert` normalizes `[:not, ['SCOPE', 'a']]` to
    #   `['SCOPE', [:not, 'a']]`.
    # * If set to `false`, the nested scope node is removed.
    # * For either :invert or false, the scope node is otherwise
    #   removed if found below a :not node.
    # * If set to `true`, scope nodes below a :not are left in place.
    # Default: :invert
    attr_accessor :not_scope

    # Allow only scopes combined with :and condition?
    # Default: false
    attr_accessor :scope_and_only

    # Allow scope at root or first level only?
    # Default: false
    attr_accessor :scope_at_top_only

    # Construct given options that are applied via same name setters
    # on self. An option without a matching public setter raises
    # NoMethodError.
    def initialize( opts = {} )
      @nested_scope = false
      @nested_not = false
      @unconstrained_not = true
      @scope_can_constrain = true
      @scope_at_top_only = false
      @scope_and_only = false
      @not_scope = :invert

      opts.each do |name, val|
        # public_send: options must never reach private methods.
        public_send( "#{name}=", val )
      end
    end

    # Return a new normalized AST from the given AST root node, or nil
    # if nothing remains after normalization. The given tree is not
    # modified.
    def normalize( node )
      node = normalize_1( node, EMPTY_STACK, @unconstrained_not )
      # The second pass is only needed when some scope placement
      # restriction is in effect.
      if ( @not_scope != true ) || @scope_and_only || @scope_at_top_only
        node = normalize_2( node, EMPTY_STACK )
      end
      node
    end

    protected

    # Return true if the named scope may count as a constraint.
    # Override for per-scope logic; by default returns the
    # #scope_can_constrain setting regardless of the scope given.
    def scope_can_constrain?( scope )
      @scope_can_constrain
    end

    # Return true if node is a valid constraint
    def constraint?( node )
      # Any leaf (non-Array) counts as a constraint.
      return true unless node.is_a?( Array )

      op, *args = node
      return false if args.empty?

      case op
      when :and
        args.any? { |a| constraint?( a ) }
      when :or
        args.all? { |a| constraint?( a ) }
      when :phrase
        true
      when String # scope
        scope_can_constrain?( op ) && constraint?( args.first )
      else
        false
      end
    end

    private

    # First pass: handles nested scopes, nested and unconstrained
    # :not, empty nodes, and flattening. `ops` is the stack of
    # ancestor operators (root first) and `constrained` is whether a
    # :not at this position is to be retained.
    def normalize_1( node, ops, constrained )
      return node unless node.is_a?( Array )

      op, *args = node
      return nil if args.empty?

      case op
      when :and
        constrained ||= args.any? { |a| constraint?( a ) }
      when String # scope
        args = args[0,1] # a scope takes a single argument
        unless @nested_scope
          outer = ops.find { |o| o.is_a?( String ) }
          if outer == op
            # Redundant, same scope: drop this node, but still
            # normalize (and thereby copy) its argument.
            return normalize_1( args.first, ops, constrained )
          elsif outer
            return nil
          end
        end
      when :not
        args = args[0,1] # :not takes a single argument
        return nil if !constrained || ( !@nested_not && ops.include?( :not ) )
      end

      a_ops = ops + [ op ]
      out = collect_args( op, args ) do |a|
        normalize_1( a, a_ops, constrained )
      end

      if ( op == :and || op == :or ) && out.length < 2
        out[0]
      elsif out.empty?
        nil
      else
        out.unshift( op )
      end
    end

    # Second pass: applies the scope placement restrictions
    # (#scope_at_top_only, #scope_and_only, #not_scope). `ops` is the
    # stack of ancestor operators (root first).
    def normalize_2( node, ops )
      return node unless node.is_a?( Array )

      op, *args = node
      return nil if args.empty?

      case op
      when String # scope
        return nil if @scope_at_top_only && ops.length > 1
        return nil if @scope_and_only && !ops.all? { |o| o == :and }
      when :not
        if @not_scope == :invert
          inner = args[0]
          if inner.is_a?( Array ) && inner[0].is_a?( String )
            # Swap the :not and scope operators, building new nodes
            # rather than modifying `inner` in place.
            return [ inner[0], [ :not, *inner[1..-1] ] ]
          end
        end
      end

      a_ops = ops + [ op ]
      out = collect_args( op, args ) { |a| normalize_2( a, a_ops ) }

      if ( op == :and || op == :or ) && out.length < 2
        out[0]
      elsif out.empty?
        nil
      # If scope still found below a :not, remove it. With :invert,
      # this implies nodes intervening
      elsif @not_scope != true && op.is_a?( String ) && ops.include?( :not )
        nil
      else
        out.unshift( op )
      end
    end

    # Yield each of args for normalization and return the non-nil
    # results as a new Array. The arguments of a result node with the
    # same op are spliced in directly, except for :not, where
    # `[:not, [:not, a]]` is not equivalent to `[:not, a]`.
    def collect_args( op, args )
      out = []
      args.each do |arg|
        norm = yield( arg )
        if op != :not && norm.is_a?( Array ) && norm[0] == op
          out.concat( norm[1..-1] )
        elsif norm # filter nil
          out << norm
        end
      end
      out
    end

  end

end
@@ -0,0 +1,24 @@
1
+ #--
2
+ # Copyright (c) 2016 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'rubygems'
18
+ require 'bundler/setup'
19
+ require 'minitest/autorun'
20
+
21
+ unless RUBY_PLATFORM =~ /java/
22
+ require 'pg'
23
+ require 'sequel'
24
+ end
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2016 David Kellum
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
7
+ # may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15
+ # implied. See the License for the specific language governing
16
+ # permissions and limitations under the License.
17
+ #++
18
+
19
+ require_relative 'setup.rb'
20
+
21
+ require 'human-ql/postgresql_custom_parser'
22
+ require 'human-ql/postgresql_generator'
23
+ require 'human-ql/tree_normalizer'
24
+
25
# Fuzz test: randomly perturbed queries are parsed, normalized and
# generated, then passed to PostgreSQL's to_tsquery, which must not
# raise. Only defined when Sequel has been loaded.
class TestPostgresqlFuzz < Minitest::Test
  DB = Sequel.connect( "postgres://localhost/human_ql_test" )

  # Server version as an Array of Integers, or [] if unavailable.
  PG_VERSION =
    begin
      v = DB &&
          DB["select current_setting('server_version') as v"].first[:v]
      v &&= v.split('.').map(&:to_i)
      v || []
    end

  TC = HumanQL::PostgreSQLCustomParser.new( pg_version: PG_VERSION )
  DN = HumanQL::TreeNormalizer.new

  PG = HumanQL::PostgreSQLGenerator.new

  # Number of fuzz test methods to define: more when this file is run
  # directly as a script.
  PASSES = if $0 == __FILE__
             100
           else
             1
           end

  # Assert that parsing via PG to_tsquery(generated) doesn't fail
  def assert_pg_parse( hq )
    ast = TC.parse( hq )
    ast = DN.normalize( ast )
    if ast
      pg = PG.generate( ast )
      begin
        rt = DB["select to_tsquery(?) as tsquery", pg].first[:tsquery]
        refute_nil( rt, hq )
      rescue Sequel::DatabaseError => e
        # flunk (not Kernel#fail) so that this is reported as a test
        # failure rather than an unexpected error.
        flunk( "On query #{hq.inspect} -> #{ast.inspect}: #{ e.to_s }" )
      end
    else
      pass
    end
  end

  # Starting point query
  GENERIC_Q = 'ape | ( boy -"cat dog" )'.freeze

  # Characters which are likely to cause trouble
  RANDOM_C = '({"\'a !:* ,^#:/-0.123e-9)<>'.freeze

  PASSES.times do |i|
    define_method( "test_fuzz_#{i}" ) do
      1000.times do
        # Take a random slice of the starting query...
        s = rand( GENERIC_Q.length )
        l = rand( GENERIC_Q.length * 2 )
        q = GENERIC_Q[s,l]
        # ...and overwrite (or append at the end) random characters.
        20.times do
          if rand(3) == 1
            q[rand(q.length+1)] = fuzz
          end
        end
        assert_pg_parse( q )
      end
    end
  end

  # Return a single random character from RANDOM_C.
  def fuzz
    RANDOM_C[rand(RANDOM_C.length)]
  end

end if defined?( ::Sequel )
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2016 David Kellum
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
7
+ # may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15
+ # implied. See the License for the specific language governing
16
+ # permissions and limitations under the License.
17
+ #++
18
+
19
+ require_relative 'setup.rb'
20
+
21
+ require 'human-ql/postgresql_custom_parser'
22
+ require 'human-ql/postgresql_generator'
23
+
24
# Tests of PostgreSQLGenerator output for queries parsed by
# PostgreSQLCustomParser. When Sequel is loaded, the generated text is
# also round-tripped through PostgreSQL's to_tsquery.
class TestPostgresqlGenerator < Minitest::Test
  # Database connection, or nil when Sequel has not been loaded.
  DB = if defined?( ::Sequel )
         Sequel.connect( "postgres://localhost/human_ql_test" )
       end

  # Server version as an Array of Integers, or [] if unavailable.
  PG_VERSION =
    begin
      v = DB &&
          DB["select current_setting('server_version') as v"].first[:v]
      v &&= v.split('.').map(&:to_i)
      v || []
    end

  TC = HumanQL::PostgreSQLCustomParser.new( verbose: ARGV.include?('--verbose'),
                                            pg_version: PG_VERSION )
  PG = HumanQL::PostgreSQLGenerator.new

  # True if PG_VERSION is 9.6 or later. False when the version is
  # unknown ([]).
  def pg_gte_9_6?
    ( PG_VERSION <=> [9,6] ) >= 0
  end

  # Assert that the human query hq generates expected_pg. The parsed
  # AST is passed as the assertion message.
  def assert_gen( expected_pg, hq )
    ast = TC.parse( hq )
    pg = PG.generate( ast )
    if expected_pg.nil?
      # assert_equal with a nil expectation is deprecated by Minitest
      assert_nil( pg, ast )
    else
      assert_equal( expected_pg, pg, ast )
    end
  end

  # Assert that the round-trip representation via PG
  # to_tsquery(generated) (and back to text) doesn't error and is as
  # expected. Does nothing when there is no DB.
  def assert_tsq( expected, hq )
    if DB
      ast = TC.parse( hq )
      pg = PG.generate( ast )
      rt = DB["select to_tsquery(?) as tsquery", pg].first[:tsquery]
      assert_equal( expected, rt, ast )
    end
  end

  def test_gen_term
    assert_gen( 'ape', 'ape' )
    assert_tsq( "'ape'", 'ape' )
  end

  def test_gen_and
    assert_gen( 'ape & boy', 'ape boy' )
    assert_tsq( "'ape' & 'boy'", 'ape boy' )
  end

  def test_gen_phrase
    if pg_gte_9_6?
      assert_gen( 'ape <-> boy', '"ape boy"' )
      assert_tsq( "'ape' <-> 'boy'", '"ape boy"' )
    else
      assert_gen( 'ape & boy', '"ape boy"' )
      assert_tsq( "'ape' & 'boy'", '"ape boy"' )
    end
  end

  def test_phrase_with_danger
    skip( "For postgresql 9.6+" ) unless pg_gte_9_6?
    assert_gen( '_ <-> boy', '": boy"' )
    assert_tsq( "'boy'", '": boy"' )
  end

  def test_gen_empty
    assert_gen( nil, '' )
  end

  def test_gen_not
    assert_gen( '!ape', '-ape' )
    assert_tsq( "!'ape'", '-ape' )
  end

  def test_gen_not_stop
    assert_gen( '!the', '-the' )
    assert_tsq( "", '-the' )
  end

  def test_gen_or
    assert_gen( '(ape | boy)', 'ape|boy' )
    assert_tsq( "'ape' | 'boy'", 'ape|boy' )
  end

  def test_gen_or_stop
    assert_gen( '(the | boy)', 'the|boy' )
    assert_tsq( "'boy'", 'the|boy' )
  end

  def test_gen_not_phrase
    skip( "For postgresql 9.6+" ) unless pg_gte_9_6?
    assert_gen( '!(ape <-> boy)', '-"ape boy"' )
    assert_tsq( "!( 'ape' <-> 'boy' )", '-"ape boy"' )
  end

  def test_gen_precedence_1
    assert_gen( '(ape | boy) & cat', 'ape | boy cat' )
    assert_tsq( "( 'ape' | 'boy' ) & 'cat'", 'ape | boy cat' )
  end

  def test_gen_precedence_2
    assert_gen( 'ape & (boy | cat)', 'ape boy | cat' )
    assert_tsq( "'ape' & ( 'boy' | 'cat' )", 'ape boy | cat' )
  end

  def test_gen_precedence_3
    assert_gen( '(ape | !boy) & cat', 'ape | - boy cat' )
    assert_tsq( "( 'ape' | !'boy' ) & 'cat'", 'ape | - boy cat' )
  end

  def test_gen_precedence_4
    assert_gen( '(!ape | boy) & cat', '-ape | boy cat' )
    assert_tsq( "( !'ape' | 'boy' ) & 'cat'", '-ape | boy cat' )
  end

  def test_gen_precedence_5
    assert_gen( '(ape | boy) & !cat', 'ape | boy -cat' )
    assert_tsq( "( 'ape' | 'boy' ) & !'cat'", 'ape | boy -cat' )
  end

  def test_gen_precedence_6
    assert_gen( '(ape | boy) & !cat & dog', 'ape | boy -cat dog' )
    assert_tsq( "( 'ape' | 'boy' ) & !'cat' & 'dog'", 'ape | boy -cat dog' )
  end

  def test_funk_1
    assert_gen( "!(, & _y)", "-( , 'y -)" )
    assert_tsq( "!'y'", "-( ,'y -)" )

    assert_gen( "!(, & _y) & c3", "|-( , 'y -)c3" )
    assert_tsq( "!'y' & 'c3'", "|-( ,'y -)c3" )
  end

  def test_funk_2
    if pg_gte_9_6?
      # Crashes PG 9.6 beta 1-2, fixed in beta 3.
      assert_tsq( "'boy' & 'cat'", "-(a -boy) & cat" )
    else
      # PG 9.5 doesn't normalize away the double not
      assert_tsq( "!( !'boy' ) & 'cat'", "-(a -boy) & cat" )
    end
  end

  def test_gen_or_not
    assert_tsq( "'ape' & ( !'boy' | !'cat' )", "ape & ( -boy | -cat )" )
  end

end