human-ql 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ #--
2
+ # Copyright (c) 2016 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You may
6
+ # obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
module HumanQL

  # Normalizes and imposes various limitations on query abstract
  # syntax trees (ASTs).
  #
  # A node is either a leaf (any non-Array value, e.g. a term String)
  # or an Array of the form `[ op, *args ]`. The ops given special
  # treatment here are :and, :or, :not, :phrase and String ops, which
  # name a scope.
  class TreeNormalizer

    # Shared, immutable "no ancestors yet" operator stack.
    EMPTY_STACK = [].freeze

    # Allow nested scopes?
    # Default: false -> nested scope nodes are removed.
    attr_accessor :nested_scope

    # Allow nested :not (in other words, double negatives)?
    # Default: false -> nested :not nodes are removed.
    attr_accessor :nested_not

    # Allow unconstrained :not?
    # Queries containing an unconstrained :not may be costly to
    # execute. If false the unconstrained :not will be removed.
    #
    # A :not node is considered "constrained" if it has an :and
    # ancestor with at least one constraint argument. A constraint
    # argument is a term, phrase, or :and node matching this same
    # criteria, or an :or node where *all* arguments match this
    # criteria. See also #scope_can_constrain.
    # Default: true
    attr_accessor :unconstrained_not

    # Does a scope count as a constraint?
    # Default: true -> a scope is a constraint if its argument is a
    # constraint.
    # If it depends on the scope, you can override
    # #scope_can_constrain? with this logic.
    attr_accessor :scope_can_constrain

    # Allow SCOPE within :not?
    # * If set to `true`, scopes below a :not are left in place.
    # * If set to `:invert` normalizes `[:not, ['SCOPE', 'a']]` to
    #   `['SCOPE', [:not, 'a']]`.
    # * If set to `false`, the nested scope node is removed.
    # * For either :invert or false, the scope node is otherwise
    #   removed if found below a :not node.
    # Default: :invert
    attr_accessor :not_scope

    # Allow only scopes combined with :and condition?
    # Default: false
    attr_accessor :scope_and_only

    # Allow scope at root or first level only?
    # Default: false
    attr_accessor :scope_at_top_only

    # Construct given options that are applied via same name setters
    # on self. Only public setters are reachable; an unknown option
    # name raises NoMethodError.
    def initialize( opts = {} )
      @nested_scope = false
      @nested_not = false
      @unconstrained_not = true
      @scope_can_constrain = true
      @scope_at_top_only = false
      @scope_and_only = false
      @not_scope = :invert

      opts.each do |name, val|
        public_send( "#{name}=", val )
      end
    end

    # Return a new normalized AST from the given AST root node, or
    # nil if nothing remains. The given tree is not modified.
    def normalize( node )
      node = normalize_1( node, EMPTY_STACK, @unconstrained_not )
      # The second pass only has work to do when one of the scope
      # placement restrictions is in effect.
      if ( @not_scope != true ) || @scope_and_only || @scope_at_top_only
        node = normalize_2( node, EMPTY_STACK )
      end
      node
    end

    protected

    # Return true if the named scope may act as a constraint. Override
    # to decide per scope; by default the #scope_can_constrain
    # setting applies to every scope.
    def scope_can_constrain?( scope )
      @scope_can_constrain
    end

    # Return true if node is a valid constraint
    def constraint?( node )
      return true unless node.is_a?( Array ) # a leaf always constrains

      op, *args = node
      return false if args.empty?

      case op
      when :and
        args.any? { |a| constraint?( a ) }
      when :or
        args.all? { |a| constraint?( a ) }
      when :phrase
        true
      when String # scope
        scope_can_constrain?( op ) && constraint?( args.first )
      else
        false
      end
    end

    private

    # First pass: drop empty nodes, disallowed nested scopes and
    # disallowed :not nodes, truncate scope and :not nodes to a single
    # argument, and flatten/collapse what remains.
    #
    # ops is the stack of ancestor ops (outermost first); constrained
    # tells whether a :not at this position is permitted.
    def normalize_1( node, ops, constrained )
      return node unless node.is_a?( Array )

      op, *args = node
      return nil if args.empty?

      case op
      when :and
        constrained ||= args.any? { |a| constraint?( a ) }
      when String # scope
        args = args[0,1]
        unless @nested_scope
          outer = ops.find { |o| o.is_a?( String ) }
          if outer == op
            # Redundant repeat of the enclosing scope: drop this node
            # but still normalize its argument, so that the caller's
            # original subtree is never returned as-is.
            return normalize_1( args.first, ops, constrained )
          elsif outer
            return nil
          end
        end
      when :not
        args = args[0,1]
        return nil if !constrained || ( !@nested_not && ops.include?( :not ) )
      end

      out = merge_children( op, args, ops ) do |a, a_ops|
        normalize_1( a, a_ops, constrained )
      end

      collapse( op, out )
    end

    # Second pass: enforce #scope_at_top_only, #scope_and_only and
    # #not_scope on a tree already processed by #normalize_1.
    def normalize_2( node, ops )
      return node unless node.is_a?( Array )

      op, *args = node
      return nil if args.empty?

      case op
      when String # scope
        return nil if @scope_at_top_only && ops.length > 1
        return nil if @scope_and_only && !ops.all? { |o| o == :and }
      when :not
        if @not_scope == :invert
          na = args[0]
          if na.is_a?( Array ) && na[0].is_a?( String )
            # Build fresh nodes rather than swapping ops in place, so
            # the argument node is left untouched.
            return [ na[0], [ :not, *na[1..-1] ] ]
          end
        end
      end

      out = merge_children( op, args, ops ) do |a, a_ops|
        normalize_2( a, a_ops )
      end

      # If scope still found below a :not, remove it. With :invert,
      # this implies nodes intervening
      if out.length >= 2 && @not_scope != true &&
         op.is_a?( String ) && ops.include?( :not )
        nil
      elsif !out.empty? && @not_scope != true &&
            op.is_a?( String ) && ops.include?( :not )
        nil
      else
        collapse( op, out )
      end
    end

    # Yield each argument with the extended op stack and gather the
    # non-nil results. A result with the same op as its parent is
    # spliced into the parent, except under :not, where merging
    # `[:not, [:not, x]]` into `[:not, x]` would invert the meaning.
    def merge_children( op, args, ops )
      a_ops = ops.dup.push( op )
      args.each_with_object( [] ) do |a, out|
        a = yield( a, a_ops )
        if op != :not && a.is_a?( Array ) && a[0] == op
          out.concat( a[1..-1] )
        elsif a # filter nil
          out << a
        end
      end
    end

    # Build the resulting node for op from its normalized arguments:
    # an :and/:or with fewer than two arguments is replaced by its
    # sole argument (or nil), any other op without arguments becomes
    # nil.
    def collapse( op, out )
      if ( op == :and || op == :or ) && out.length < 2
        out[0]
      elsif out.empty?
        nil
      else
        out.unshift( op )
      end
    end

  end

end
@@ -0,0 +1,24 @@
1
+ #--
2
+ # Copyright (c) 2016 David Kellum
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
5
+ # may not use this file except in compliance with the License. You
6
+ # may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13
+ # implied. See the License for the specific language governing
14
+ # permissions and limitations under the License.
15
+ #++
16
+
17
+ require 'rubygems'
18
+ require 'bundler/setup'
19
+ require 'minitest/autorun'
20
+
21
+ unless RUBY_PLATFORM =~ /java/
22
+ require 'pg'
23
+ require 'sequel'
24
+ end
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2016 David Kellum
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
7
+ # may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15
+ # implied. See the License for the specific language governing
16
+ # permissions and limitations under the License.
17
+ #++
18
+
19
+ require_relative 'setup.rb'
20
+
21
+ require 'human-ql/postgresql_custom_parser'
22
+ require 'human-ql/postgresql_generator'
23
+ require 'human-ql/tree_normalizer'
24
+
25
# Fuzz test: randomly mangled queries are parsed, normalized and
# generated, then handed to PostgreSQL's to_tsquery, which must accept
# them. Only defined when Sequel has been loaded.
class TestPostgresqlFuzz < Minitest::Test
  DB = Sequel.connect( "postgres://localhost/human_ql_test" )

  # Server version as an array of integers, or [] if unavailable.
  PG_VERSION =
    begin
      v = DB &&
          DB["select current_setting('server_version') as v"].first[:v]
      v &&= v.split('.').map(&:to_i)
      v || []
    end

  TC = HumanQL::PostgreSQLCustomParser.new( pg_version: PG_VERSION )
  DN = HumanQL::TreeNormalizer.new

  PG = HumanQL::PostgreSQLGenerator.new

  # Number of fuzz test methods to define: many when this file is run
  # directly, a single one otherwise.
  PASSES = if $0 == __FILE__
             100
           else
             1
           end

  # Assert that parsing via PG to_tsquery(generated) doesn't fail
  def assert_pg_parse( hq )
    ast = TC.parse( hq )
    ast = DN.normalize( ast )
    if ast
      pg = PG.generate( ast )
      begin
        rt = DB["select to_tsquery(?) as tsquery", pg].first[:tsquery]
        refute_nil( rt, hq )
      rescue Sequel::DatabaseError => e
        # flunk, rather than Kernel#fail, so that a rejected query is
        # reported as a test failure and not as an unexpected error.
        flunk( "On query #{hq.inspect} -> #{ast.inspect}: #{ e.to_s }" )
      end
    else
      # Nothing left after normalization; count it as an assertion.
      pass
    end
  end

  # Starting point query
  GENERIC_Q = 'ape | ( boy -"cat dog" )'.freeze

  # Characters which are likely to cause trouble
  RANDOM_C = '({"\'a !:* ,^#:/-0.123e-9)<>'.freeze

  PASSES.times do |i|
    define_method( "test_fuzz_#{i}" ) do
      1000.times do
        # Take a random slice of the starting query (a new, unfrozen
        # String) ...
        s = rand( GENERIC_Q.length )
        l = rand( GENERIC_Q.length * 2 )
        q = GENERIC_Q[s,l]
        # ... and on roughly a third of 20 rounds overwrite (or
        # append, at index q.length) one random character.
        20.times do
          if rand(3) == 1
            q[rand(q.length+1)] = fuzz
          end
        end
        assert_pg_parse( q )
      end
    end
  end

  # Return one random character from RANDOM_C.
  def fuzz
    RANDOM_C[rand(RANDOM_C.length)]
  end

end if defined?( ::Sequel )
@@ -0,0 +1,171 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2016 David Kellum
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License"); you
7
+ # may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15
+ # implied. See the License for the specific language governing
16
+ # permissions and limitations under the License.
17
+ #++
18
+
19
+ require_relative 'setup.rb'
20
+
21
+ require 'human-ql/postgresql_custom_parser'
22
+ require 'human-ql/postgresql_generator'
23
+
24
# Tests of PostgreSQLGenerator output for queries parsed with
# PostgreSQLCustomParser. When Sequel is loaded, the generated text is
# additionally round-tripped through PostgreSQL's to_tsquery.
class TestPostgresqlGenerator < Minitest::Test
  # Database connection, or nil when Sequel is not loaded.
  DB = if defined?( ::Sequel )
         Sequel.connect( "postgres://localhost/human_ql_test" )
       end

  # Server version as an array of integers, or [] without a database.
  PG_VERSION =
    begin
      v = DB &&
          DB["select current_setting('server_version') as v"].first[:v]
      v &&= v.split('.').map(&:to_i)
      v || []
    end

  TC = HumanQL::PostgreSQLCustomParser.new( verbose: ARGV.include?('--verbose'),
                                            pg_version: PG_VERSION )
  PG = HumanQL::PostgreSQLGenerator.new

  # True if the connected server is version 9.6 or later. An empty
  # PG_VERSION (no database) compares as older.
  def pg_gte_9_6?
    ( PG_VERSION <=> [9,6] ) >= 0
  end

  # Assert that the human query hq generates expected_pg. A nil
  # expectation is checked with assert_nil, since assert_equal with a
  # nil expected value is deprecated by Minitest.
  def assert_gen( expected_pg, hq )
    ast = TC.parse( hq )
    pg = PG.generate( ast )
    if expected_pg.nil?
      assert_nil( pg, ast )
    else
      assert_equal( expected_pg, pg, ast )
    end
  end

  # Assert that the round-trip representation via PG
  # to_tsquery(generated) (and back to text) doesn't error and is as
  # expected. Does nothing without a database connection.
  def assert_tsq( expected, hq )
    if DB
      ast = TC.parse( hq )
      pg = PG.generate( ast )
      rt = DB["select to_tsquery(?) as tsquery", pg].first[:tsquery]
      assert_equal( expected, rt, ast )
    end
  end

  def test_gen_term
    assert_gen( 'ape', 'ape' )
    assert_tsq( "'ape'", 'ape' )
  end

  def test_gen_and
    assert_gen( 'ape & boy', 'ape boy' )
    assert_tsq( "'ape' & 'boy'", 'ape boy' )
  end

  def test_gen_phrase
    if pg_gte_9_6?
      assert_gen( 'ape <-> boy', '"ape boy"' )
      assert_tsq( "'ape' <-> 'boy'", '"ape boy"' )
    else
      assert_gen( 'ape & boy', '"ape boy"' )
      assert_tsq( "'ape' & 'boy'", '"ape boy"' )
    end
  end

  def test_phrase_with_danger
    skip( "For postgresql 9.6+" ) unless pg_gte_9_6?
    assert_gen( '_ <-> boy', '": boy"' )
    assert_tsq( "'boy'", '": boy"' )
  end

  def test_gen_empty
    assert_gen( nil, '' )
  end

  def test_gen_not
    assert_gen( '!ape', '-ape' )
    assert_tsq( "!'ape'", '-ape' )
  end

  def test_gen_not_stop
    assert_gen( '!the', '-the' )
    assert_tsq( "", '-the' )
  end

  def test_gen_or
    assert_gen( '(ape | boy)', 'ape|boy' )
    assert_tsq( "'ape' | 'boy'", 'ape|boy' )
  end

  def test_gen_or_stop
    assert_gen( '(the | boy)', 'the|boy' )
    assert_tsq( "'boy'", 'the|boy' )
  end

  def test_gen_not_phrase
    skip( "For postgresql 9.6+" ) unless pg_gte_9_6?
    assert_gen( '!(ape <-> boy)', '-"ape boy"' )
    assert_tsq( "!( 'ape' <-> 'boy' )", '-"ape boy"' )
  end

  def test_gen_precedence_1
    assert_gen( '(ape | boy) & cat', 'ape | boy cat' )
    assert_tsq( "( 'ape' | 'boy' ) & 'cat'", 'ape | boy cat' )
  end

  def test_gen_precedence_2
    assert_gen( 'ape & (boy | cat)', 'ape boy | cat' )
    assert_tsq( "'ape' & ( 'boy' | 'cat' )", 'ape boy | cat' )
  end

  def test_gen_precedence_3
    assert_gen( '(ape | !boy) & cat', 'ape | - boy cat' )
    assert_tsq( "( 'ape' | !'boy' ) & 'cat'", 'ape | - boy cat' )
  end

  def test_gen_precedence_4
    assert_gen( '(!ape | boy) & cat', '-ape | boy cat' )
    assert_tsq( "( !'ape' | 'boy' ) & 'cat'", '-ape | boy cat' )
  end

  def test_gen_precedence_5
    assert_gen( '(ape | boy) & !cat', 'ape | boy -cat' )
    assert_tsq( "( 'ape' | 'boy' ) & !'cat'", 'ape | boy -cat' )
  end

  def test_gen_precedence_6
    assert_gen( '(ape | boy) & !cat & dog', 'ape | boy -cat dog' )
    assert_tsq( "( 'ape' | 'boy' ) & !'cat' & 'dog'", 'ape | boy -cat dog' )
  end

  def test_funk_1
    assert_gen( "!(, & _y)", "-( , 'y -)" )
    assert_tsq( "!'y'", "-( ,'y -)" )

    assert_gen( "!(, & _y) & c3", "|-( , 'y -)c3" )
    assert_tsq( "!'y' & 'c3'", "|-( ,'y -)c3" )
  end

  def test_funk_2
    if pg_gte_9_6?
      # Crashes PG 9.6 beta 1-2, fixed in beta 3.
      assert_tsq( "'boy' & 'cat'", "-(a -boy) & cat" )
    else
      # PG 9.5 doesn't normalize away the double not
      assert_tsq( "!( !'boy' ) & 'cat'", "-(a -boy) & cat" )
    end
  end

  def test_gen_or_not
    assert_tsq( "'ape' & ( !'boy' | !'cat' )", "ape & ( -boy | -cat )" )
  end

end