ddql 0.1.0 → 1.0.0

@@ -0,0 +1,11 @@
+ module DDQL
+   class PostfixNullTypeOperator < Operator
+     attr_reader :pattern
+
+     def initialize(symbol, null_type, ordinal)
+       super("IS #{symbol}", "Is #{symbol}", :postfix, 9, false, :boolean, ordinal)
+       @null_type = null_type
+       @pattern = /IS\s+#{symbol}/
+     end
+   end
+ end
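
DDQL::PostfixNullTypeOperator wraps a null-type symbol as a postfix operator whose pattern matches "IS <symbol>". A minimal usage sketch, assuming the seven-argument Operator constructor invoked via super above; the NOT_DISCLOSED symbol and the ordinal 17 are illustrative values, not ones defined by this diff:

    op = DDQL::PostfixNullTypeOperator.new('NOT_DISCLOSED', :not_disclosed, 17)
    op.pattern                               # => /IS\s+NOT_DISCLOSED/
    op.pattern.match?('IS  NOT_DISCLOSED')   # => true (whitespace-tolerant)
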
@@ -0,0 +1,15 @@
+ module DDQL
+   class QueryExpressionError < StandardError
+     attr_reader :cause, :expression
+
+     def initialize(expression:, cause: nil, message: nil)
+       @cause = cause
+       @expression = expression
+       @message = message || (cause ? cause.message : 'invalid expression')
+     end
+
+     def to_s
+       "failed to parse #{@expression}: #{@message}"
+     end
+   end
+ end
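
DDQL::QueryExpressionError carries the offending expression plus an optional underlying cause and formats both in to_s. A brief, hypothetical example of raising and reporting it (the expression and message strings here are invented):

    begin
      raise DDQL::QueryExpressionError.new(expression: '[Total Revenue] >', message: 'dangling operator')
    rescue DDQL::QueryExpressionError => e
      puts e  # failed to parse [Total Revenue] >: dangling operator
    end
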
@@ -0,0 +1,17 @@
+ module DDQL
+   module StringRefinements
+     refine String do
+       def squish
+         self.dup.squish!
+       end
+
+       def squish!
+         # this implementation is required (vs. just chaining the method calls)
+         # because strip! and gsub! return `nil` if nothing changes
+         strip!
+         gsub!(/[[:space:]]+/, ' ')
+         self
+       end
+     end
+   end
+ end
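
The refinement gives String a squish/squish! pair (trim the ends, collapse internal whitespace runs to single spaces) without monkey-patching globally; it only applies in files that opt in with using. A quick sketch of the effect:

    using DDQL::StringRefinements

    "  IS \t NOT_DISCLOSED\n".squish   # => "IS NOT_DISCLOSED"
    'no change needed'.squish          # => "no change needed"
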
@@ -0,0 +1,73 @@
+ require 'forwardable'
+
+ module DDQL
+
+   class Token
+     using StringRefinements
+
+     attr_reader :data, :type
+     attr_accessor :location
+
+     def initialize(data:, location: nil, type:)
+       @data = data
+       @location = location
+       @type = type
+     end
+
+     def and?
+       data == 'AND'
+     end
+
+     def as_hash
+       type.as_hash(data)
+     end
+
+     def comparison?
+       type.comparison?(data)
+     end
+
+     def complex_comparison?
+       type.complex_comparison?(data)
+     end
+
+     def infix?
+       type.infix?
+     end
+
+     def math?
+       type.math?(data)
+     end
+
+     def op_data
+       data.squish
+     end
+
+     def or?
+       data == 'OR'
+     end
+
+     def parse(parser, expression: nil)
+       type.parse(parser, self, expression: expression)
+     end
+
+     def postfix?
+       type.postfix?
+     end
+
+     def prefix?
+       type.prefix?
+     end
+
+     def simple_comparison?
+       type.simple_comparison?(data)
+     end
+
+     def to_h
+       type.as_hash(data)
+     end
+
+     def to_s
+       "#{type.name} : #{data}"
+     end
+   end
+ end
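
DDQL::Token is a small value object pairing a lexeme (data) with its TokenType, delegating the classification predicates (infix?, comparison?, math?, and so on) and parsing to that type. A short sketch, assuming the TokenType constants from the following hunk load successfully (their operator patterns come from an Operators registry that is not part of this diff):

    token = DDQL::Token.new(data: 'AND', type: DDQL::TokenType::INFIXOPERATOR)
    token.and?     # => true
    token.infix?   # => true
    token.to_s     # => "infixoperator : AND"
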
@@ -0,0 +1,575 @@
+ module DDQL
+   class TokenType
+     using StringRefinements
+
+     attr_reader :label, :name, :pattern
+
+     FACTOR_PATTERN = /\[[^\]]+\]/
+
+     def self.all_types_pattern
+       Regexp.compile(ALL.map { |tt| "(?<#{tt.name}>#{tt.pattern})" }.join('|'))
+     end
+
+     def initialize(name:, pattern:, &block)
+       @label = name.to_s
+       @name = name
+       @pattern = pattern
+       @skipping = false
+       @data_range = 0..-1
+       @value_transformer = block
+     end
+
+     def as_hash(data)
+       # case name
+       # when :string_literal; {right: {string: data}}
+       # when :integer_literal; {right: {int: data}}
+       # when :numeric_literal, :sci_num_literal; {right: {float: data}}
+       # # when :lparen; {lstatement: data}
+       # when :rparen; Hash.new
+       # else
+       raise "subclass responsibility name[#{name}] data[#{data}]"
+       # end
+     end
+
+     def comparison?(data)
+       false
+     end
+
+     def data_from(match_data:)
+       match_data.named_captures[label]
+     end
+
+     def expression?
+       false
+     end
+
+     def factor?
+       false
+     end
+
+     def group?
+       false
+     end
+
+     def infix?
+       false
+     end
+
+     def interpret(data)
+       return nil if data.nil?
+       return data[@data_range] if @value_transformer.nil?
+       @value_transformer.call(data[@data_range])
+     end
+
+     def interpreted_data_from(match_data:)
+       data = data_from match_data: match_data
+       return nil if data.nil?
+       interpret data
+     end
+
+     def literal?
+       false
+     end
+
+     def match?(match_data:)
+       data_from(match_data: match_data).nil? || @skipping ? false : true
+     end
+
+     def parse(parser, token, expression: nil)
+       as_hash(token.data)
+     end
+
+     def postfix?
+       false
+     end
+
+     def prefix?
+       false
+     end
+
+     def screen?
+       false
+     end
+
+     def skipping!
+       @skipping = true
+       self
+     end
+
+     def trimming!(range=(1..-2))
+       @data_range = range
+       self
+     end
+
+     ### Literals
+
+     class Literal < TokenType
+       def initialize(name:, pattern:)
+         super(name: name, pattern: pattern)
+         trimming!
+       end
+
+       def as_hash(data)
+         # {right: {data_type => data}}
+         {data_type => data}
+       end
+
+       def data_type
+         raise "subclass responsibility for [#{self.class}]"
+       end
+
+       def literal?
+         true
+       end
+     end
+
+     class Currency < Literal
+       def initialize
+         super(name: :currency, pattern: /'(?!')(?<code>[A-Z]{3}):(\d+\.?\d+)'/)
+         @value_transformer = lambda do |s|
+           s = s.split(':', 2)
+           {currency_code: s.first, currency_value: {float: s.last.to_f}}
+         end
+       end
+
+       def as_hash(data)
+         # {right: data}
+         data
+       end
+     end
+
+     class Integer < Literal
+       def initialize
+         super(name: :integer, pattern: /'(?!')(?>[+-]?)(\d+)'/)
+         @value_transformer = -> (s) { s.to_i }
+       end
+
+       def data_type
+         :int
+       end
+     end
+
+     class Numeric < Literal
+       def initialize
+         super(name: :numeric, pattern: /'(?!')((?>[+-]?)(?>(?>\d+)(?>\.?)(?>\d*)|(?>\d*)(?>\.?)(?>\d+)))'/)
+         @value_transformer = -> (s) { s.to_f }
+       end
+
+       def data_type
+         :float
+       end
+     end
+
+     class ScientificNumeric < Literal
+       def initialize
+         super(name: :sci_num, pattern: /'(?!')([+-]?\d(\.\d+)?[Ee][+-]?[^0]\d+)'/)
+         @value_transformer = -> (s) { s.to_f }
+       end
+
+       def data_type
+         :float
+       end
+     end
+
+     class SpecialMarker < Literal
+       def initialize
+         super(name: :special_marker, pattern: /\$[a-zA-Z_]+/)
+         trimming!(1..-1)
+       end
+
+       def as_hash(data)
+         super({data.downcase.to_sym => "$#{data}"})
+       end
+
+       def data_type
+         name
+       end
+     end
+
+     class String < Literal
+       def initialize
+         super(name: :string, pattern: /'(?:[^'\\]|\\.)*?'/)
+         @value_transformer = -> (s) { s.gsub('\\', '') }
+       end
+
+       def data_type
+         name
+       end
+     end
+
+     ### /Literals
+
+     class Factor < TokenType
+       def initialize
+         super(name: :factor, pattern: FACTOR_PATTERN)
+         trimming!
+       end
+
+       def as_hash(data)
+         # {left: {name => data}}
+         {name => data}
+       end
+
+       def factor?
+         true
+       end
+
+       def parse(parser, token, expression: nil)
+         h = as_hash(token.data)
+         parser.peek&.comparison? ? {left: h} : h
+       end
+     end
+
+     class Group < TokenType
+       def initialize
+         super(name: :lparen, pattern: /\((?=[^%])/)
+       end
+
+       def group?
+         true
+       end
+
+       def parse(parser, token, expression: nil)
+         new_expression = parser.parse
+         parser.consume TokenType::RPAREN #if parser.peek&.data == ')'
+
+         if expression.nil?
+           next_token = parser.peek
+           if next_token && (next_token.and? || next_token.or?)
+             {
+               lstatement: new_expression,
+             }
+           else
+             new_expression
+           end
+         else
+           expression.merge(new_expression)
+         end
+       end
+     end
+
+     class NullOperators
+       include Singleton
+       def as_hash(data)
+         {op: {op_is: 'IS'}, right: {null_value_type: data.squish.split(' ').last}}
+       end
+
+       def comparison?
+         true
+       end
+     end
+
+     class Operator < TokenType
+       NULL_TYPES = /IS\s+(NO_INFORMATION|NOT_(APPLICABLE|COLLECTED|DISCLOSED|MEANINGFUL))/
+       def as_hash(data)
+         return NullOperators.instance.as_hash(data) if data =~ NULL_TYPES
+         {op: {op_symbol(data) => data}}
+       end
+
+       def comparison?(data)
+         Operators.instance.cache[data]&.comparison?
+       end
+
+       def complex_comparison?(data)
+         Operators.instance.cache[data]&.complex_comparison?
+       end
+
+       def math?(data)
+         Operators.instance.cache[data]&.math?
+       end
+
+       def parse(parser, token, expression: nil)
+         operator = Operators.instance.cache[token.op_data]
+         if expression.nil? && !operator.prefix?
+           raise "expected op[#{operator&.name}] to be part of an expression"
+         end
+         operator.parse(parser, token, expression: expression)
+       end
+
+       def simple_comparison?(data)
+         Operators.instance.cache[data]&.simple_comparison?
+       end
+
+       protected
+       def op_symbol(data)
+         float_map_ops = Operators.float_map_ops
+
+         case data
+         when '==', '='; :op_eq
+         when '!='; :op_ne
+         when '>'; :op_gt
+         when '>='; :op_ge
+         when '<'; :op_lt
+         when '<='; :op_le
+         when '+'; :op_add
+         when '-'; :op_subtract
+         when '*'; :op_multiply
+         when '/'; :op_divide
+         when '%'; :op_mod
+         when '^'; :op_power
+         when 'ON'; :op_date_on
+         when 'EPST'; :op_date_after_or_on
+         when 'EPRE'; :op_date_before_or_on
+         when 'PST'; :op_date_after
+         when 'PRE'; :op_date_before
+         when 'EXISTS'; :op_exist
+         when 'LCTN'; :op_ctn
+         when *float_map_ops.keys; float_map_ops[data].op_symbol
+         else
+           :"op_#{data.downcase.gsub(' ', '_')}"
+         end
+       end
+     end
+
+     class InfixOperator < Operator
+       def initialize
+         super(name: :infixoperator, pattern: Operators.operator_regex(:infix))
+       end
+
+       def infix?
+         true
+       end
+     end
+
+     class PrefixOperator < Operator
+       def initialize
+         super(name: :prefixoperator, pattern: Operators.operator_regex(:prefix))
+       end
+
+       def prefix?
+         true
+       end
+     end
+
+     class PostfixOperator < Operator
+       def initialize
+         super(name: :postfixoperator, pattern: Operators.operator_regex(:postfix))
+       end
+
+       def as_hash(data)
+         if data == 'YES' || data == 'NO'
+           {yes_no_op: {op_symbol(data) => data}}
+         else
+           super
+         end
+       end
+
+       def postfix?
+         true
+       end
+     end
+
+     class Query < Operator
+       def initialize
+         super(name: :query, pattern: /(?<=\{)(?<subquery>[^{}]+)(?=\{|\})/)
+       end
+
+       def as_hash(data)
+         _initialize if @sub_query_pattern.nil?
+         tokens = Lexer.lex(data, pattern: @sub_query_pattern, available_types: @parts)
+         {agg: {op_is: 'IS'}, right: {null_value_type: data.split(' ').last}}
+       end
+
+       def expression?
+         true
+       end
+
+       private
+       def _initialize
+         @sub_query_fields = SubQueryFields.new
+         @sub_query_type = SubQueryType.new
+         @sub_query_expression = SubQueryExpression.new
+         @parts = [@sub_query_fields, @sub_query_type, @sub_query_expression]
+         @sub_query_pattern = Regexp.compile(@parts.map do |tt|
+           "(?<#{tt.name}>#{tt.pattern})"
+         end.join('|'))
+       end
+     end
+
+     class Screen < TokenType
+       def initialize
+         super(name: :screen, pattern: /\[(screen)(#)(\d+)\]+/)
+         trimming!
+       end
+
+       def as_hash(data)
+         {screen: data.split('#').last.to_i}
+       end
+
+       def expression?
+         true
+       end
+
+       def screen?
+         true
+       end
+     end
+
+     class SubQuery < TokenType
+       def initialize
+         super(name: :lbrace, pattern: /\{/)
+       end
+
+       def expression?
+         true
+       end
+
+       def parse(parser, token, expression: nil)
+         new_expression = parser.parse
+         if parser.peek&.type == TokenType::SUB_Q_GROUP
+           token = parser.consume TokenType::SUB_Q_GROUP
+           new_expression.merge!(token.parse(parser, expression: new_expression))
+         end
+
+         parser.consume TokenType::RBRACE if parser.peek&.type == TokenType::RBRACE
+
+         if expression.nil?
+           next_token = parser.peek
+           if next_token && (next_token.and? || next_token.or?)
+             {
+               lstatement: new_expression,
+             }
+           else
+             new_expression
+           end
+         else
+           expression.merge(new_expression)
+         end
+       end
+     end
+
+     class SubQueryExpression < TokenType
+       def initialize
+         super(name: :sub_query_expression, pattern: /expression:\s*(?<sub_query_expression>[^\{\}]{5,})\s*,?\s*/)
+       end
+
+       def as_hash(data)
+         {name => data}
+       end
+
+       def parse(parser, token, expression: nil)
+         if expression.nil? || expression.keys != %i[agg sub_query_fields sub_query_type]
+           as_hash(token.data.strip).merge parser.parse
+         else
+           expression.merge(as_hash(token.data.strip))
+         end
+       end
+     end
+
+     class SubQueryFields < TokenType
+       def initialize
+         super(name: :sub_query_fields, pattern: /fields:\s*(?<sub_query_fields>#{FACTOR_PATTERN})\s*,?\s*/)
+         trimming!
+       end
+
+       def as_hash(data)
+         {name => {factor: data}}
+       end
+
+       def parse(parser, token, expression: nil)
+         if expression.nil? || expression.keys != %i[agg sub_query_expression sub_query_type]
+           as_hash(token.data).merge parser.parse
+         else
+           expression.merge(as_hash(token.data))
+         end
+       end
+     end
+
+     class SubQueryGrouping < TokenType
+       def initialize
+         super(name: :sub_query_grouping, pattern: /GROUP BY\s+(?<sub_query_grouping>#{FACTOR_PATTERN})/)
+         trimming!
+       end
+
+       def as_hash(data)
+         {name => {factor: data}}
+       end
+
+       def parse(parser, token, expression: nil)
+         if expression.nil?
+           raise "expected GROUP BY to be part of an expression"
+         end
+         expression.merge(as_hash(token.data))
+       end
+     end
+
+     class SubQueryType < TokenType
+       def initialize
+         super(name: :sub_query_type, pattern: /type:\s*(?<sub_query_type>IssuerCase|IssuerPerson|Issuer|Case|Person)\s*,?\s*/)
+       end
+
+       def as_hash(data)
+         {name => data}
+       end
+
+       def parse(parser, token, expression: nil)
+         if expression.nil? || expression.keys != %i[agg sub_query_expression sub_query_fields]
+           as_hash(token.data).merge parser.parse
+         else
+           expression.merge(as_hash(token.data))
+         end
+       end
+     end
+
+     class SubQueryCloser < TokenType
+       def initialize
+         super(name: :rbrace, pattern: /\}/)
+       end
+
+       def as_hash(_data)
+         Hash.new
+       end
+     end
+
+     LPAREN = Group.new
+     RPAREN = new(name: :rparen, pattern: /(?=[^%])\)/)
+     LBRACE = SubQuery.new #new(name: :lbrace, pattern: /\{/)
+     RBRACE = SubQueryCloser.new #(name: :rbrace, pattern: /\}/)
+     # TODO: Not used in DataDesk due to some bug. Should we implement and fix?
+     # LCAPTURE = new(name: :lcapture, pattern: /\(%/)
+     # RCAPTURE = new(name: :rcapture, pattern: /%\)/)
+     CURRENCY_LITERAL = Currency.new
+     INTEGER_LITERAL = Integer.new
+     SCI_NUM_LITERAL = ScientificNumeric.new
+     NUMERIC_LITERAL = Numeric.new
+     STRING_LITERAL = String.new
+     SCREEN = Screen.new
+     FACTOR = Factor.new
+     SPECIAL_MARKER = SpecialMarker.new
+     PREFIXOPERATOR = PrefixOperator.new
+     INFIXOPERATOR = InfixOperator.new
+     POSTFIXOPERATOR = PostfixOperator.new
+     # QUERY = Query.new
+     SUB_Q_EXPR = SubQueryExpression.new
+     SUB_Q_FIELDS = SubQueryFields.new
+     SUB_Q_GROUP = SubQueryGrouping.new
+     SUB_Q_TYPE = SubQueryType.new
+     WHITESPACE = new(name: :whitespace, pattern: /[\s]/).skipping!
+
+     ALL = [
+       LPAREN,
+       RPAREN,
+       # LCAPTURE,
+       # RCAPTURE,
+       LBRACE,
+       RBRACE,
+       SUB_Q_EXPR,
+       SUB_Q_FIELDS,
+       SUB_Q_TYPE,
+       SUB_Q_GROUP,
+       CURRENCY_LITERAL,
+       SCI_NUM_LITERAL,
+       INTEGER_LITERAL,
+       NUMERIC_LITERAL,
+       STRING_LITERAL,
+       SCREEN,
+       FACTOR,
+       SPECIAL_MARKER,
+       PREFIXOPERATOR,
+       INFIXOPERATOR,
+       POSTFIXOPERATOR,
+       # QUERY, # TODO: do we need this?
+       WHITESPACE,
+     ]
+   end
+ end
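
TokenType is the heart of the lexer: all_types_pattern folds every registered type in ALL into a single alternation of named capture groups, and each type then uses match? / interpreted_data_from to claim and clean up its capture (via trimming! ranges and value transformers). A rough end-to-end sketch, assuming the whole gem is loaded so the operator-backed patterns (supplied by the Operators registry, which is outside this diff) compile; the query text is invented:

    pattern = DDQL::TokenType.all_types_pattern
    match   = pattern.match("[Market Cap] > '100'")

    factor = DDQL::TokenType::FACTOR
    factor.match?(match_data: match)                 # => true, the :factor group fired first
    factor.interpreted_data_from(match_data: match)  # => "Market Cap" (brackets trimmed via trimming!)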