ddql 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
module DDQL
  # Postfix operator of the form `IS <symbol>`.
  #
  # Besides the state handled by the Operator superclass, each instance keeps
  # the null type it was built with and a regular expression that matches its
  # textual form.
  class PostfixNullTypeOperator < Operator
    # @return [Regexp] matches "IS", at least one whitespace character, then the symbol
    attr_reader :pattern

    # @param symbol    text that follows "IS"; used for the operator symbol, its name and the pattern
    # @param null_type value stored as @null_type (no reader is defined here)
    # @param ordinal   forwarded unchanged to Operator#initialize
    #
    # NOTE(review): the positional values passed to super (:postfix, 9, false,
    # :boolean) are defined by Operator#initialize, which is not visible here;
    # confirm their meaning there.
    def initialize(symbol, null_type, ordinal)
      super("IS #{symbol}", "Is #{symbol}", :postfix, 9, false, :boolean, ordinal)
      @pattern   = Regexp.new("IS\\s+#{symbol}")
      @null_type = null_type
    end
  end
end
@@ -0,0 +1,15 @@
1
module DDQL
  # Error describing a query expression that could not be parsed.
  #
  # The rendered text always has the form
  # "failed to parse <expression>: <detail>".
  class QueryExpressionError < StandardError
    # `cause` returns the value given to the constructor (this reader replaces
    # the default Exception#cause); `expression` is the offending expression.
    attr_reader :cause, :expression

    # @param expression the expression that failed to parse
    # @param cause      optional underlying error; its message is used as the
    #                   detail when no explicit message is given
    # @param message    optional explicit detail text
    def initialize(expression:, cause: nil, message: nil)
      @expression = expression
      @cause      = cause
      @message    =
        if message
          message
        elsif cause
          cause.message
        else
          'invalid expression'
        end
    end

    # @return [String] the full, human readable description
    def to_s
      "failed to parse #{@expression}: #{@message}"
    end
  end
end
@@ -0,0 +1,17 @@
1
module DDQL
  # Refinements adding whitespace normalisation helpers to String.
  module StringRefinements
    refine String do
      # Non-destructive variant of #squish!.
      #
      # @return [String] a new string with surrounding whitespace removed and
      #   every inner run of whitespace collapsed to a single space
      def squish
        dup.squish!
      end

      # Collapses every run of whitespace into one space and removes leading
      # and trailing whitespace, modifying the receiver in place.
      #
      # The calls are not chained because strip! and gsub! return `nil` when
      # nothing changes.
      #
      # Whitespace runs are collapsed BEFORE stripping: `[[:space:]]` also
      # matches Unicode whitespace (e.g. a no-break space) that strip! does not
      # remove, so stripping first would leave a stray space at the edges.
      #
      # @return [String] the receiver
      def squish!
        gsub!(/[[:space:]]+/, ' ')
        strip!
        self
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require 'forwardable'
2
+
3
module DDQL

  # One lexed token: the raw data, the token type that recognised it and an
  # optional location. Nearly every query about a token is answered by
  # delegating to its type.
  class Token
    using StringRefinements

    attr_accessor :location
    attr_reader :data, :type

    # @param data     the token's data
    # @param location optional position information (may be assigned later)
    # @param type     the object that answers the delegated questions below
    def initialize(data:, location: nil, type:)
      @type     = type
      @location = location
      @data     = data
    end

    # --- conjunctions -------------------------------------------------------

    # @return [Boolean] true when the data is exactly 'AND'
    def and?
      data == 'AND'
    end

    # @return [Boolean] true when the data is exactly 'OR'
    def or?
      data == 'OR'
    end

    # --- questions answered by the type, based on this token's data ---------

    def comparison?
      type.comparison?(data)
    end

    def complex_comparison?
      type.complex_comparison?(data)
    end

    def simple_comparison?
      type.simple_comparison?(data)
    end

    def math?
      type.math?(data)
    end

    # --- questions answered by the type alone -------------------------------

    def infix?
      type.infix?
    end

    def postfix?
      type.postfix?
    end

    def prefix?
      type.prefix?
    end

    # --- conversions ---------------------------------------------------------

    # @return the type's hash representation of this token's data
    def as_hash
      type.as_hash(data)
    end

    # Same result as #as_hash.
    def to_h
      type.as_hash(data)
    end

    # @return [String] the data with whitespace squished (see StringRefinements)
    def op_data
      data.squish
    end

    # @return [String] "<type name> : <data>"
    def to_s
      "#{type.name} : #{data}"
    end

    # --- parsing --------------------------------------------------------------

    # Hands this token to its type for parsing.
    #
    # @param parser     the parser driving the parse
    # @param expression the expression built so far, if any
    def parse(parser, expression: nil)
      type.parse(parser, self, expression: expression)
    end
  end
end
@@ -0,0 +1,575 @@
1
+ module DDQL
2
+ class TokenType
3
using StringRefinements

# Matches a bracketed factor reference: '[', one or more characters other
# than ']', then ']'.
FACTOR_PATTERN = /\[[^\]]+\]/

# label: string form of the name; name: the name as given to the constructor;
# pattern: the Regexp used to recognise this token type.
attr_reader :label, :name, :pattern
8
+
9
# Builds one alternation Regexp out of every token type in ALL. Each
# alternative is wrapped in a named group carrying the token type's name, and
# the alternatives appear in ALL's order.
#
# @return [Regexp]
def self.all_types_pattern
  alternatives = ALL.map do |token_type|
    "(?<#{token_type.name}>#{token_type.pattern})"
  end
  Regexp.new(alternatives.join('|'))
end
12
+
13
# @param name    identifier of the token type; its string form becomes the label
# @param pattern Regexp recognising the token
# @param block   optional transformer applied to the (sliced) data in #interpret
#
# A new token type is not skipped and uses the whole matched data (0..-1).
def initialize(name:, pattern:, &block)
  @name              = name
  @label             = name.to_s
  @pattern           = pattern
  @value_transformer = block
  @data_range        = 0..-1
  @skipping          = false
end
21
+
22
# Hash representation of +data+ for this token type. The base implementation
# has none and always raises; subclasses provide their own.
#
# @raise [RuntimeError] always
def as_hash(data)
  raise RuntimeError, "subclass responsibility name[#{name}] data[#{data}]"
end
33
+
34
# The base implementation never treats +data+ as a comparison.
#
# @return [false]
def comparison?(data)
  false
end
37
+
38
# Looks up the text captured by the named group matching this token type's
# label.
#
# @param match_data [MatchData]
# @return [String, nil] the captured text, or nil when that group did not take part in the match
def data_from(match_data:)
  captures = match_data.named_captures
  captures[label]
end
41
+
42
# Base answer: false.
#
# @return [false]
def expression?
  false
end
45
+
46
# Base answer: false.
#
# @return [false]
def factor?
  false
end
49
+
50
# Base answer: false.
#
# @return [false]
def group?
  false
end
53
+
54
# Base answer: false.
#
# @return [false]
def infix?
  false
end
57
+
58
# Turns raw matched text into this token type's value: the text is sliced
# with the configured data range and, when a value transformer is set, passed
# through it.
#
# @param data [String, nil] raw matched text
# @return the sliced (and possibly transformed) value, or nil for nil input
def interpret(data)
  return if data.nil?

  sliced = data[@data_range]
  @value_transformer.nil? ? sliced : @value_transformer.call(sliced)
end
63
+
64
# Extracts this token type's capture from +match_data+ and interprets it.
#
# @param match_data [MatchData]
# @return the interpreted value, or nil when nothing was captured
def interpreted_data_from(match_data:)
  raw = data_from(match_data: match_data)
  raw.nil? ? nil : interpret(raw)
end
69
+
70
# Base answer: false.
#
# @return [false]
def literal?
  false
end
73
+
74
# Whether +match_data+ holds a token of this type that should be reported.
#
# @param match_data [MatchData]
# @return [Boolean] false when nothing was captured for this type or when the
#   type is marked as skipping; true otherwise
def match?(match_data:)
  captured = data_from(match_data: match_data)
  !(captured.nil? || @skipping)
end
77
+
78
# Default parse step: the result is simply the hash form of the token's data.
# Neither +parser+ nor +expression+ is consulted here.
#
# @param parser     unused by the base implementation
# @param token      token whose data is converted
# @param expression unused by the base implementation
def parse(parser, token, expression: nil)
  as_hash token.data
end
81
+
82
# Base answer: false.
#
# @return [false]
def postfix?
  false
end
85
+
86
# Base answer: false.
#
# @return [false]
def prefix?
  false
end
89
+
90
# Base answer: false.
#
# @return [false]
def screen?
  false
end
93
+
94
# Marks this token type as skipping, which makes #match? answer false.
#
# @return [self] to allow chaining
def skipping!
  tap { @skipping = true }
end
98
+
99
# Sets the range used by #interpret to slice the matched data. The default
# range (1..-2) drops the first and the last character.
#
# @param range [Range] slice applied to the matched data
# @return [self] to allow chaining
def trimming!(range = (1..-2))
  tap { @data_range = range }
end
103
+
104
+ ### Literals
105
+
106
# Common base for literal token types. Literals trim the first and last
# character of the matched data (the default of #trimming!) and render as a
# one-entry hash keyed by their data type.
class Literal < TokenType
  # @param name    identifier of the literal type
  # @param pattern Regexp recognising the literal
  def initialize(name:, pattern:)
    super
    trimming!
  end

  # @return [Hash] the data keyed by #data_type
  def as_hash(data)
    { data_type => data }
  end

  # Key used by #as_hash; concrete literal types must provide it.
  #
  # @raise [RuntimeError] always, in this base class
  def data_type
    raise "subclass responsibility for [#{self.class}]"
  end

  # @return [true]
  def literal?
    true
  end
end
125
+
126
# Quoted currency literal of the form '<CODE>:<amount>', e.g. 'USD:12.50'.
# CODE is three upper-case letters; the amount is a whole number with an
# optional fractional part.
class Currency < Literal
  def initialize
    # The amount is written as \d+(?:\.\d+)? so that one-digit amounts such as
    # 'USD:5' are recognised too; the previous \d+\.?\d+ required at least two
    # digits. Everything the old pattern accepted is still accepted.
    super(name: :currency, pattern: /'(?!')(?<code>[A-Z]{3}):(\d+(?:\.\d+)?)'/)
    # Splits "<CODE>:<amount>" at the first colon into code and float amount.
    @value_transformer = lambda do |s|
      code, amount = s.split(':', 2)
      {currency_code: code, currency_value: {float: amount.to_f}}
    end
  end

  # The data passed in is returned unchanged.
  def as_hash(data)
    data
  end
end
140
+
141
# Quoted integer literal such as '42' or '-7'; interpreted with String#to_i.
class Integer < Literal
  def initialize
    super(name: :integer, pattern: /'(?!')(?>[+-]?)(\d+)'/)
    @value_transformer = ->(text) { text.to_i }
  end

  # @return [Symbol] :int
  def data_type
    :int
  end
end
151
+
152
# Quoted decimal literal such as '1.5', '.5' or '-3.'; interpreted with
# String#to_f.
class Numeric < Literal
  def initialize
    super(name: :numeric, pattern: /'(?!')((?>[+-]?)(?>(?>\d+)(?>\.?)(?>\d*)|(?>\d*)(?>\.?)(?>\d+)))'/)
    @value_transformer = ->(text) { text.to_f }
  end

  # @return [Symbol] :float
  def data_type
    :float
  end
end
162
+
163
# Quoted number in scientific notation such as '1.5e10' or '-2E-3';
# interpreted with String#to_f.
class ScientificNumeric < Literal
  def initialize
    # The exponent is [1-9]\d*: one or more digits without a leading zero.
    # The previous [^0]\d+ accepted ANY non-'0' character (letters included) as
    # the first exponent character and rejected one-digit exponents like '1e5'.
    super(name: :sci_num, pattern: /'(?!')([+-]?\d(\.\d+)?[Ee][+-]?[1-9]\d*)'/)
    @value_transformer = ->(s) { s.to_f }
  end

  # @return [Symbol] :float
  def data_type
    :float
  end
end
173
+
174
# Marker token written as '$' followed by letters/underscores. Only the
# leading '$' is trimmed from the matched data.
class SpecialMarker < Literal
  def initialize
    super(name: :special_marker, pattern: /\$[a-zA-Z_]+/)
    trimming!(1..-1)
  end

  # @return [Hash] under the type's name, a one-entry hash mapping the
  #   downcased marker (as a symbol) to the marker text with its '$' restored
  def as_hash(data)
    marker = { data.downcase.to_sym => "$#{data}" }
    super(marker)
  end

  # The token type's own name is the key.
  def data_type
    name
  end
end
188
+
189
# Single-quoted string literal; a backslash escapes the following character.
class String < Literal
  def initialize
    super(name: :string, pattern: /'(?:[^'\\]|\\.)*?'/)
    # Unescape by dropping each escaping backslash and keeping the character it
    # protects. Deleting every backslash (the previous behaviour) also destroyed
    # escaped backslashes: the two-character sequence \\ became nothing instead
    # of a single backslash. A trailing lone backslash is still removed.
    @value_transformer = ->(s) { s.gsub(/\\(.?)/m, '\1') }
  end

  # The token type's own name is the key.
  def data_type
    name
  end
end
199
+
200
+ ### /Literals
201
+
202
# Bracketed factor reference (see FACTOR_PATTERN); the brackets are trimmed
# from the matched data.
class Factor < TokenType
  def initialize
    super(name: :factor, pattern: FACTOR_PATTERN)
    trimming!
  end

  # @return [Hash] the data keyed by the type's name
  def as_hash(data)
    { name => data }
  end

  # @return [true]
  def factor?
    true
  end

  # Wraps the factor under :left when the parser's next token is a
  # comparison; otherwise returns the plain factor hash.
  def parse(parser, token, expression: nil)
    factor_hash = as_hash(token.data)
    if parser.peek&.comparison?
      { left: factor_hash }
    else
      factor_hash
    end
  end
end
222
+
223
# Opening parenthesis; parsing it consumes the whole parenthesised group.
class Group < TokenType
  def initialize
    super(name: :lparen, pattern: /\((?=[^%])/)
  end

  # @return [true]
  def group?
    true
  end

  # Parses the group's contents, consumes the closing parenthesis and then
  # either merges the result into +expression+ or, when there is no
  # expression yet, returns it (wrapped under :lstatement if an AND/OR token
  # follows).
  def parse(parser, token, expression: nil)
    inner = parser.parse
    parser.consume TokenType::RPAREN

    return expression.merge(inner) unless expression.nil?

    upcoming = parser.peek
    if upcoming && (upcoming.and? || upcoming.or?)
      { lstatement: inner }
    else
      inner
    end
  end
end
250
+
251
# Singleton that renders `IS <null type>` operators.
class NullOperators
  include Singleton

  # @param data [String] operator text, e.g. "IS NOT_APPLICABLE"
  # @return [Hash] an IS operator whose right side is the last
  #   whitespace-separated word of the squished data
  def as_hash(data)
    null_type = data.squish.split(' ').last
    {
      op: { op_is: 'IS' },
      right: { null_value_type: null_type },
    }
  end

  # @return [true]
  def comparison?
    true
  end
end
261
+
262
# Base token type for operator tokens. Classification questions and parsing
# are answered by the operator registered under the token's text in
# Operators.instance.cache.
class Operator < TokenType
  # Matches the `IS <null type>` operator family.
  NULL_TYPES = /IS\s+(NO_INFORMATION|NOT_(APPLICABLE|COLLECTED|DISCLOSED|MEANINGFUL))/

  # @return [Hash] the NullOperators rendering for null-type operators,
  #   otherwise {op: {<op symbol> => data}}
  def as_hash(data)
    return NullOperators.instance.as_hash(data) if data =~ NULL_TYPES
    {op: {op_symbol(data) => data}}
  end

  # The predicates below return nil when no operator is registered for +data+.

  def comparison?(data)
    Operators.instance.cache[data]&.comparison?
  end

  def complex_comparison?(data)
    Operators.instance.cache[data]&.complex_comparison?
  end

  def math?(data)
    Operators.instance.cache[data]&.math?
  end

  def simple_comparison?(data)
    Operators.instance.cache[data]&.simple_comparison?
  end

  # Delegates parsing to the operator registered for the token's squished text.
  #
  # @raise [RuntimeError] when no operator is registered for the token, or when
  #   a non-prefix operator appears without a preceding expression
  def parse(parser, token, expression: nil)
    operator = Operators.instance.cache[token.op_data]
    # An unregistered operator used to surface as a NoMethodError on nil;
    # report it explicitly instead.
    raise "unknown operator[#{token.op_data}]" if operator.nil?

    if expression.nil? && !operator.prefix?
      raise "expected op[#{operator.name}] to be part of an expression"
    end
    operator.parse(parser, token, expression: expression)
  end

  protected

  # Maps operator text to the symbol used as key in #as_hash. Explicitly
  # listed operators come first, then those known to Operators.float_map_ops;
  # anything else becomes :op_<downcased text with spaces as underscores>.
  def op_symbol(data)
    float_map_ops = Operators.float_map_ops

    case data
    when '==', '='; :op_eq
    when '!=';      :op_ne
    when '>';       :op_gt
    when '>=';      :op_ge
    when '<';       :op_lt
    when '<=';      :op_le
    when '+';       :op_add
    when '-';       :op_subtract
    when '*';       :op_multiply
    when '/';       :op_divide
    when '%';       :op_mod
    when '^';       :op_power
    when 'ON';      :op_date_on
    when 'EPST';    :op_date_after_or_on
    when 'EPRE';    :op_date_before_or_on
    when 'PST';     :op_date_after
    when 'PRE';     :op_date_before
    when 'EXISTS';  :op_exist
    when 'LCTN';    :op_ctn
    when *float_map_ops.keys; float_map_ops[data].op_symbol
    else
      :"op_#{data.downcase.gsub(' ', '_')}"
    end
  end
end
323
+
324
# Token type for infix operators; its pattern comes from
# Operators.operator_regex(:infix).
class InfixOperator < Operator
  def initialize
    infix_pattern = Operators.operator_regex(:infix)
    super(name: :infixoperator, pattern: infix_pattern)
  end

  # @return [true]
  def infix?
    true
  end
end
333
+
334
# Token type for prefix operators; its pattern comes from
# Operators.operator_regex(:prefix).
class PrefixOperator < Operator
  def initialize
    prefix_pattern = Operators.operator_regex(:prefix)
    super(name: :prefixoperator, pattern: prefix_pattern)
  end

  # @return [true]
  def prefix?
    true
  end
end
343
+
344
# Token type for postfix operators; its pattern comes from
# Operators.operator_regex(:postfix).
class PostfixOperator < Operator
  def initialize
    postfix_pattern = Operators.operator_regex(:postfix)
    super(name: :postfixoperator, pattern: postfix_pattern)
  end

  # 'YES' and 'NO' are rendered under :yes_no_op; every other operator uses
  # the inherited rendering.
  def as_hash(data)
    return super unless data == 'YES' || data == 'NO'

    { yes_no_op: { op_symbol(data) => data } }
  end

  # @return [true]
  def postfix?
    true
  end
end
361
+
362
# Token type matching the text between braces of a sub-query.
#
# NOTE(review): the result of lexing the sub-query is not used by #as_hash,
# and the returned hash has the shape of a null-type operator; this looks
# unfinished - confirm the intended behaviour before relying on it.
class Query < Operator
  def initialize
    super(name: :query, pattern: /(?<=\{)(?<subquery>[^{}]+)(?=\{|\})/)
  end

  # Lazily prepares the sub-query lexing pattern, lexes +data+ with it (the
  # resulting tokens are discarded) and returns a hash whose right side is
  # the last space-separated word of +data+.
  def as_hash(data)
    _initialize if @sub_query_pattern.nil?
    Lexer.lex(data, pattern: @sub_query_pattern, available_types: @parts)
    {
      agg: { op_is: 'IS' },
      right: { null_value_type: data.split(' ').last },
    }
  end

  # @return [true]
  def expression?
    true
  end

  private

  # Creates the sub-query token types and the alternation pattern (one named
  # group per part) used to lex a sub-query.
  def _initialize
    @sub_query_fields     = SubQueryFields.new
    @sub_query_type       = SubQueryType.new
    @sub_query_expression = SubQueryExpression.new
    @parts = [@sub_query_fields, @sub_query_type, @sub_query_expression]

    named_groups = @parts.map { |part| "(?<#{part.name}>#{part.pattern})" }
    @sub_query_pattern = Regexp.new(named_groups.join('|'))
  end
end
388
+
389
# Screen reference written as [screen#<digits>]; the first and last
# character of the match are trimmed.
class Screen < TokenType
  def initialize
    super(name: :screen, pattern: /\[(screen)(#)(\d+)\]+/)
    trimming!
  end

  # @return [Hash] {screen: <integer value of the text after the last '#'>}
  def as_hash(data)
    number = data.split('#').last
    { screen: number.to_i }
  end

  # @return [true]
  def expression?
    true
  end

  # @return [true]
  def screen?
    true
  end
end
407
+
408
# Opening brace of a sub-query; parsing it consumes the sub-query body, an
# optional GROUP BY clause and the closing brace when present.
class SubQuery < TokenType
  def initialize
    super(name: :lbrace, pattern: /\{/)
  end

  # @return [true]
  def expression?
    true
  end

  # Parses the sub-query contents. A following GROUP BY token is consumed and
  # merged into the parsed contents; a following closing brace is consumed.
  # The result is merged into +expression+ or, when there is no expression
  # yet, returned (wrapped under :lstatement if an AND/OR token follows).
  def parse(parser, token, expression: nil)
    inner = parser.parse

    if parser.peek&.type == TokenType::SUB_Q_GROUP
      token = parser.consume TokenType::SUB_Q_GROUP
      inner.merge!(token.parse(parser, expression: inner))
    end

    parser.consume TokenType::RBRACE if parser.peek&.type == TokenType::RBRACE

    return expression.merge(inner) unless expression.nil?

    upcoming = parser.peek
    if upcoming && (upcoming.and? || upcoming.or?)
      { lstatement: inner }
    else
      inner
    end
  end
end
440
+
441
# The `expression: ...` part of a sub-query.
class SubQueryExpression < TokenType
  def initialize
    super(name: :sub_query_expression, pattern: /expression:\s*(?<sub_query_expression>[^\{\}]{5,})\s*,?\s*/)
  end

  # @return [Hash] the data keyed by the type's name
  def as_hash(data)
    { name => data }
  end

  # When +expression+ already holds exactly the keys agg, sub_query_fields and
  # sub_query_type (in that order) this part is merged into it; otherwise this
  # part is merged with whatever the parser yields next.
  def parse(parser, token, expression: nil)
    own = as_hash(token.data.strip)
    others_present = !expression.nil? && expression.keys == %i[agg sub_query_fields sub_query_type]

    others_present ? expression.merge(own) : own.merge(parser.parse)
  end
end
458
+
459
# The `fields: [factor]` part of a sub-query; the first and last character
# of the captured factor are trimmed.
class SubQueryFields < TokenType
  def initialize
    super(name: :sub_query_fields, pattern: /fields:\s*(?<sub_query_fields>#{FACTOR_PATTERN})\s*,?\s*/)
    trimming!
  end

  # @return [Hash] {<type name> => {factor: data}}
  def as_hash(data)
    { name => { factor: data } }
  end

  # When +expression+ already holds exactly the keys agg, sub_query_expression
  # and sub_query_type (in that order) this part is merged into it; otherwise
  # this part is merged with whatever the parser yields next.
  def parse(parser, token, expression: nil)
    own = as_hash(token.data)
    others_present = !expression.nil? && expression.keys == %i[agg sub_query_expression sub_query_type]

    others_present ? expression.merge(own) : own.merge(parser.parse)
  end
end
477
+
478
# The `GROUP BY [factor]` clause of a sub-query; the first and last character
# of the captured factor are trimmed.
class SubQueryGrouping < TokenType
  def initialize
    super(name: :sub_query_grouping, pattern: /GROUP BY\s+(?<sub_query_grouping>#{FACTOR_PATTERN})/)
    trimming!
  end

  # @return [Hash] {<type name> => {factor: data}}
  def as_hash(data)
    { name => { factor: data } }
  end

  # Merges the grouping into +expression+.
  #
  # @raise [RuntimeError] when there is no expression to attach to
  def parse(parser, token, expression: nil)
    raise "expected GROUP BY to be part of an expression" if expression.nil?

    expression.merge(as_hash(token.data))
  end
end
495
+
496
# The `type: <kind>` part of a sub-query, where kind is one of IssuerCase,
# IssuerPerson, Issuer, Case or Person.
class SubQueryType < TokenType
  def initialize
    super(name: :sub_query_type, pattern: /type:\s*(?<sub_query_type>IssuerCase|IssuerPerson|Issuer|Case|Person)\s*,?\s*/)
  end

  # @return [Hash] the data keyed by the type's name
  def as_hash(data)
    { name => data }
  end

  # When +expression+ already holds exactly the keys agg, sub_query_expression
  # and sub_query_fields (in that order) this part is merged into it; otherwise
  # this part is merged with whatever the parser yields next.
  def parse(parser, token, expression: nil)
    own = as_hash(token.data)
    others_present = !expression.nil? && expression.keys == %i[agg sub_query_expression sub_query_fields]

    others_present ? expression.merge(own) : own.merge(parser.parse)
  end
end
513
+
514
# Closing brace of a sub-query; contributes nothing to the parsed result.
class SubQueryCloser < TokenType
  def initialize
    super(name: :rbrace, pattern: /\}/)
  end

  # @return [Hash] always a new empty hash
  def as_hash(_data)
    {}
  end
end
523
+
524
# --- Shared token type instances ------------------------------------------

# Grouping and sub-query delimiters.
LPAREN = Group.new
RPAREN = new(name: :rparen, pattern: /(?=[^%])\)/)
LBRACE = SubQuery.new
RBRACE = SubQueryCloser.new
# TODO: Not used in DataDesk due to some bug. Should we implement and fix?
# LCAPTURE = new(name: :lcapture, pattern: /\(%/)
# RCAPTURE = new(name: :rcapture, pattern: /%\)/)

# Literals.
CURRENCY_LITERAL = Currency.new
INTEGER_LITERAL  = Integer.new
SCI_NUM_LITERAL  = ScientificNumeric.new
NUMERIC_LITERAL  = Numeric.new
STRING_LITERAL   = String.new

# Screens, factors and markers.
SCREEN         = Screen.new
FACTOR         = Factor.new
SPECIAL_MARKER = SpecialMarker.new

# Operators.
PREFIXOPERATOR  = PrefixOperator.new
INFIXOPERATOR   = InfixOperator.new
POSTFIXOPERATOR = PostfixOperator.new
# QUERY = Query.new

# Sub-query parts.
SUB_Q_EXPR   = SubQueryExpression.new
SUB_Q_FIELDS = SubQueryFields.new
SUB_Q_GROUP  = SubQueryGrouping.new
SUB_Q_TYPE   = SubQueryType.new

# Single whitespace characters; marked as skipping so #match? is always false.
WHITESPACE = new(name: :whitespace, pattern: /[\s]/).skipping!

# Every token type, in the order used by all_types_pattern to build its
# alternation. Keep the order: earlier alternatives are tried first.
ALL = [
  LPAREN,
  RPAREN,
  # LCAPTURE,
  # RCAPTURE,
  LBRACE,
  RBRACE,
  SUB_Q_EXPR,
  SUB_Q_FIELDS,
  SUB_Q_TYPE,
  SUB_Q_GROUP,
  CURRENCY_LITERAL,
  SCI_NUM_LITERAL,
  INTEGER_LITERAL,
  NUMERIC_LITERAL,
  STRING_LITERAL,
  SCREEN,
  FACTOR,
  SPECIAL_MARKER,
  PREFIXOPERATOR,
  INFIXOPERATOR,
  POSTFIXOPERATOR,
  # QUERY, # TODO: do we need this?
  WHITESPACE,
]
574
+ end
575
+ end