sql_beautifier 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +2 -2
  4. data/lib/sql_beautifier/base.rb +9 -0
  5. data/lib/sql_beautifier/clauses/base.rb +2 -2
  6. data/lib/sql_beautifier/clauses/condition_clause.rb +1 -1
  7. data/lib/sql_beautifier/clauses/from.rb +15 -69
  8. data/lib/sql_beautifier/clauses/order_by.rb +12 -1
  9. data/lib/sql_beautifier/clauses/select.rb +28 -15
  10. data/lib/sql_beautifier/comment.rb +23 -0
  11. data/lib/sql_beautifier/{comment_stripper.rb → comment_parser.rb} +67 -24
  12. data/lib/sql_beautifier/condition.rb +162 -0
  13. data/lib/sql_beautifier/configuration.rb +4 -15
  14. data/lib/sql_beautifier/create_table_as.rb +127 -0
  15. data/lib/sql_beautifier/cte_definition.rb +41 -0
  16. data/lib/sql_beautifier/cte_query.rb +129 -0
  17. data/lib/sql_beautifier/expression.rb +54 -0
  18. data/lib/sql_beautifier/formatter.rb +13 -80
  19. data/lib/sql_beautifier/join.rb +69 -0
  20. data/lib/sql_beautifier/normalizer.rb +33 -59
  21. data/lib/sql_beautifier/query.rb +185 -0
  22. data/lib/sql_beautifier/scanner.rb +420 -0
  23. data/lib/sql_beautifier/sort_expression.rb +39 -0
  24. data/lib/sql_beautifier/statement_assembler.rb +4 -4
  25. data/lib/sql_beautifier/statement_splitter.rb +35 -143
  26. data/lib/sql_beautifier/table_reference.rb +52 -0
  27. data/lib/sql_beautifier/table_registry.rb +50 -124
  28. data/lib/sql_beautifier/tokenizer.rb +47 -278
  29. data/lib/sql_beautifier/types.rb +9 -0
  30. data/lib/sql_beautifier/version.rb +1 -1
  31. data/lib/sql_beautifier.rb +14 -6
  32. metadata +43 -7
  33. data/lib/sql_beautifier/comment_restorer.rb +0 -62
  34. data/lib/sql_beautifier/condition_formatter.rb +0 -127
  35. data/lib/sql_beautifier/create_table_as_formatter.rb +0 -177
  36. data/lib/sql_beautifier/cte_formatter.rb +0 -192
  37. data/lib/sql_beautifier/subquery_formatter.rb +0 -113
@@ -0,0 +1,420 @@
1
+ # frozen_string_literal: true
2
+
3
module SqlBeautifier
  # Character-level cursor over a SQL string.
  #
  # A Scanner holds the source text, a current position, a parenthesis depth
  # counter and two "inside a quote" flags. It offers three families of
  # helpers:
  #
  # * +consume_*!+ — move the cursor past a construct and return its text;
  # * +skip_*!+    — move the cursor past a construct and return nothing useful;
  # * predicates / lookups that take an explicit position and never move the
  #   cursor (+keyword_at?+, +sentinel_at?+, +find_matching_parenthesis+, ...).
  #
  # "Sentinels" are spans delimited by CommentParser::SENTINEL_PREFIX and
  # CommentParser::SENTINEL_SUFFIX (presumably placeholders that CommentParser
  # substitutes for comments — confirm against CommentParser). They are treated
  # as opaque text, like quoted strings.
  class Scanner
    IDENTIFIER_CHARACTER = %r{[[:alnum:]_$]}
    # inside_sentinel? only honours a sentinel prefix that starts at most this
    # many characters before the queried position.
    SENTINEL_MAX_LOOKBACK = 20
    # Shape of the tag between the two dollars of a `$tag$` delimiter.
    DOLLAR_QUOTE_TAG = %r{\A[[:alpha:]_][[:alnum:]_]*\z}

    attr_reader :source
    attr_reader :position
    attr_reader :parenthesis_depth

    # @param source [String] text to scan
    # @param position [Integer] initial cursor index into +source+
    def initialize(source, position: 0)
      @source = source
      @position = position
      @in_single_quote = false
      @in_double_quote = false
      @parenthesis_depth = 0
    end

    # @return [Boolean] true once the cursor is at or past the end of the source
    def finished?
      @position >= @source.length
    end

    # @return [String, nil] character under the cursor (nil past the end)
    def current_char
      @source[@position]
    end

    # @param offset [Integer] distance from the cursor
    # @return [String, nil] character +offset+ positions away from the cursor
    def peek(offset = 1)
      @source[@position + offset]
    end

    # @return [Boolean] true when depth is zero and neither quote flag is set
    def top_level?
      @parenthesis_depth.zero? && !@in_single_quote && !@in_double_quote
    end

    def in_single_quote?
      @in_single_quote
    end

    def in_double_quote?
      @in_double_quote
    end

    def in_quoted_context?
      @in_single_quote || @in_double_quote
    end

    # Moves the cursor forward by +count+ characters (no bounds check).
    def advance!(count = 1)
      @position += count
    end

    # Sets the single-quote flag and steps over the opening quote.
    def enter_single_quote!
      @in_single_quote = true
      @position += 1
    end

    # Sets the double-quote flag and steps over the opening quote.
    def enter_double_quote!
      @in_double_quote = true
      @position += 1
    end

    def increment_depth!
      @parenthesis_depth += 1
    end

    # Decrements the depth counter, never letting it go below zero so that a
    # stray closing parenthesis cannot make the depth negative.
    def decrement_depth!
      @parenthesis_depth = [@parenthesis_depth - 1, 0].max
    end

    # Consumes the single-quoted string that opens at the cursor. A doubled
    # quote ('') is an escape and does not close the string. If the string is
    # unterminated the cursor ends at the end of the source.
    #
    # @return [String, nil] the text consumed, including the quotes
    def consume_single_quoted_string!
      consume_until!(quoted_run_end(Constants::SINGLE_QUOTE, @position))
    end

    # Double-quote counterpart of #consume_single_quoted_string!.
    #
    # @return [String, nil] the text consumed, including the quotes
    def consume_double_quoted_identifier!
      consume_until!(quoted_run_end(Constants::DOUBLE_QUOTE, @position))
    end

    # Consumes the sentinel that starts at the cursor.
    #
    # @return [String, nil] the sentinel text (see #sentinel_end_position for
    #   what happens when the suffix is missing)
    def consume_sentinel!
      consume_until!(sentinel_end_position)
    end

    # Consumes a dollar-quoted string that opens at the cursor with
    # +delimiter+ (e.g. "$$" or "$tag$"), up to and including the next
    # occurrence of the same delimiter, or to the end of the source.
    #
    # @param delimiter [String]
    # @return [String, nil] the text consumed, delimiters included
    def consume_dollar_quoted_string!(delimiter)
      consume_until!(dollar_quoted_run_end(delimiter, @position))
    end

    # Like #consume_single_quoted_string! but discards the text.
    #
    # @return [nil]
    def skip_single_quoted_string!
      @position = quoted_run_end(Constants::SINGLE_QUOTE, @position)
      nil
    end

    # Like #consume_double_quoted_identifier! but discards the text.
    #
    # @return [nil]
    def skip_double_quoted_identifier!
      @position = quoted_run_end(Constants::DOUBLE_QUOTE, @position)
      nil
    end

    # Moves the cursor to the end of the sentinel that starts at the cursor.
    def skip_sentinel!
      @position = sentinel_end_position
    end

    # Like #consume_dollar_quoted_string! but discards the text.
    #
    # @return [nil]
    def skip_dollar_quoted_string!(delimiter)
      @position = dollar_quoted_run_end(delimiter, @position)
      nil
    end

    # Consumes whichever opaque construct starts at the cursor: a sentinel, a
    # dollar-quoted string, a single-quoted string or a double-quoted
    # identifier (checked in that order).
    #
    # @return [String, nil] the consumed text, or nil (cursor untouched) when
    #   none of those constructs starts here
    def scan_quoted_or_sentinel!
      return consume_sentinel! if sentinel_at?

      delimiter = dollar_quote_delimiter_at
      return consume_dollar_quoted_string!(delimiter) if delimiter

      case current_char
      when Constants::SINGLE_QUOTE
        consume_single_quoted_string!
      when Constants::DOUBLE_QUOTE
        consume_double_quoted_identifier!
      end
    end

    # Incremental variant driven by the quote flags: while a flag is set it
    # advances one step through the quoted text; otherwise it skips a whole
    # sentinel or dollar-quoted string if one starts at the cursor.
    #
    # @return [Boolean] true when the cursor was moved, false otherwise
    def skip_quoted_or_sentinel!
      if @in_single_quote
        advance_through_single_quote!
        return true
      end

      if @in_double_quote
        advance_through_double_quote!
        return true
      end

      if sentinel_at?
        skip_sentinel!
        return true
      end

      delimiter = dollar_quote_delimiter_at
      if delimiter
        skip_dollar_quoted_string!(delimiter)
        return true
      end

      false
    end

    # Takes one step inside a single-quoted string: two characters for an
    # escaped quote, otherwise one; a lone quote clears the single-quote flag.
    def advance_through_single_quote!
      if escaped_single_quote?
        @position += 2
      else
        @in_single_quote = false if current_char == Constants::SINGLE_QUOTE
        @position += 1
      end
    end

    # Double-quote counterpart of #advance_through_single_quote!.
    def advance_through_double_quote!
      if escaped_double_quote?
        @position += 2
      else
        @in_double_quote = false if current_char == Constants::DOUBLE_QUOTE
        @position += 1
      end
    end

    # @return [Boolean] true when the sentinel prefix starts at +at_position+
    def sentinel_at?(at_position = @position)
      @source[at_position, CommentParser::SENTINEL_PREFIX.length] == CommentParser::SENTINEL_PREFIX
    end

    # @return [Integer] index just past the sentinel suffix that follows
    #   +from_position+; when no suffix exists, +from_position + 1+ so callers
    #   still make progress
    def sentinel_end_position(from_position = @position)
      closing = @source.index(CommentParser::SENTINEL_SUFFIX, from_position + CommentParser::SENTINEL_PREFIX.length)
      return from_position + 1 unless closing

      closing + CommentParser::SENTINEL_SUFFIX.length
    end

    # @return [Boolean] true when +at_position+ lies within a sentinel whose
    #   prefix begins no more than SENTINEL_MAX_LOOKBACK characters earlier
    def inside_sentinel?(at_position)
      earliest_prefix = [at_position - SENTINEL_MAX_LOOKBACK, 0].max
      prefix_position = @source.rindex(CommentParser::SENTINEL_PREFIX, at_position)
      return false unless prefix_position && prefix_position >= earliest_prefix

      at_position < sentinel_end_position(prefix_position)
    end

    # @return [String, nil] the dollar-quote delimiter ("$$" or "$tag$") that
    #   starts at +at_position+, or nil when there is none
    def dollar_quote_delimiter_at(at_position = @position)
      return "$$" if @source[at_position, 2] == "$$"
      return unless @source[at_position] == "$"

      closing_dollar_position = @source.index("$", at_position + 1)
      return unless closing_dollar_position

      delimiter = @source[at_position..closing_dollar_position]
      # Reject things like "$1, $" where the text between the dollars is not
      # a valid tag.
      return unless delimiter[1..-2].match?(DOLLAR_QUOTE_TAG)

      delimiter
    end

    # @param keyword [String] expected in lower case (the source slice is
    #   downcased before comparing)
    # @return [Boolean] true when +keyword+ occurs at +at_position+ as a whole
    #   word, i.e. not adjacent to identifier characters
    def keyword_at?(keyword, at_position = @position)
      return false unless @source[at_position, keyword.length]&.downcase == keyword

      word_boundary?(character_before(at_position)) &&
        word_boundary?(character_after(at_position, keyword.length))
    end

    # @return [Boolean] true for nil (edge of the source) or any character
    #   that is not an identifier character
    def word_boundary?(character)
      character.nil? || !character.match?(IDENTIFIER_CHARACTER)
    end

    # @return [String, nil] character just before +at_position+ (nil at 0)
    def character_before(at_position = @position)
      return nil if at_position.zero?

      @source[at_position - 1]
    end

    # @return [String, nil] character +offset+ positions after +at_position+
    def character_after(at_position = @position, offset = 1)
      target = at_position + offset
      return nil if target >= @source.length

      @source[target]
    end

    # @return [Boolean] true when two single quotes start at +at_position+
    def escaped_single_quote?(at_position = @position)
      @source[at_position] == Constants::SINGLE_QUOTE && @source[at_position + 1] == Constants::SINGLE_QUOTE
    end

    # @return [Boolean] true when two double quotes start at +at_position+
    def escaped_double_quote?(at_position = @position)
      @source[at_position] == Constants::DOUBLE_QUOTE && @source[at_position + 1] == Constants::DOUBLE_QUOTE
    end

    # Moves the cursor past any run of whitespace characters.
    def skip_whitespace!
      @position += 1 while @position < @source.length && @source[@position] =~ Constants::WHITESPACE_CHARACTER_REGEX
    end

    # Steps over +keyword+ (by length only — the text is not verified) and any
    # whitespace that follows it.
    def skip_past_keyword!(keyword)
      @position += keyword.length
      skip_whitespace!
    end

    # Skips leading whitespace, then reads a double-quoted or bare identifier.
    #
    # @return [String, nil] the identifier text (quotes included for quoted
    #   identifiers); nil at end of input, when no identifier character is
    #   present, or when a quoted identifier is never closed
    def read_identifier!
      skip_whitespace!
      return nil if finished?

      if current_char == Constants::DOUBLE_QUOTE
        read_quoted_identifier!
      else
        read_unquoted_identifier!
      end
    end

    # Finds the parenthesis that balances the one at +opening_position+.
    # Parentheses inside single-quoted strings, double-quoted identifiers,
    # sentinels and dollar-quoted strings are ignored. Does not move the
    # cursor.
    #
    # @param opening_position [Integer] index of the opening parenthesis
    # @return [Integer, nil] index of the matching close, or nil if unbalanced
    def find_matching_parenthesis(opening_position)
      depth = 0
      cursor = opening_position

      while cursor < @source.length
        opaque_end = opaque_span_end(cursor)
        if opaque_end
          cursor = opaque_end
          next
        end

        case @source[cursor]
        when Constants::OPEN_PARENTHESIS
          depth += 1
        when Constants::CLOSE_PARENTHESIS
          depth -= 1
          return cursor if depth.zero?
        end

        cursor += 1
      end

      nil
    end

    # @return [String, nil] the first entry of Constants::CONJUNCTIONS that
    #   occurs as a whole word at +at_position+
    def detect_conjunction_at(at_position = @position)
      Constants::CONJUNCTIONS.find { |conjunction| keyword_at?(conjunction, at_position) }
    end

    private

    # Moves the cursor to +end_position+ and returns the text it passed over.
    def consume_until!(end_position)
      start = @position
      @position = end_position
      @source[start...end_position]
    end

    # Walks a quoted run whose opening +quote+ sits at +opening_position+.
    # A doubled quote is an escape and is stepped over as a pair.
    #
    # @return [Array(Integer, Boolean)] index just past the run, and whether a
    #   closing quote was actually found
    def scan_quoted_run(quote, opening_position)
      cursor = opening_position + 1

      while cursor < @source.length
        if @source[cursor] != quote
          cursor += 1
        elsif @source[cursor + 1] == quote
          cursor += 2
        else
          return [cursor + 1, true]
        end
      end

      [cursor, false]
    end

    # Index just past the quoted run opening at +opening_position+.
    def quoted_run_end(quote, opening_position)
      scan_quoted_run(quote, opening_position).first
    end

    # Index just past the dollar-quoted string opening at +opening_position+:
    # past the next +delimiter+ if there is one, otherwise the end of the
    # source (or just past the opening delimiter if that is further).
    def dollar_quoted_run_end(delimiter, opening_position)
      body_start = opening_position + delimiter.length
      closing = @source.index(delimiter, body_start)
      return closing + delimiter.length if closing

      [body_start, @source.length].max
    end

    # End index of the opaque construct (string, quoted identifier, sentinel
    # or dollar-quoted string) starting at +at_position+, or nil if none does.
    def opaque_span_end(at_position)
      case @source[at_position]
      when Constants::SINGLE_QUOTE
        return quoted_run_end(Constants::SINGLE_QUOTE, at_position)
      when Constants::DOUBLE_QUOTE
        return quoted_run_end(Constants::DOUBLE_QUOTE, at_position)
      end

      return sentinel_end_position(at_position) if sentinel_at?(at_position)

      # A "$" glued to a preceding identifier character belongs to that
      # identifier (IDENTIFIER_CHARACTER includes "$"), so it cannot open a
      # dollar-quoted string.
      return nil unless @source[at_position] == "$" && word_boundary?(character_before(at_position))

      delimiter = dollar_quote_delimiter_at(at_position)
      delimiter && dollar_quoted_run_end(delimiter, at_position)
    end

    # Reads the double-quoted identifier at the cursor. The cursor always ends
    # just past the run; nil is returned when no closing quote was found, so a
    # trailing escaped quote ("abc"") or a lone quote is not mistaken for a
    # terminated identifier.
    def read_quoted_identifier!
      start = @position
      @position, terminated = scan_quoted_run(Constants::DOUBLE_QUOTE, start)
      return nil unless terminated

      @source[start...@position]
    end

    # Reads a run of identifier characters at the cursor; nil if there is none.
    def read_unquoted_identifier!
      start = @position
      @position += 1 while @position < @source.length && @source[@position].match?(IDENTIFIER_CHARACTER)
      return nil if @position == start

      @source[start...@position]
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module SqlBeautifier
  # A single sort term: an expression with an optional direction (asc/desc)
  # and an optional nulls ordering (nulls first / nulls last).
  #
  # NOTE(review): the `option` macro and the keyword-argument constructor come
  # from Base, which is defined elsewhere; this class relies on each option
  # being exposed as an instance variable of the same name.
  class SortExpression < Base
    DIRECTION_PATTERN = %r{\s+(asc|desc)(?:\s+(nulls\s+(?:first|last)))?\z}i
    NULLS_ONLY_PATTERN = %r{\s+(nulls\s+(?:first|last))\z}i
    WHITESPACE_RUN = %r{\s+}

    option :expression
    option :direction, default: -> {}
    option :nulls, default: -> {}

    # Splits a trailing direction and/or nulls ordering off +text+.
    #
    # @param text [String] one sort term, e.g. "created_at DESC NULLS LAST"
    # @return [SortExpression] with +direction+ and +nulls+ lower-cased, or
    #   left nil when absent
    def self.parse(text)
      stripped = text.strip

      direction_match = stripped.match(DIRECTION_PATTERN)
      if direction_match
        return new(
          expression: direction_match.pre_match.strip,
          direction: direction_match[1].downcase,
          nulls: normalize_nulls(direction_match[2])
        )
      end

      nulls_match = stripped.match(NULLS_ONLY_PATTERN)
      if nulls_match
        return new(
          expression: nulls_match.pre_match.strip,
          nulls: normalize_nulls(nulls_match[1])
        )
      end

      new(expression: stripped)
    end

    # Lower-cases a captured "nulls first|last" fragment and collapses the
    # whitespace between its words to a single space. The patterns accept any
    # whitespace there (tabs, newlines), so all of it has to be collapsed, not
    # only runs of spaces.
    #
    # @param fragment [String, nil]
    # @return [String, nil] nil when the fragment was not captured
    def self.normalize_nulls(fragment)
      fragment&.downcase&.gsub(WHITESPACE_RUN, " ")
    end
    private_class_method :normalize_nulls

    # @return [String] expression, direction and nulls ordering joined by
    #   single spaces, omitting the parts that are not set
    def render
      parts = [@expression]
      parts << @direction if @direction
      parts << @nulls if @nulls
      parts.join(" ")
    end
  end
end
@@ -2,8 +2,8 @@
2
2
 
3
3
  module SqlBeautifier
4
4
  class StatementAssembler
5
- def self.call(value)
6
- new(value).call
5
+ def self.call(...)
6
+ new(...).call
7
7
  end
8
8
 
9
9
  def initialize(value)
@@ -12,7 +12,7 @@ module SqlBeautifier
12
12
 
13
13
  def call
14
14
  removable_types = SqlBeautifier.config_for(:removable_comment_types)
15
- comment_result = CommentStripper.call(@value, removable_types)
15
+ comment_result = CommentParser.call(@value, removable_types)
16
16
 
17
17
  statements = StatementSplitter.split(comment_result.stripped_sql)
18
18
  formatted_statements = statements.filter_map do |statement|
@@ -25,7 +25,7 @@ module SqlBeautifier
25
25
  terminator = trailing_semicolon ? ";\n" : "\n"
26
26
 
27
27
  output = formatted_statements.join(separator) + terminator
28
- CommentRestorer.call(output, comment_result.comment_map)
28
+ CommentParser.restore(output, comment_result.comment_map)
29
29
  end
30
30
  end
31
31
  end