sql_beautifier 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,10 +20,11 @@ module SqlBeautifier
20
20
  return "#{@normalized_value}\n" if first_clause_position.nil? || first_clause_position.positive?
21
21
 
22
22
  @clauses = Tokenizer.split_into_clauses(@normalized_value)
23
+ @table_registry = TableRegistry.new(@clauses[:from]) if @clauses[:from].present?
23
24
  @parts = []
24
25
 
25
26
  append_clause!(:select, Clauses::Select)
26
- append_clause!(:from, Clauses::From)
27
+ append_from_clause!
27
28
  append_clause!(:where, Clauses::Where)
28
29
  append_clause!(:group_by, Clauses::GroupBy)
29
30
  append_clause!(:having, Clauses::Having)
@@ -33,6 +34,7 @@ module SqlBeautifier
33
34
  output = @parts.join("\n\n")
34
35
  return "#{@normalized_value}\n" if output.empty?
35
36
 
37
+ output = @table_registry.apply_aliases(output) if @table_registry
36
38
  "#{output}\n"
37
39
  end
38
40
 
@@ -44,5 +46,12 @@ module SqlBeautifier
44
46
 
45
47
  @parts << formatter_class.call(value)
46
48
  end
49
+
50
# Formats the FROM clause and pushes the result onto @parts.
#
# Unlike the other clauses, the FROM formatter is also handed the table
# registry built for this query (nil when no registry was created).
# Nothing is appended when the clause content is not present.
def append_from_clause!
  from_content = @clauses[:from]

  @parts << Clauses::From.call(from_content, table_registry: @table_registry) if from_content.present?
end
47
56
  end
48
57
  end
@@ -23,13 +23,13 @@ module SqlBeautifier
23
23
 
24
24
  while @position < @source.length
25
25
  case current_character
26
- when "'"
26
+ when Constants::SINGLE_QUOTE
27
27
  consume_string_literal!
28
28
 
29
- when '"'
29
+ when Constants::DOUBLE_QUOTE
30
30
  consume_quoted_identifier!
31
31
 
32
- when %r{\s}
32
+ when Constants::WHITESPACE_CHARACTER_REGEX
33
33
  collapse_whitespace!
34
34
 
35
35
  else
@@ -50,7 +50,7 @@ module SqlBeautifier
50
50
  def collapse_whitespace!
51
51
  @output << " "
52
52
  @position += 1
53
- @position += 1 while @position < @source.length && @source[@position] =~ %r{\s}
53
+ @position += 1 while @position < @source.length && @source[@position] =~ Constants::WHITESPACE_CHARACTER_REGEX
54
54
  end
55
55
 
56
56
  def consume_string_literal!
@@ -61,10 +61,10 @@ module SqlBeautifier
61
61
  character = current_character
62
62
  @output << character
63
63
 
64
- if character == "'" && @source[@position + 1] == "'"
64
+ if character == Constants::SINGLE_QUOTE && @source[@position + 1] == Constants::SINGLE_QUOTE
65
65
  @position += 1
66
66
  @output << current_character
67
- elsif character == "'"
67
+ elsif character == Constants::SINGLE_QUOTE
68
68
  @position += 1
69
69
  return
70
70
  end
@@ -81,10 +81,10 @@ module SqlBeautifier
81
81
  while @position < @source.length
82
82
  character = current_character
83
83
 
84
- if character == '"' && @source[@position + 1] == '"'
85
- identifier << '"'
84
+ if character == Constants::DOUBLE_QUOTE && @source[@position + 1] == Constants::DOUBLE_QUOTE
85
+ identifier << Constants::DOUBLE_QUOTE
86
86
  @position += 2
87
- elsif character == '"'
87
+ elsif character == Constants::DOUBLE_QUOTE
88
88
  @position += 1
89
89
  @output << format_identifier(identifier)
90
90
  return
@@ -100,13 +100,11 @@ module SqlBeautifier
100
100
  end
101
101
 
102
102
  def format_identifier(identifier)
103
- lowercased = identifier.downcase
103
+ downcased_identifier = identifier.downcase
104
+ return downcased_identifier unless requires_quoting?(downcased_identifier)
104
105
 
105
- if requires_quoting?(lowercased)
106
- "\"#{lowercased.gsub('"', '""')}\""
107
- else
108
- lowercased
109
- end
106
+ escaped_identifier = Util.escape_double_quote(downcased_identifier)
107
+ Util.double_quote_string(escaped_identifier)
110
108
  end
111
109
 
112
110
  def requires_quoting?(identifier)
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
module SqlBeautifier
  # Assigns a short alias to every table named in a FROM clause and rewrites
  # `table_name.` qualifiers in SQL text to use those aliases.
  #
  # Aliases written explicitly in the query are kept verbatim. A table without
  # one gets an alias built from the first character of each
  # underscore-separated segment of its name (`user_accounts` => `ua`). When
  # several un-aliased tables share the same initials they are numbered in
  # order of appearance (`ua1`, `ua2`, ...). A generated alias never duplicates
  # another alias in the same FROM clause, explicit or generated.
  class TableRegistry
    # @return [Hash{String => String}] table name => alias
    attr_reader :table_map

    # @param from_content [String] content of the FROM clause, joins included
    def initialize(from_content)
      @from_content = from_content
      @table_map = {}
      build!
    end

    # @param table_name [String]
    # @return [String, nil] the alias registered for the table, nil when unknown
    def alias_for(table_name)
      @table_map[table_name]
    end

    # Returns a copy of +text+ in which every `table_name.` qualifier that
    # starts at a word boundary is replaced by `alias.`. Single-quoted and
    # double-quoted spans are copied through untouched.
    #
    # @param text [String]
    # @return [String]
    def apply_aliases(text)
      output = +""
      position = 0

      while position < text.length
        character = text[position]

        case character
        when Constants::SINGLE_QUOTE, Constants::DOUBLE_QUOTE
          position = copy_quoted_span!(text, position, output, character)
        else
          table_name, table_alias = find_table_replacement_at(text, position)

          if table_name
            output << "#{table_alias}."
            # Skip the table name and the dot that follows it.
            position += table_name.length + 1
          else
            output << character
            position += 1
          end
        end
      end

      output
    end

    private

    # Parses the FROM content and fills @table_map.
    def build!
      table_entries = extract_table_entries(@from_content)

      # Reserve every explicit alias before generating any, so that a generated
      # alias cannot duplicate an alias declared later in the FROM clause
      # (e.g. `users inner join uploads u` must not map both tables to `u`).
      used_aliases = table_entries.map { |table_entry| table_entry[:explicit_alias] }.compact

      assign_aliases!(table_entries, count_initials_occurrences(table_entries), used_aliases)

      # Longest names first, so a table whose name is a prefix of another
      # cannot shadow the longer one during replacement.
      @tables_by_descending_length = @table_map.keys.sort_by { |name| -name.length }.freeze
    end

    # Counts, per initials string, how many tables without an explicit alias
    # produce those initials.
    def count_initials_occurrences(table_entries)
      table_entries.each_with_object(Hash.new(0)) do |table_entry, occurrence_counts|
        next if table_entry[:explicit_alias]

        occurrence_counts[table_initials(table_entry[:table_name])] += 1
      end
    end

    # Records an alias for every entry in @table_map. +used_aliases+ must
    # already contain every explicit alias; generated aliases are appended.
    def assign_aliases!(table_entries, initials_occurrence_counts, used_aliases)
      duplicate_initials_counts = Hash.new(0)
      collision_counts = Hash.new(0)

      table_entries.each do |table_entry|
        table_name = table_entry[:table_name]
        explicit_alias = table_entry[:explicit_alias]

        if explicit_alias
          @table_map[table_name] = explicit_alias
          next
        end

        initials = table_initials(table_name)
        candidate_alias = initials

        # Shared initials are disambiguated with a running number.
        if initials_occurrence_counts[initials] > 1
          duplicate_initials_counts[initials] += 1
          candidate_alias = "#{initials}#{duplicate_initials_counts[initials]}"
        end

        if used_aliases.include?(candidate_alias)
          # Resume numbering after whichever counter is further along, then
          # probe upwards until a free alias is found.
          collision_counts[initials] = [collision_counts[initials], duplicate_initials_counts[initials]].max

          loop do
            collision_counts[initials] += 1
            candidate_alias = "#{initials}#{collision_counts[initials]}"
            break unless used_aliases.include?(candidate_alias)
          end
        end

        @table_map[table_name] = candidate_alias
        used_aliases << candidate_alias
      end
    end

    # Copies the quoted span that opens at +position+ into +output+ and returns
    # the index just past its closing quote (or text.length when the span is
    # never closed). A doubled quote character inside the span is an escaped
    # quote and does not end it.
    def copy_quoted_span!(text, position, output, quote_character)
      output << text[position]
      position += 1

      while position < text.length
        character = text[position]
        output << character

        if character == quote_character
          return position + 1 unless text[position + 1] == quote_character

          position += 1
          output << text[position]
        end

        position += 1
      end

      position
    end

    # Returns [table_name, alias] when a known `table_name.` starts at
    # +position+ on a word boundary, nil otherwise.
    def find_table_replacement_at(text, position)
      return unless Tokenizer.word_boundary?(Tokenizer.character_before(text, position))

      table_name = @tables_by_descending_length.find do |candidate_name|
        text[position, candidate_name.length + 1] == "#{candidate_name}."
      end

      [table_name, @table_map[table_name]] if table_name
    end

    # First character of every underscore-separated segment, concatenated.
    def table_initials(table_name)
      table_name.split("_").map { |segment| segment[0] }.join
    end

    # Splits the FROM content on join keywords and returns one
    # { table_name:, explicit_alias: } hash per table found. Returns an empty
    # array when the content is nil or empty.
    def extract_table_entries(from_content)
      # The split result alternates join keyword / join content after the
      # primary table segment.
      primary_segment, *join_parts = from_content.to_s.strip.split(Constants::JOIN_KEYWORD_PATTERN)
      return [] unless primary_segment

      join_segments = join_parts.each_slice(2).map { |_join_keyword, join_content| join_content }.compact

      [primary_segment.strip, *join_segments].map { |segment| extract_table_entry(segment) }.compact
    end

    # Builds the entry hash for one table segment; nil when the segment holds
    # no table name.
    def extract_table_entry(segment_text)
      table_specification = table_specification_text(segment_text)
      table_name = Util.first_word(table_specification)
      return unless table_name

      {
        table_name: table_name,
        explicit_alias: extract_explicit_alias(table_specification),
      }
    end

    # The part of a segment that precedes its top-level `on` keyword (the whole
    # segment when there is none), stripped.
    def table_specification_text(segment_text)
      on_keyword_position = Tokenizer.find_top_level_keyword(segment_text, "on")
      return segment_text.strip unless on_keyword_position

      segment_text[0...on_keyword_position].strip
    end

    # The alias written after the table name, with or without `as`; nil when
    # the specification is a bare table name.
    def extract_explicit_alias(table_specification)
      _table_name, second_word, third_word = table_specification.strip.split(Constants::WHITESPACE_REGEX)

      second_word == "as" ? third_word : second_word
    end
  end
end
@@ -16,8 +16,8 @@ module SqlBeautifier
16
16
 
17
17
  match_position = match.begin(0)
18
18
 
19
- previous_character = match_position.zero? ? nil : sql[match_position - 1]
20
- next_character = match_position + keyword.length >= sql.length ? nil : sql[match_position + keyword.length]
19
+ previous_character = character_before(sql, match_position)
20
+ next_character = character_after(sql, match_position, keyword.length)
21
21
 
22
22
  return match_position if word_boundary?(previous_character) && word_boundary?(next_character) && top_level?(sql, match_position)
23
23
 
@@ -48,7 +48,15 @@ module SqlBeautifier
48
48
 
49
49
  boundaries.each_with_index do |boundary, boundary_index|
50
50
  content_start = boundary[:position] + boundary[:keyword].length
51
- content_end = boundary_index + 1 < boundaries.length ? boundaries[boundary_index + 1][:position] : sql.length
51
+
52
+ content_end = begin
53
+ if boundary_index + 1 < boundaries.length
54
+ boundaries[boundary_index + 1][:position]
55
+ else
56
+ sql.length
57
+ end
58
+ end
59
+
52
60
  clause_symbol = boundary[:keyword].tr(" ", "_").to_sym
53
61
 
54
62
  clauses[clause_symbol] = sql[content_start...content_end].strip
@@ -71,10 +79,10 @@ module SqlBeautifier
71
79
  if inside_string_literal
72
80
  current_segment << character
73
81
 
74
- if character == "'" && text[position + 1] == "'"
82
+ if escaped_single_quote?(text, position)
75
83
  position += 1
76
84
  current_segment << text[position]
77
- elsif character == "'"
85
+ elsif character == Constants::SINGLE_QUOTE
78
86
  inside_string_literal = false
79
87
  end
80
88
 
@@ -85,10 +93,10 @@ module SqlBeautifier
85
93
  if inside_quoted_identifier
86
94
  current_segment << character
87
95
 
88
- if character == '"' && text[position + 1] == '"'
96
+ if escaped_double_quote?(text, position)
89
97
  position += 1
90
98
  current_segment << text[position]
91
- elsif character == '"'
99
+ elsif character == Constants::DOUBLE_QUOTE
92
100
  inside_quoted_identifier = false
93
101
  end
94
102
 
@@ -97,23 +105,23 @@ module SqlBeautifier
97
105
  end
98
106
 
99
107
  case character
100
- when "'"
108
+ when Constants::SINGLE_QUOTE
101
109
  inside_string_literal = true
102
110
  current_segment << character
103
111
 
104
- when '"'
112
+ when Constants::DOUBLE_QUOTE
105
113
  inside_quoted_identifier = true
106
114
  current_segment << character
107
115
 
108
- when "("
116
+ when Constants::OPEN_PARENTHESIS
109
117
  parenthesis_depth += 1
110
118
  current_segment << character
111
119
 
112
- when ")"
120
+ when Constants::CLOSE_PARENTHESIS
113
121
  parenthesis_depth = [parenthesis_depth - 1, 0].max
114
122
  current_segment << character
115
123
 
116
- when ","
124
+ when Constants::COMMA
117
125
  if parenthesis_depth.zero?
118
126
  segments << current_segment.strip
119
127
  current_segment = +""
@@ -132,10 +140,118 @@ module SqlBeautifier
132
140
  segments
133
141
  end
134
142
 
143
# Splits +text+ at the conjunctions reported by scan_top_level_conjunctions.
#
# Returns an array of [conjunction, condition] pairs in source order. The
# first pair always has a nil conjunction; every later pair carries the
# conjunction that precedes its condition. Conditions are stripped.
def split_top_level_conditions(text)
  boundaries = scan_top_level_conjunctions(text)
  return [[nil, text.strip]] if boundaries.empty?

  leading_condition = text[0...boundaries.first[:position]].strip

  trailing_conditions = boundaries.each_with_index.map do |boundary, index|
    following_boundary = boundaries[index + 1]

    # A condition runs from just past its conjunction up to the next
    # conjunction, or to the end of the text for the last one.
    content_start = boundary[:position] + boundary[:conjunction].length
    content_end = following_boundary ? following_boundary[:position] : text.length

    [boundary[:conjunction], text[content_start...content_end].strip]
  end

  [[nil, leading_condition], *trailing_conditions]
end
170
+
171
# Scans +text+ from +opening_position+ and returns the index of the closing
# parenthesis that brings the running parenthesis depth back to zero, or nil
# when the end of the text is reached first.
#
# Parentheses inside single-quoted string literals and double-quoted
# identifiers are ignored; a doubled quote inside such a span is treated as
# an escaped quote rather than the end of the span.
def find_matching_parenthesis(text, opening_position)
  depth = 0
  quote_state = nil # nil, :string_literal or :quoted_identifier
  cursor = opening_position

  while cursor < text.length
    current = text[cursor]

    case quote_state
    when :string_literal
      if escaped_single_quote?(text, cursor)
        cursor += 1 # step over the second quote of the escaped pair as well
      elsif current == Constants::SINGLE_QUOTE
        quote_state = nil
      end
    when :quoted_identifier
      if escaped_double_quote?(text, cursor)
        cursor += 1 # step over the second quote of the escaped pair as well
      elsif current == Constants::DOUBLE_QUOTE
        quote_state = nil
      end
    else
      case current
      when Constants::SINGLE_QUOTE
        quote_state = :string_literal
      when Constants::DOUBLE_QUOTE
        quote_state = :quoted_identifier
      when Constants::OPEN_PARENTHESIS
        depth += 1
      when Constants::CLOSE_PARENTHESIS
        depth -= 1
        return cursor if depth.zero?
      end
    end

    cursor += 1
  end

  nil
end
221
+
222
# True when the stripped text starts with an opening parenthesis whose
# matching closing parenthesis is the very last character, i.e. one pair of
# parentheses encloses the whole text.
def outer_parentheses_wrap_all?(text)
  stripped = text.strip

  stripped.start_with?(Constants::OPEN_PARENTHESIS) &&
    find_matching_parenthesis(stripped, 0) == stripped.length - 1
end
230
+
135
231
  def word_boundary?(character)
136
232
  character.nil? || character !~ IDENTIFIER_CHARACTER
137
233
  end
138
234
 
235
# Returns the character immediately before +position+, or nil when
# +position+ is the start of the text.
def character_before(text, position)
  position.zero? ? nil : text[position - 1]
end
240
+
241
# Returns the character located +offset+ characters after +position+, or nil
# when that index lies at or beyond the end of the text.
def character_after(text, position, offset)
  target_index = position + offset

  text[target_index] if target_index < text.length
end
246
+
247
# True when the character at +position+ and the one right after it are both
# single quotes.
def escaped_single_quote?(text, position)
  return false unless text[position] == Constants::SINGLE_QUOTE

  text[position + 1] == Constants::SINGLE_QUOTE
end
250
+
251
# True when the character at +position+ and the one right after it are both
# double quotes.
def escaped_double_quote?(text, position)
  return false unless text[position] == Constants::DOUBLE_QUOTE

  text[position + 1] == Constants::DOUBLE_QUOTE
end
254
+
139
255
  def top_level?(sql, target_position)
140
256
  parenthesis_depth = 0
141
257
  inside_string_literal = false
@@ -146,10 +262,10 @@ module SqlBeautifier
146
262
  character = sql[position]
147
263
 
148
264
  if inside_string_literal
149
- if character == "'" && sql[position + 1] == "'"
265
+ if escaped_single_quote?(sql, position)
150
266
  position += 2
151
267
  next
152
- elsif character == "'"
268
+ elsif character == Constants::SINGLE_QUOTE
153
269
  inside_string_literal = false
154
270
  end
155
271
 
@@ -158,10 +274,10 @@ module SqlBeautifier
158
274
  end
159
275
 
160
276
  if inside_quoted_identifier
161
- if character == '"' && sql[position + 1] == '"'
277
+ if escaped_double_quote?(sql, position)
162
278
  position += 2
163
279
  next
164
- elsif character == '"'
280
+ elsif character == Constants::DOUBLE_QUOTE
165
281
  inside_quoted_identifier = false
166
282
  end
167
283
 
@@ -170,13 +286,13 @@ module SqlBeautifier
170
286
  end
171
287
 
172
288
  case character
173
- when "'"
289
+ when Constants::SINGLE_QUOTE
174
290
  inside_string_literal = true
175
- when '"'
291
+ when Constants::DOUBLE_QUOTE
176
292
  inside_quoted_identifier = true
177
- when "("
293
+ when Constants::OPEN_PARENTHESIS
178
294
  parenthesis_depth += 1
179
- when ")"
295
+ when Constants::CLOSE_PARENTHESIS
180
296
  parenthesis_depth = [parenthesis_depth - 1, 0].max
181
297
  end
182
298
 
@@ -185,5 +301,92 @@ module SqlBeautifier
185
301
 
186
302
  parenthesis_depth.zero? && !inside_string_literal && !inside_quoted_identifier
187
303
  end
304
+
305
# Walks +text+ once and collects every conjunction that sits outside
# parentheses, single-quoted string literals and double-quoted identifiers.
#
# Returns an array of { conjunction:, position: } hashes in source order.
# The first "and" that follows a top-level BETWEEN keyword is not recorded,
# because it belongs to the BETWEEN expression rather than joining two
# conditions.
def scan_top_level_conjunctions(text)
  boundaries = []
  depth = 0
  quote_state = nil # nil, :string_literal or :quoted_identifier
  awaiting_between_and = false
  cursor = 0

  while cursor < text.length
    current = text[cursor]
    step = 1

    case quote_state
    when :string_literal
      if escaped_single_quote?(text, cursor)
        step = 2 # skip both quotes of the escaped pair
      elsif current == Constants::SINGLE_QUOTE
        quote_state = nil
      end
    when :quoted_identifier
      if escaped_double_quote?(text, cursor)
        step = 2 # skip both quotes of the escaped pair
      elsif current == Constants::DOUBLE_QUOTE
        quote_state = nil
      end
    else
      case current
      when Constants::SINGLE_QUOTE
        quote_state = :string_literal
      when Constants::DOUBLE_QUOTE
        quote_state = :quoted_identifier
      when Constants::OPEN_PARENTHESIS
        depth += 1
      when Constants::CLOSE_PARENTHESIS
        # Clamp at zero so a stray closing parenthesis cannot push the
        # depth negative.
        depth = [depth - 1, 0].max
      else
        if depth.zero?
          awaiting_between_and = true if keyword_at?(text, cursor, Constants::BETWEEN_KEYWORD)

          conjunction = detect_conjunction_at(text, cursor)

          if conjunction
            if conjunction == "and" && awaiting_between_and
              awaiting_between_and = false
            else
              boundaries << { conjunction: conjunction, position: cursor }
            end

            # Jump past the whole conjunction instead of a single character.
            step = conjunction.length
          end
        end
      end
    end

    cursor += step
  end

  boundaries
end
376
+
377
# True when +keyword+ occurs in +text+ at +position+ as a whole word: the
# characters there equal the keyword once downcased, and the characters on
# either side are word boundaries.
def keyword_at?(text, position, keyword)
  candidate = text[position, keyword.length]
  return false if candidate.nil? || candidate.downcase != keyword

  neighbours = [
    character_before(text, position),
    character_after(text, position, keyword.length),
  ]

  neighbours.all? { |neighbour| word_boundary?(neighbour) }
end
385
+
386
# Returns the first entry of Constants::CONJUNCTIONS that occurs as a whole
# word at +position+ in +text+, or nil when none does.
def detect_conjunction_at(text, position)
  Constants::CONJUNCTIONS.find { |candidate| keyword_at?(text, position, candidate) }
end
188
391
  end
189
392
  end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module SqlBeautifier
  # Small stateless string helpers shared across the formatter.
  module Util
    module_function

    # Capitalizes each underscore-separated segment of +name+ and joins the
    # segments back together with underscores (`foo_bar` => `Foo_Bar`).
    def upper_pascal_case(name)
      segments = name.split("_")

      segments.map { |segment| segment.capitalize }.join("_")
    end

    # Returns the first whitespace-delimited word of +text+, or nil when the
    # text contains no words.
    def first_word(text)
      words = text.strip.split(Constants::WHITESPACE_REGEX)

      words.first
    end

    # Strips +text+ and, when the result starts with an opening parenthesis
    # and ends with a closing one, drops those two characters and strips
    # again. Only the first and last characters are inspected; whether they
    # form a matching pair is not checked here.
    def strip_outer_parentheses(text)
      trimmed = text.strip
      wrapped = trimmed.start_with?(Constants::OPEN_PARENTHESIS) &&
                trimmed.end_with?(Constants::CLOSE_PARENTHESIS)

      wrapped ? trimmed[1...-1].strip : trimmed
    end

    # Wraps +value+ in double quotes; returns nil for a nil value.
    def double_quote_string(value)
      "#{Constants::DOUBLE_QUOTE}#{value}#{Constants::DOUBLE_QUOTE}" unless value.nil?
    end

    # Replaces every double quote in +value+ with the escaped form; returns
    # nil for a nil value.
    def escape_double_quote(value)
      value&.gsub(Constants::DOUBLE_QUOTE, Constants::ESCAPED_DOUBLE_QUOTE)
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SqlBeautifier
4
- VERSION = "0.1.3"
4
+ VERSION = "0.2.0"
5
5
  end