sql_beautifier 0.1.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
# frozen_string_literal: true

module SqlBeautifier
  # Maps every table named in a FROM clause (the primary table plus joined
  # tables) to the alias used to qualify its columns, and rewrites
  # `table_name.` prefixes in arbitrary SQL text to `alias.`.
  #
  # The alias strategy is read from `SqlBeautifier.config_for(:alias_strategy)`:
  # * `:none` keeps only aliases written explicitly in the SQL;
  # * a callable is invoked with the table name to derive the alias base;
  # * anything else derives the base from the first letter of each
  #   underscore-separated segment of the table name.
  class TableRegistry
    # @return [Hash{String => String}] table name => alias
    attr_reader :table_map

    # @param from_content [String, nil] body of the FROM clause (without the
    #   FROM keyword); blank or nil content yields an empty registry
    def initialize(from_content)
      @from_content = from_content
      @table_map = {}
      @alias_strategy = SqlBeautifier.config_for(:alias_strategy)
      build!
    end

    # @param table_name [String]
    # @return [String, nil] the alias registered for the table, if any
    def alias_for(table_name)
      @table_map[table_name]
    end

    # Replaces each `table_name.` prefix with `alias.`, leaving the contents
    # of single-quoted literals and double-quoted identifiers untouched.
    #
    # @param text [String]
    # @return [String] the input itself when no aliases are registered,
    #   otherwise a new string
    def apply_aliases(text)
      return text if @table_map.empty?

      output = +""
      position = 0

      while position < text.length
        character = text[position]

        case character
        when Constants::SINGLE_QUOTE
          position = copy_string_literal!(text, position, output)
        when Constants::DOUBLE_QUOTE
          position = copy_quoted_identifier!(text, position, output)
        else
          table_name, table_alias = find_table_replacement_at(text, position)

          if table_name
            output << "#{table_alias}."
            # Skip the table name and the dot that follows it.
            position += table_name.length + 1
          else
            output << character
            position += 1
          end
        end
      end

      output
    end

    private

    # Populates @table_map and caches the table names longest-first, so a
    # longer name is tried before any shorter name that is a prefix of it.
    def build!
      table_entries = extract_table_entries(@from_content)

      if @alias_strategy == :none
        table_entries.each do |table_entry|
          next unless table_entry[:explicit_alias]

          @table_map[table_entry[:table_name]] = table_entry[:explicit_alias]
        end
      else
        assign_aliases!(table_entries, count_initials_occurrences(table_entries), [])
      end

      @tables_by_descending_length = @table_map.keys.sort_by { |name| -name.length }.freeze
    end

    # Counts how many tables without an explicit alias share each alias base.
    #
    # @return [Hash{String => Integer}] defaults to 0 for unknown bases
    def count_initials_occurrences(table_entries)
      table_entries.each_with_object(Hash.new(0)) do |table_entry, occurrence_counts|
        next if table_entry[:explicit_alias]

        occurrence_counts[table_initials(table_entry[:table_name])] += 1
      end
    end

    # Registers an alias for every entry. Explicit aliases are kept verbatim;
    # derived aliases get a numeric suffix when several tables share the same
    # base or when the candidate is already taken.
    #
    # @param used_aliases [Array<String>] aliases already taken; mutated
    def assign_aliases!(table_entries, initials_occurrence_counts, used_aliases)
      duplicate_initials_counts = Hash.new(0)
      collision_counts = Hash.new(0)

      # Reserve every explicit alias up front. Otherwise a derived alias for an
      # earlier table could duplicate an explicit alias declared later in the
      # FROM clause, producing two tables with the same alias.
      table_entries.each do |table_entry|
        explicit_alias = table_entry[:explicit_alias]
        used_aliases << explicit_alias if explicit_alias && !used_aliases.include?(explicit_alias)
      end

      table_entries.each do |table_entry|
        table_name = table_entry[:table_name]
        explicit_alias = table_entry[:explicit_alias]

        if explicit_alias
          @table_map[table_name] = explicit_alias
          next
        end

        initials = table_initials(table_name)
        shared_initials = initials_occurrence_counts[initials] > 1
        duplicate_initials_counts[initials] += 1 if shared_initials

        candidate_alias = shared_initials ? "#{initials}#{duplicate_initials_counts[initials]}" : initials

        if used_aliases.include?(candidate_alias)
          # Resume numbering after the highest suffix already handed out for this base.
          collision_counts[initials] = [collision_counts[initials], duplicate_initials_counts[initials]].max

          loop do
            collision_counts[initials] += 1
            candidate_alias = "#{initials}#{collision_counts[initials]}"
            break unless used_aliases.include?(candidate_alias)
          end
        end

        @table_map[table_name] = candidate_alias
        used_aliases << candidate_alias
      end
    end

    # Copies the single-quoted literal starting at `position` into `output`.
    #
    # @return [Integer] index just past the closing quote (or text length
    #   when the literal is unterminated)
    def copy_string_literal!(text, position, output)
      copy_delimited_span!(text, position, output, Constants::SINGLE_QUOTE)
    end

    # Copies the double-quoted identifier starting at `position` into `output`.
    #
    # @return [Integer] index just past the closing quote (or text length
    #   when the identifier is unterminated)
    def copy_quoted_identifier!(text, position, output)
      copy_delimited_span!(text, position, output, Constants::DOUBLE_QUOTE)
    end

    # Shared scanner for quoted spans: copies from the opening delimiter up to
    # and including the closing one. A doubled delimiter is treated as an
    # escaped delimiter and does not end the span.
    def copy_delimited_span!(text, position, output, delimiter)
      output << text[position]
      position += 1

      while position < text.length
        character = text[position]
        output << character

        if character == delimiter
          return position + 1 unless text[position + 1] == delimiter

          # Escaped delimiter: copy its second half and keep scanning.
          position += 1
          output << text[position]
        end

        position += 1
      end

      position
    end

    # @return [Array(String, String), nil] `[table_name, alias]` when a known
    #   table name followed by a dot starts at `position` on a word boundary
    def find_table_replacement_at(text, position)
      return unless Tokenizer.word_boundary?(Tokenizer.character_before(text, position))

      matched_table = @tables_by_descending_length.find do |table_name|
        text[position, table_name.length + 1] == "#{table_name}."
      end

      matched_table && [matched_table, @table_map[matched_table]]
    end

    # Derives the alias base for a table: the configured callable's result, or
    # the first character of each underscore-separated segment.
    def table_initials(table_name)
      return @alias_strategy.call(table_name) if @alias_strategy.respond_to?(:call)

      table_name.split("_").map { |segment| segment[0] }.join
    end

    # Splits the FROM content on join keywords and extracts one entry per table.
    #
    # NOTE(review): the pairing below presumes Constants::JOIN_KEYWORD_PATTERN
    # contains a capture group, so that String#split returns the join keywords
    # interleaved with the content between them - confirm against Constants.
    #
    # @return [Array<Hash>] entries with :table_name and :explicit_alias
    def extract_table_entries(from_content)
      split_segments = from_content.to_s.strip.split(Constants::JOIN_KEYWORD_PATTERN)
      # Blank content splits into nothing; there is no primary table to read.
      return [] if split_segments.empty?

      table_entries = [extract_table_entry(split_segments.shift.strip)]

      split_segments.each_slice(2) do |_join_keyword, join_content|
        next unless join_content

        table_entries << extract_table_entry(join_content)
      end

      table_entries.compact
    end

    # @return [Hash, nil] nil when the segment contains no table name
    def extract_table_entry(segment_text)
      table_specification = table_specification_text(segment_text)
      table_name = Util.first_word(table_specification)
      return unless table_name

      {
        table_name: table_name,
        explicit_alias: extract_explicit_alias(table_specification),
      }
    end

    # Returns the part of a segment that precedes its top-level ON keyword.
    def table_specification_text(segment_text)
      on_keyword_position = Tokenizer.find_top_level_keyword(segment_text, "on")
      return segment_text.strip unless on_keyword_position

      segment_text[0...on_keyword_position].strip
    end

    # Reads the alias from `table alias` or `table as alias`; the AS keyword is
    # matched case-insensitively so `users AS u` is not read as alias "AS".
    #
    # @return [String, nil]
    def extract_explicit_alias(table_specification)
      words = table_specification.strip.split(Constants::WHITESPACE_REGEX)
      return nil if words.length < 2

      words[1].casecmp?("as") ? words[2] : words[1]
    end
  end
end
@@ -16,8 +16,8 @@ module SqlBeautifier
16
16
 
17
17
  match_position = match.begin(0)
18
18
 
19
- previous_character = match_position.zero? ? nil : sql[match_position - 1]
20
- next_character = match_position + keyword.length >= sql.length ? nil : sql[match_position + keyword.length]
19
+ previous_character = character_before(sql, match_position)
20
+ next_character = character_after(sql, match_position, keyword.length)
21
21
 
22
22
  return match_position if word_boundary?(previous_character) && word_boundary?(next_character) && top_level?(sql, match_position)
23
23
 
@@ -48,7 +48,15 @@ module SqlBeautifier
48
48
 
49
49
  boundaries.each_with_index do |boundary, boundary_index|
50
50
  content_start = boundary[:position] + boundary[:keyword].length
51
- content_end = boundary_index + 1 < boundaries.length ? boundaries[boundary_index + 1][:position] : sql.length
51
+
52
+ content_end = begin
53
+ if boundary_index + 1 < boundaries.length
54
+ boundaries[boundary_index + 1][:position]
55
+ else
56
+ sql.length
57
+ end
58
+ end
59
+
52
60
  clause_symbol = boundary[:keyword].tr(" ", "_").to_sym
53
61
 
54
62
  clauses[clause_symbol] = sql[content_start...content_end].strip
@@ -71,10 +79,10 @@ module SqlBeautifier
71
79
  if inside_string_literal
72
80
  current_segment << character
73
81
 
74
- if character == "'" && text[position + 1] == "'"
82
+ if escaped_single_quote?(text, position)
75
83
  position += 1
76
84
  current_segment << text[position]
77
- elsif character == "'"
85
+ elsif character == Constants::SINGLE_QUOTE
78
86
  inside_string_literal = false
79
87
  end
80
88
 
@@ -85,10 +93,10 @@ module SqlBeautifier
85
93
  if inside_quoted_identifier
86
94
  current_segment << character
87
95
 
88
- if character == '"' && text[position + 1] == '"'
96
+ if escaped_double_quote?(text, position)
89
97
  position += 1
90
98
  current_segment << text[position]
91
- elsif character == '"'
99
+ elsif character == Constants::DOUBLE_QUOTE
92
100
  inside_quoted_identifier = false
93
101
  end
94
102
 
@@ -97,23 +105,23 @@ module SqlBeautifier
97
105
  end
98
106
 
99
107
  case character
100
- when "'"
108
+ when Constants::SINGLE_QUOTE
101
109
  inside_string_literal = true
102
110
  current_segment << character
103
111
 
104
- when '"'
112
+ when Constants::DOUBLE_QUOTE
105
113
  inside_quoted_identifier = true
106
114
  current_segment << character
107
115
 
108
- when "("
116
+ when Constants::OPEN_PARENTHESIS
109
117
  parenthesis_depth += 1
110
118
  current_segment << character
111
119
 
112
- when ")"
120
+ when Constants::CLOSE_PARENTHESIS
113
121
  parenthesis_depth = [parenthesis_depth - 1, 0].max
114
122
  current_segment << character
115
123
 
116
- when ","
124
+ when Constants::COMMA
117
125
  if parenthesis_depth.zero?
118
126
  segments << current_segment.strip
119
127
  current_segment = +""
@@ -132,10 +140,118 @@ module SqlBeautifier
132
140
  segments
133
141
  end
134
142
 
143
# Splits a condition expression at the conjunction boundaries reported by
# scan_top_level_conjunctions.
#
# @param text [String]
# @return [Array<Array>] `[conjunction, condition_text]` pairs in source
#   order; the first pair's conjunction is nil. When no boundary is found the
#   result is the single pair `[nil, text.strip]`.
def split_top_level_conditions(text)
  boundaries = scan_top_level_conjunctions(text)
  return [[nil, text.strip]] if boundaries.empty?

  leading_pair = [nil, text[0...boundaries.first[:position]].strip]

  following_pairs = boundaries.each_with_index.map do |boundary, index|
    upcoming_boundary = boundaries[index + 1]
    # Each condition runs from just past its conjunction to the next boundary (or the end of text).
    slice_start = boundary[:position] + boundary[:conjunction].length
    slice_end = upcoming_boundary ? upcoming_boundary[:position] : text.length

    [boundary[:conjunction], text[slice_start...slice_end].strip]
  end

  [leading_pair, *following_pairs]
end
170
+
171
# Scans forward from `opening_position` and returns the index of the closing
# parenthesis that brings the nesting depth back to zero. Parentheses inside
# single-quoted literals and double-quoted identifiers are ignored, and a
# doubled quote inside such a span is treated as an escaped quote.
#
# @param text [String]
# @param opening_position [Integer] index where scanning starts
# @return [Integer, nil] nil when the depth never returns to zero
def find_matching_parenthesis(text, opening_position)
  depth = 0
  active_quote = nil # quote character of the span currently being skipped
  cursor = opening_position

  while cursor < text.length
    current = text[cursor]

    if active_quote
      if current == active_quote && text[cursor + 1] == active_quote
        # Escaped quote: step over both characters and stay inside the span.
        cursor += 2
      else
        active_quote = nil if current == active_quote
        cursor += 1
      end

      next
    end

    case current
    when Constants::SINGLE_QUOTE, Constants::DOUBLE_QUOTE
      active_quote = current
    when Constants::OPEN_PARENTHESIS
      depth += 1
    when Constants::CLOSE_PARENTHESIS
      depth -= 1
      return cursor if depth.zero?
    end

    cursor += 1
  end

  nil
end
221
+
222
# Tells whether the stripped text opens with a parenthesis whose matching
# closing parenthesis is the very last character.
#
# @param text [String]
# @return [Boolean]
def outer_parentheses_wrap_all?(text)
  candidate = text.strip
  return false unless candidate.start_with?(Constants::OPEN_PARENTHESIS)

  last_index = candidate.length - 1

  # find_matching_parenthesis yields nil when unbalanced, which never equals an index.
  find_matching_parenthesis(candidate, 0) == last_index
end
230
+
135
231
  def word_boundary?(character)
136
232
  character.nil? || character !~ IDENTIFIER_CHARACTER
137
233
  end
138
234
 
235
# Returns the character immediately preceding `position`, or nil when
# `position` is 0 (so the lookup never wraps around to the end of the string).
#
# @param text [String]
# @param position [Integer]
# @return [String, nil]
def character_before(text, position)
  position.zero? ? nil : text[position - 1]
end
240
+
241
# Returns the character located `offset` characters past `position`, or nil
# when that index lies at or beyond the end of the text.
#
# @param text [String]
# @param position [Integer]
# @param offset [Integer]
# @return [String, nil]
def character_after(text, position, offset)
  target_index = position + offset

  target_index < text.length ? text[target_index] : nil
end
246
+
247
# True when the character at `position` and the one right after it are both
# Constants::SINGLE_QUOTE, i.e. a doubled quote.
#
# @param text [String]
# @param position [Integer]
# @return [Boolean]
def escaped_single_quote?(text, position)
  quote = Constants::SINGLE_QUOTE

  [text[position], text[position + 1]].all? { |candidate| candidate == quote }
end
250
+
251
# True when the character at `position` and the one right after it are both
# Constants::DOUBLE_QUOTE, i.e. a doubled quote.
#
# @param text [String]
# @param position [Integer]
# @return [Boolean]
def escaped_double_quote?(text, position)
  quote = Constants::DOUBLE_QUOTE

  [text[position], text[position + 1]].all? { |candidate| candidate == quote }
end
254
+
139
255
  def top_level?(sql, target_position)
140
256
  parenthesis_depth = 0
141
257
  inside_string_literal = false
@@ -146,10 +262,10 @@ module SqlBeautifier
146
262
  character = sql[position]
147
263
 
148
264
  if inside_string_literal
149
- if character == "'" && sql[position + 1] == "'"
265
+ if escaped_single_quote?(sql, position)
150
266
  position += 2
151
267
  next
152
- elsif character == "'"
268
+ elsif character == Constants::SINGLE_QUOTE
153
269
  inside_string_literal = false
154
270
  end
155
271
 
@@ -158,10 +274,10 @@ module SqlBeautifier
158
274
  end
159
275
 
160
276
  if inside_quoted_identifier
161
- if character == '"' && sql[position + 1] == '"'
277
+ if escaped_double_quote?(sql, position)
162
278
  position += 2
163
279
  next
164
- elsif character == '"'
280
+ elsif character == Constants::DOUBLE_QUOTE
165
281
  inside_quoted_identifier = false
166
282
  end
167
283
 
@@ -170,13 +286,13 @@ module SqlBeautifier
170
286
  end
171
287
 
172
288
  case character
173
- when "'"
289
+ when Constants::SINGLE_QUOTE
174
290
  inside_string_literal = true
175
- when '"'
291
+ when Constants::DOUBLE_QUOTE
176
292
  inside_quoted_identifier = true
177
- when "("
293
+ when Constants::OPEN_PARENTHESIS
178
294
  parenthesis_depth += 1
179
- when ")"
295
+ when Constants::CLOSE_PARENTHESIS
180
296
  parenthesis_depth = [parenthesis_depth - 1, 0].max
181
297
  end
182
298
 
@@ -185,5 +301,92 @@ module SqlBeautifier
185
301
 
186
302
  parenthesis_depth.zero? && !inside_string_literal && !inside_quoted_identifier
187
303
  end
304
+
305
# Walks the text once and records every conjunction found outside of
# parentheses, single-quoted literals and double-quoted identifiers.
# The "and" that follows a top-level BETWEEN keyword is consumed as part of
# that BETWEEN expression and is not recorded.
#
# @param text [String]
# @return [Array<Hash>] one `{ conjunction:, position: }` hash per boundary,
#   in source order
def scan_top_level_conjunctions(text)
  boundaries = []
  depth = 0
  active_quote = nil # quote character of the span currently being skipped
  between_pending = false
  cursor = 0

  while cursor < text.length
    current = text[cursor]

    if active_quote
      if current == active_quote && text[cursor + 1] == active_quote
        # Escaped quote: step over both characters and stay inside the span.
        cursor += 2
      else
        active_quote = nil if current == active_quote
        cursor += 1
      end

      next
    end

    case current
    when Constants::SINGLE_QUOTE, Constants::DOUBLE_QUOTE
      active_quote = current
    when Constants::OPEN_PARENTHESIS
      depth += 1
    when Constants::CLOSE_PARENTHESIS
      # Clamp at zero so a stray closing parenthesis cannot make the depth negative.
      depth = [depth - 1, 0].max
    else
      if depth.zero?
        between_pending = true if keyword_at?(text, cursor, Constants::BETWEEN_KEYWORD)

        conjunction = detect_conjunction_at(text, cursor)

        if conjunction
          if conjunction == "and" && between_pending
            between_pending = false
          else
            boundaries << { conjunction: conjunction, position: cursor }
          end

          # Jump past the whole keyword instead of re-examining its characters.
          cursor += conjunction.length
          next
        end
      end
    end

    cursor += 1
  end

  boundaries
end
376
+
377
# Checks whether `keyword` occurs at `position` (compared against the
# downcased text) as a whole word, i.e. with a word boundary on both sides.
#
# @param text [String]
# @param position [Integer]
# @param keyword [String]
# @return [Boolean]
def keyword_at?(text, position, keyword)
  candidate = text[position, keyword.length]
  return false unless candidate&.downcase == keyword

  neighbours = [
    character_before(text, position),
    character_after(text, position, keyword.length),
  ]

  neighbours.all? { |neighbour| word_boundary?(neighbour) }
end
385
+
386
# Returns the first entry of Constants::CONJUNCTIONS that occurs as a whole
# word at `position`, or nil when none does.
#
# @param text [String]
# @param position [Integer]
# @return [String, nil]
def detect_conjunction_at(text, position)
  Constants::CONJUNCTIONS.find { |candidate| keyword_at?(text, position, candidate) }
end
188
391
  end
189
392
  end
@@ -0,0 +1,67 @@
# frozen_string_literal: true

module SqlBeautifier
  # Stateless string helpers, callable as `SqlBeautifier::Util.<name>`.
  module Util
    module_function

    # @return [String] `length` space characters
    def whitespace(length)
      " " * length
    end

    # Capitalizes every underscore-separated segment and re-joins the segments
    # with underscores (e.g. "user_accounts" becomes "User_Accounts").
    def upper_pascal_case(name)
      segments = name.split("_")

      segments.map { |segment| segment.capitalize }.join("_")
    end

    # @return [String, nil] first whitespace-delimited word, nil for blank text
    def first_word(text)
      words = text.strip.split(Constants::WHITESPACE_REGEX)

      words.first
    end

    # Strips surrounding whitespace and, when the result both starts with an
    # opening and ends with a closing parenthesis, removes that outer pair and
    # strips again. No check is made that the two parentheses match each other.
    def strip_outer_parentheses(text)
      stripped_text = text.strip
      wrapped = stripped_text.start_with?(Constants::OPEN_PARENTHESIS) &&
                stripped_text.end_with?(Constants::CLOSE_PARENTHESIS)

      if wrapped
        stripped_text[1...-1].strip
      else
        stripped_text
      end
    end

    # Wraps the value in double quotes without escaping it; nil stays nil.
    def double_quote_string(value)
      return if value.nil?

      quote = Constants::DOUBLE_QUOTE

      "#{quote}#{value}#{quote}"
    end

    # Replaces every double quote with Constants::ESCAPED_DOUBLE_QUOTE; nil stays nil.
    def escape_double_quote(value)
      value&.gsub(Constants::DOUBLE_QUOTE, Constants::ESCAPED_DOUBLE_QUOTE)
    end

    # Formats the keyword and pads it with spaces up to the configured
    # :keyword_column_width, always leaving at least one trailing space.
    def keyword_padding(keyword)
      keyword_text = format_keyword(keyword)
      remaining_width = SqlBeautifier.config_for(:keyword_column_width) - keyword_text.length

      "#{keyword_text}#{whitespace([remaining_width, 1].max)}"
    end

    # @return [String] spaces spanning the configured :keyword_column_width
    def continuation_padding
      column_width = SqlBeautifier.config_for(:keyword_column_width)

      whitespace(column_width)
    end

    # Upcases the keyword when :keyword_case is :upper, downcases it otherwise.
    def format_keyword(keyword)
      if SqlBeautifier.config_for(:keyword_case) == :upper
        keyword.upcase
      else
        keyword.downcase
      end
    end

    # Downcases the name when :table_name_format is :lowercase; any other
    # setting formats it with upper_pascal_case.
    def format_table_name(name)
      if SqlBeautifier.config_for(:table_name_format) == :lowercase
        name.downcase
      else
        upper_pascal_case(name)
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SqlBeautifier
4
- VERSION = "0.1.4"
4
+ VERSION = "0.3.0"
5
5
  end
@@ -4,9 +4,16 @@ require "active_support/core_ext/object/blank"
4
4
 
5
5
  require_relative "sql_beautifier/version"
6
6
  require_relative "sql_beautifier/constants"
7
+ require_relative "sql_beautifier/util"
8
+ require_relative "sql_beautifier/configuration"
9
+
7
10
  require_relative "sql_beautifier/normalizer"
8
11
  require_relative "sql_beautifier/tokenizer"
12
+ require_relative "sql_beautifier/table_registry"
13
+ require_relative "sql_beautifier/condition_formatter"
14
+ require_relative "sql_beautifier/subquery_formatter"
9
15
  require_relative "sql_beautifier/clauses/base"
16
+ require_relative "sql_beautifier/clauses/condition_clause"
10
17
  require_relative "sql_beautifier/clauses/select"
11
18
  require_relative "sql_beautifier/clauses/from"
12
19
  require_relative "sql_beautifier/clauses/where"
@@ -26,4 +33,20 @@ module SqlBeautifier
26
33
 
27
34
  Formatter.call(value)
28
35
  end
36
+
37
# Returns the memoized Configuration, creating it on first access.
#
# @return [Configuration]
def configuration
  return @configuration if @configuration

  @configuration = Configuration.new
end
40
+
41
# Yields the current configuration object to the caller's block so settings
# can be changed on it.
#
# @yieldparam settings the object returned by `configuration`
def configure
  settings = configuration

  yield settings
end
44
+
45
# Reads a single setting by calling the public method named `key` on the
# current configuration.
#
# @param key [Symbol, String] name of the configuration reader
# @return [Object] whatever that reader returns
def config_for(key)
  settings = configuration

  settings.public_send(key)
end
48
+
49
# Discards the current configuration and replaces it with a newly built one.
#
# @return [Configuration] the new configuration
def reset_configuration!
  fresh_configuration = Configuration.new

  @configuration = fresh_configuration
end
29
52
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sql_beautifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kinnell Shah
@@ -38,6 +38,7 @@ files:
38
38
  - README.md
39
39
  - lib/sql_beautifier.rb
40
40
  - lib/sql_beautifier/clauses/base.rb
41
+ - lib/sql_beautifier/clauses/condition_clause.rb
41
42
  - lib/sql_beautifier/clauses/from.rb
42
43
  - lib/sql_beautifier/clauses/group_by.rb
43
44
  - lib/sql_beautifier/clauses/having.rb
@@ -45,10 +46,15 @@ files:
45
46
  - lib/sql_beautifier/clauses/order_by.rb
46
47
  - lib/sql_beautifier/clauses/select.rb
47
48
  - lib/sql_beautifier/clauses/where.rb
49
+ - lib/sql_beautifier/condition_formatter.rb
50
+ - lib/sql_beautifier/configuration.rb
48
51
  - lib/sql_beautifier/constants.rb
49
52
  - lib/sql_beautifier/formatter.rb
50
53
  - lib/sql_beautifier/normalizer.rb
54
+ - lib/sql_beautifier/subquery_formatter.rb
55
+ - lib/sql_beautifier/table_registry.rb
51
56
  - lib/sql_beautifier/tokenizer.rb
57
+ - lib/sql_beautifier/util.rb
52
58
  - lib/sql_beautifier/version.rb
53
59
  homepage: https://github.com/kinnell/sql_beautifier
54
60
  licenses: