sql_beautifier 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +2 -2
  4. data/lib/sql_beautifier/base.rb +9 -0
  5. data/lib/sql_beautifier/clauses/base.rb +2 -2
  6. data/lib/sql_beautifier/clauses/condition_clause.rb +1 -1
  7. data/lib/sql_beautifier/clauses/from.rb +15 -69
  8. data/lib/sql_beautifier/clauses/order_by.rb +12 -1
  9. data/lib/sql_beautifier/clauses/select.rb +28 -15
  10. data/lib/sql_beautifier/comment.rb +23 -0
  11. data/lib/sql_beautifier/{comment_stripper.rb → comment_parser.rb} +67 -24
  12. data/lib/sql_beautifier/condition.rb +162 -0
  13. data/lib/sql_beautifier/configuration.rb +4 -15
  14. data/lib/sql_beautifier/create_table_as.rb +127 -0
  15. data/lib/sql_beautifier/cte_definition.rb +41 -0
  16. data/lib/sql_beautifier/cte_query.rb +129 -0
  17. data/lib/sql_beautifier/expression.rb +54 -0
  18. data/lib/sql_beautifier/formatter.rb +13 -80
  19. data/lib/sql_beautifier/join.rb +69 -0
  20. data/lib/sql_beautifier/normalizer.rb +33 -59
  21. data/lib/sql_beautifier/query.rb +185 -0
  22. data/lib/sql_beautifier/scanner.rb +420 -0
  23. data/lib/sql_beautifier/sort_expression.rb +39 -0
  24. data/lib/sql_beautifier/statement_assembler.rb +4 -4
  25. data/lib/sql_beautifier/statement_splitter.rb +35 -143
  26. data/lib/sql_beautifier/table_reference.rb +52 -0
  27. data/lib/sql_beautifier/table_registry.rb +50 -124
  28. data/lib/sql_beautifier/tokenizer.rb +47 -278
  29. data/lib/sql_beautifier/types.rb +9 -0
  30. data/lib/sql_beautifier/version.rb +1 -1
  31. data/lib/sql_beautifier.rb +14 -6
  32. metadata +43 -7
  33. data/lib/sql_beautifier/comment_restorer.rb +0 -62
  34. data/lib/sql_beautifier/condition_formatter.rb +0 -127
  35. data/lib/sql_beautifier/create_table_as_formatter.rb +0 -177
  36. data/lib/sql_beautifier/cte_formatter.rb +0 -192
  37. data/lib/sql_beautifier/subquery_formatter.rb +0 -113
@@ -15,102 +15,42 @@ module SqlBeautifier
15
15
  end
16
16
 
17
17
  def split_on_semicolons(sql)
18
+ scanner = Scanner.new(sql)
18
19
  segments = []
19
20
  current_segment = +""
20
- inside_string_literal = false
21
- inside_quoted_identifier = false
22
- inside_dollar_quoted_string = false
23
- dollar_quote_delimiter = nil
24
- parenthesis_depth = 0
25
- position = 0
26
21
 
27
- while position < sql.length
28
- character = sql[position]
29
-
30
- if inside_string_literal
31
- current_segment << character
32
-
33
- if character == Constants::SINGLE_QUOTE && sql[position + 1] == Constants::SINGLE_QUOTE
34
- position += 1
35
- current_segment << sql[position]
36
- elsif character == Constants::SINGLE_QUOTE
37
- inside_string_literal = false
38
- end
39
-
40
- position += 1
41
- next
42
- end
43
-
44
- if inside_dollar_quoted_string
45
- if sql[position, dollar_quote_delimiter.length] == dollar_quote_delimiter
46
- current_segment << dollar_quote_delimiter
47
- position += dollar_quote_delimiter.length
48
- inside_dollar_quoted_string = false
49
- dollar_quote_delimiter = nil
50
- else
51
- current_segment << character
52
- position += 1
53
- end
54
- next
55
- end
56
-
57
- if inside_quoted_identifier
58
- current_segment << character
59
-
60
- if character == Constants::DOUBLE_QUOTE && sql[position + 1] == Constants::DOUBLE_QUOTE
61
- position += 1
62
- current_segment << sql[position]
63
- elsif character == Constants::DOUBLE_QUOTE
64
- inside_quoted_identifier = false
65
- end
66
-
67
- position += 1
68
- next
69
- end
70
-
71
- if Tokenizer.sentinel_at?(sql, position)
72
- end_position = Tokenizer.sentinel_end_position(sql, position)
73
- current_segment << sql[position...end_position]
74
- position = end_position
22
+ until scanner.finished?
23
+ consumed = scanner.scan_quoted_or_sentinel!
24
+ if consumed
25
+ current_segment << consumed
75
26
  next
76
27
  end
77
28
 
78
- delimiter = Tokenizer.dollar_quote_delimiter_at(sql, position)
79
-
80
- if delimiter
81
- inside_dollar_quoted_string = true
82
- dollar_quote_delimiter = delimiter
83
- current_segment << delimiter
84
- position += delimiter.length
85
- next
86
- end
29
+ character = scanner.current_char
87
30
 
88
31
  case character
89
- when Constants::SINGLE_QUOTE
90
- inside_string_literal = true
91
- current_segment << character
92
- when Constants::DOUBLE_QUOTE
93
- inside_quoted_identifier = true
94
- current_segment << character
95
32
  when Constants::OPEN_PARENTHESIS
96
- parenthesis_depth += 1
33
+ scanner.increment_depth!
97
34
  current_segment << character
35
+
98
36
  when Constants::CLOSE_PARENTHESIS
99
- parenthesis_depth = [parenthesis_depth - 1, 0].max
37
+ scanner.decrement_depth!
100
38
  current_segment << character
39
+
101
40
  when ";"
102
- if parenthesis_depth.zero?
41
+ if scanner.parenthesis_depth.zero?
103
42
  stripped_segment = current_segment.strip
104
43
  segments << stripped_segment unless stripped_segment.empty?
105
44
  current_segment = +""
106
45
  else
107
46
  current_segment << character
108
47
  end
48
+
109
49
  else
110
50
  current_segment << character
111
51
  end
112
52
 
113
- position += 1
53
+ scanner.advance!
114
54
  end
115
55
 
116
56
  stripped_segment = current_segment.strip
@@ -141,102 +81,54 @@ module SqlBeautifier
141
81
  end
142
82
 
143
83
  def detect_statement_boundaries(sql)
84
+ scanner = Scanner.new(sql)
144
85
  boundaries = []
145
86
  clause_seen = false
146
87
  current_statement_keyword = nil
147
- inside_string_literal = false
148
- inside_quoted_identifier = false
149
- inside_dollar_quoted_string = false
150
- dollar_quote_delimiter = nil
151
- parenthesis_depth = 0
152
- position = 0
153
-
154
- while position < sql.length
155
- character = sql[position]
156
-
157
- if inside_string_literal
158
- if character == Constants::SINGLE_QUOTE && sql[position + 1] == Constants::SINGLE_QUOTE
159
- position += 2
160
- else
161
- inside_string_literal = false if character == Constants::SINGLE_QUOTE
162
- position += 1
163
- end
164
- next
165
- end
166
-
167
- if inside_dollar_quoted_string
168
- if sql[position, dollar_quote_delimiter.length] == dollar_quote_delimiter
169
- position += dollar_quote_delimiter.length
170
- inside_dollar_quoted_string = false
171
- dollar_quote_delimiter = nil
172
- else
173
- position += 1
174
- end
175
- next
176
- end
177
88
 
178
- if inside_quoted_identifier
179
- if character == Constants::DOUBLE_QUOTE && sql[position + 1] == Constants::DOUBLE_QUOTE
180
- position += 2
181
- else
182
- inside_quoted_identifier = false if character == Constants::DOUBLE_QUOTE
183
- position += 1
184
- end
185
- next
186
- end
187
-
188
- if Tokenizer.sentinel_at?(sql, position)
189
- position = Tokenizer.sentinel_end_position(sql, position)
190
- next
191
- end
89
+ until scanner.finished?
90
+ next if scanner.skip_quoted_or_sentinel!
192
91
 
193
- delimiter = Tokenizer.dollar_quote_delimiter_at(sql, position)
194
-
195
- if delimiter
196
- inside_dollar_quoted_string = true
197
- dollar_quote_delimiter = delimiter
198
- position += delimiter.length
199
- next
200
- end
201
-
202
- case character
92
+ case scanner.current_char
203
93
  when Constants::SINGLE_QUOTE
204
- inside_string_literal = true
94
+ scanner.enter_single_quote!
205
95
  when Constants::DOUBLE_QUOTE
206
- inside_quoted_identifier = true
96
+ scanner.enter_double_quote!
207
97
  when Constants::OPEN_PARENTHESIS
208
- parenthesis_depth += 1
98
+ scanner.increment_depth!
99
+ scanner.advance!
209
100
  when Constants::CLOSE_PARENTHESIS
210
- parenthesis_depth = [parenthesis_depth - 1, 0].max
101
+ scanner.decrement_depth!
102
+ scanner.advance!
211
103
  else
212
- if parenthesis_depth.zero?
213
- matched_statement_keyword = keyword_match_at(sql, position, STATEMENT_KEYWORDS)
104
+ if scanner.parenthesis_depth.zero?
105
+ matched_statement_keyword = keyword_match_at(scanner, STATEMENT_KEYWORDS)
214
106
 
215
107
  if matched_statement_keyword
216
108
  if clause_seen && !continuation_keyword?(current_statement_keyword, matched_statement_keyword)
217
- boundaries << position
109
+ boundaries << scanner.position
218
110
  clause_seen = false
219
111
  current_statement_keyword = matched_statement_keyword
220
112
  elsif boundaries.empty?
221
- boundaries << position
113
+ boundaries << scanner.position
222
114
  current_statement_keyword = matched_statement_keyword
223
115
  end
224
116
 
225
- position += matched_statement_keyword.length
117
+ scanner.advance!(matched_statement_keyword.length)
226
118
  next
227
119
  end
228
120
 
229
- matched_boundary_keyword = keyword_match_at(sql, position, BOUNDARY_KEYWORDS)
121
+ matched_boundary_keyword = keyword_match_at(scanner, BOUNDARY_KEYWORDS)
230
122
 
231
123
  if matched_boundary_keyword
232
124
  clause_seen = true
233
- position += matched_boundary_keyword.length
125
+ scanner.advance!(matched_boundary_keyword.length)
234
126
  next
235
127
  end
236
128
  end
237
- end
238
129
 
239
- position += 1
130
+ scanner.advance!
131
+ end
240
132
  end
241
133
 
242
134
  boundaries
@@ -255,15 +147,15 @@ module SqlBeautifier
255
147
  end
256
148
 
257
149
  def sentinel_only?(segment)
258
- segment.gsub(CommentStripper::SENTINEL_PATTERN, "").strip.empty?
150
+ segment.gsub(CommentParser::SENTINEL_PATTERN, "").strip.empty?
259
151
  end
260
152
 
261
153
  def continuation_keyword?(current_keyword, next_keyword)
262
154
  CONTINUATION_PAIRS[current_keyword] == next_keyword
263
155
  end
264
156
 
265
- def keyword_match_at(sql, position, keywords)
266
- keywords.detect { |keyword| Tokenizer.keyword_at?(sql, position, keyword) }
157
+ def keyword_match_at(scanner, keywords)
158
+ keywords.detect { |keyword| scanner.keyword_at?(keyword) }
267
159
  end
268
160
  end
269
161
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SqlBeautifier
4
+ class TableReference < Base
5
+ option :name
6
+ option :explicit_alias, default: -> {}
7
+ option :assigned_alias, default: -> {}
8
+
9
+ def self.parse(segment_text)
10
+ table_specification = table_specification_text(segment_text)
11
+ table_name = Util.first_word(table_specification)
12
+ return unless table_name
13
+
14
+ new(name: table_name, explicit_alias: extract_explicit_alias(table_specification))
15
+ end
16
+
17
+ def self.table_specification_text(segment_text)
18
+ on_keyword_position = Tokenizer.find_top_level_keyword(segment_text, "on")
19
+ return segment_text.strip unless on_keyword_position
20
+
21
+ segment_text[0...on_keyword_position].strip
22
+ end
23
+
24
+ def self.extract_explicit_alias(table_specification)
25
+ words = table_specification.strip.split(Constants::WHITESPACE_REGEX).grep_v(CommentParser::SENTINEL_PATTERN)
26
+ return nil if words.length < 2
27
+
28
+ if words[1] == "as"
29
+ words[2]
30
+ else
31
+ words[1]
32
+ end
33
+ end
34
+
35
+ def formatted_name
36
+ Util.format_table_name(@name)
37
+ end
38
+
39
+ def assign_alias!(value)
40
+ @assigned_alias = value
41
+ end
42
+
43
+ def alias_name
44
+ @explicit_alias || @assigned_alias
45
+ end
46
+
47
+ def render(trailing_sentinels: nil)
48
+ formatted = alias_name ? "#{formatted_name} #{alias_name}" : formatted_name
49
+ trailing_sentinels&.any? ? "#{formatted} #{trailing_sentinels.join(' ')}" : formatted
50
+ end
51
+ end
52
+ end
@@ -2,44 +2,51 @@
2
2
 
3
3
  module SqlBeautifier
4
4
  class TableRegistry
5
- attr_reader :table_map
5
+ attr_reader :references
6
6
 
7
7
  def initialize(from_content)
8
8
  @from_content = from_content
9
- @table_map = {}
10
9
  @alias_strategy = SqlBeautifier.config_for(:alias_strategy)
10
+ @references = []
11
+ @references_by_name = {}
11
12
  build!
12
13
  end
13
14
 
14
15
  def alias_for(table_name)
15
- @table_map[table_name]
16
+ @references_by_name[table_name]&.alias_name
17
+ end
18
+
19
+ def reference_for(table_name)
20
+ @references_by_name[table_name]
21
+ end
22
+
23
+ def table_map
24
+ @references_by_name.transform_values(&:alias_name)
16
25
  end
17
26
 
18
27
  def apply_aliases(text)
19
- return text if @table_map.empty?
28
+ return text if @references_by_name.empty?
20
29
 
30
+ scanner = Scanner.new(text)
21
31
  output = +""
22
- position = 0
23
-
24
- while position < text.length
25
- character = text[position]
26
32
 
27
- case character
33
+ until scanner.finished?
34
+ case scanner.current_char
28
35
  when Constants::SINGLE_QUOTE
29
- position = copy_string_literal!(text, position, output)
36
+ output << scanner.consume_single_quoted_string!
30
37
  when Constants::DOUBLE_QUOTE
31
- position = copy_quoted_identifier!(text, position, output)
38
+ output << scanner.consume_double_quoted_identifier!
32
39
  else
33
- replacement = find_table_replacement_at(text, position)
40
+ replacement = find_table_replacement_at(text, scanner.position, scanner)
34
41
 
35
42
  if replacement
36
43
  table_name, table_alias = replacement
37
44
 
38
45
  output << "#{table_alias}."
39
- position += table_name.length + 1
46
+ scanner.advance!(table_name.length + 1)
40
47
  else
41
- output << character
42
- position += 1
48
+ output << scanner.current_char
49
+ scanner.advance!
43
50
  end
44
51
  end
45
52
  end
@@ -50,51 +57,29 @@ module SqlBeautifier
50
57
  private
51
58
 
52
59
  def build!
53
- table_entries = extract_table_entries(@from_content)
54
-
55
- if @alias_strategy == :none
56
- table_entries.each do |table_entry|
57
- next unless table_entry[:explicit_alias]
58
-
59
- @table_map[table_entry[:table_name]] = table_entry[:explicit_alias]
60
- end
61
- else
62
- initials_occurrence_counts = count_initials_occurrences(table_entries)
63
- used_aliases = []
64
- assign_aliases!(table_entries, initials_occurrence_counts, used_aliases)
65
- end
66
-
67
- @tables_by_descending_length = @table_map.keys.sort_by { |name| -name.length }.freeze
68
- end
69
-
70
- def count_initials_occurrences(table_entries)
71
- occurrence_counts = Hash.new(0)
60
+ @references = parse_references(@from_content)
61
+ @references.each { |reference| @references_by_name[reference.name] = reference }
72
62
 
73
- table_entries.each do |table_entry|
74
- next if table_entry[:explicit_alias]
75
-
76
- table_name = table_entry[:table_name]
77
- occurrence_counts[table_initials(table_name)] += 1
78
- end
63
+ assign_computed_aliases! unless @alias_strategy == :none
79
64
 
80
- occurrence_counts
65
+ aliased_names = @references_by_name.select { |_name, reference| reference.alias_name }.keys
66
+ @tables_by_descending_length = aliased_names.sort_by { |name| -name.length }.freeze
81
67
  end
82
68
 
83
- def assign_aliases!(table_entries, initials_occurrence_counts, used_aliases)
69
+ def assign_computed_aliases!
70
+ initials_occurrence_counts = count_initials_occurrences
84
71
  duplicate_initials_counts = Hash.new(0)
85
72
  collision_counts = Hash.new(0)
73
+ used_aliases = []
86
74
 
87
- table_entries.each do |table_entry|
88
- table_name = table_entry[:table_name]
89
- explicit_alias = table_entry[:explicit_alias]
90
-
91
- if explicit_alias
92
- @table_map[table_name] = explicit_alias
93
- used_aliases << explicit_alias
75
+ @references.each do |reference|
76
+ if reference.explicit_alias
77
+ @references_by_name[reference.name] = reference
78
+ used_aliases << reference.explicit_alias
94
79
  next
95
80
  end
96
81
 
97
- initials = table_initials(table_name)
82
+ initials = table_initials(reference.name)
98
83
  duplicate_initials_counts[initials] += 1 if initials_occurrence_counts[initials] > 1
99
84
 
100
85
  candidate_alias = begin
@@ -115,60 +100,30 @@ module SqlBeautifier
115
100
  end
116
101
  end
117
102
 
118
- @table_map[table_name] = candidate_alias
103
+ reference.assign_alias!(candidate_alias)
119
104
  used_aliases << candidate_alias
120
105
  end
121
106
  end
122
107
 
123
- def copy_string_literal!(text, position, output)
124
- output << text[position]
125
- position += 1
126
-
127
- while position < text.length
128
- character = text[position]
129
- output << character
130
-
131
- if Tokenizer.escaped_single_quote?(text, position)
132
- position += 1
133
- output << text[position]
134
- elsif character == Constants::SINGLE_QUOTE
135
- return position + 1
136
- end
137
-
138
- position += 1
139
- end
140
-
141
- position
142
- end
143
-
144
- def copy_quoted_identifier!(text, position, output)
145
- output << text[position]
146
- position += 1
147
-
148
- while position < text.length
149
- character = text[position]
150
- output << character
108
+ def count_initials_occurrences
109
+ occurrence_counts = Hash.new(0)
151
110
 
152
- if Tokenizer.escaped_double_quote?(text, position)
153
- position += 1
154
- output << text[position]
155
- elsif character == Constants::DOUBLE_QUOTE
156
- return position + 1
157
- end
111
+ @references.each do |reference|
112
+ next if reference.explicit_alias
158
113
 
159
- position += 1
114
+ occurrence_counts[table_initials(reference.name)] += 1
160
115
  end
161
116
 
162
- position
117
+ occurrence_counts
163
118
  end
164
119
 
165
- def find_table_replacement_at(text, position)
166
- return unless Tokenizer.word_boundary?(Tokenizer.character_before(text, position))
120
+ def find_table_replacement_at(text, position, scanner)
121
+ return unless scanner.word_boundary?(scanner.character_before(position))
167
122
 
168
123
  @tables_by_descending_length.each do |table_name|
169
124
  next unless text[position, table_name.length + 1] == "#{table_name}."
170
125
 
171
- return [table_name, @table_map[table_name]]
126
+ return [table_name, @references_by_name[table_name].alias_name]
172
127
  end
173
128
 
174
129
  nil
@@ -180,50 +135,21 @@ module SqlBeautifier
180
135
  table_name.split("_").map { |segment| segment[0] }.join
181
136
  end
182
137
 
183
- def extract_table_entries(from_content)
138
+ def parse_references(from_content)
184
139
  split_segments = from_content.strip.split(Constants::JOIN_KEYWORD_PATTERN)
185
140
 
186
- table_entries = []
141
+ references = []
187
142
 
188
143
  primary_segment = split_segments.shift.strip
189
- table_entries << extract_table_entry(primary_segment)
144
+ references << TableReference.parse(primary_segment)
190
145
 
191
146
  split_segments.each_slice(2) do |_join_keyword, join_content|
192
147
  next unless join_content
193
148
 
194
- table_entries << extract_table_entry(join_content)
149
+ references << TableReference.parse(join_content)
195
150
  end
196
151
 
197
- table_entries.compact
198
- end
199
-
200
- def extract_table_entry(segment_text)
201
- table_specification = table_specification_text(segment_text)
202
- table_name = Util.first_word(table_specification)
203
- return unless table_name
204
-
205
- {
206
- table_name: table_name,
207
- explicit_alias: extract_explicit_alias(table_specification),
208
- }
209
- end
210
-
211
- def table_specification_text(segment_text)
212
- on_keyword_position = Tokenizer.find_top_level_keyword(segment_text, "on")
213
- return segment_text.strip unless on_keyword_position
214
-
215
- segment_text[0...on_keyword_position].strip
216
- end
217
-
218
- def extract_explicit_alias(table_specification)
219
- words = table_specification.strip.split(Constants::WHITESPACE_REGEX).grep_v(CommentStripper::SENTINEL_PATTERN)
220
- return nil if words.length < 2
221
-
222
- if words[1] == "as"
223
- words[2]
224
- else
225
- words[1]
226
- end
152
+ references.compact
227
153
  end
228
154
  end
229
155
  end