sql_beautifier 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +2 -2
  4. data/lib/sql_beautifier/base.rb +9 -0
  5. data/lib/sql_beautifier/clauses/base.rb +2 -2
  6. data/lib/sql_beautifier/clauses/condition_clause.rb +1 -1
  7. data/lib/sql_beautifier/clauses/from.rb +15 -69
  8. data/lib/sql_beautifier/clauses/order_by.rb +12 -1
  9. data/lib/sql_beautifier/clauses/select.rb +28 -15
  10. data/lib/sql_beautifier/comment.rb +23 -0
  11. data/lib/sql_beautifier/{comment_stripper.rb → comment_parser.rb} +67 -24
  12. data/lib/sql_beautifier/condition.rb +162 -0
  13. data/lib/sql_beautifier/configuration.rb +4 -15
  14. data/lib/sql_beautifier/create_table_as.rb +127 -0
  15. data/lib/sql_beautifier/cte_definition.rb +41 -0
  16. data/lib/sql_beautifier/cte_query.rb +129 -0
  17. data/lib/sql_beautifier/expression.rb +54 -0
  18. data/lib/sql_beautifier/formatter.rb +13 -80
  19. data/lib/sql_beautifier/join.rb +69 -0
  20. data/lib/sql_beautifier/normalizer.rb +33 -59
  21. data/lib/sql_beautifier/query.rb +185 -0
  22. data/lib/sql_beautifier/scanner.rb +420 -0
  23. data/lib/sql_beautifier/sort_expression.rb +39 -0
  24. data/lib/sql_beautifier/statement_assembler.rb +4 -4
  25. data/lib/sql_beautifier/statement_splitter.rb +35 -143
  26. data/lib/sql_beautifier/table_reference.rb +52 -0
  27. data/lib/sql_beautifier/table_registry.rb +50 -124
  28. data/lib/sql_beautifier/tokenizer.rb +47 -278
  29. data/lib/sql_beautifier/types.rb +9 -0
  30. data/lib/sql_beautifier/version.rb +1 -1
  31. data/lib/sql_beautifier.rb +14 -6
  32. metadata +43 -7
  33. data/lib/sql_beautifier/comment_restorer.rb +0 -62
  34. data/lib/sql_beautifier/condition_formatter.rb +0 -127
  35. data/lib/sql_beautifier/create_table_as_formatter.rb +0 -177
  36. data/lib/sql_beautifier/cte_formatter.rb +0 -192
  37. data/lib/sql_beautifier/subquery_formatter.rb +0 -113
@@ -1,127 +0,0 @@
1
# frozen_string_literal: true

module SqlBeautifier
  # Lays out a boolean condition list with one condition per line. A
  # parenthesized group is kept inline when its rebuilt text fits within the
  # configured :inline_group_threshold and is expanded onto indented lines
  # otherwise.
  #
  # Conditions travel through this module as [conjunction, condition_text]
  # pairs (the shape produced by Tokenizer.split_top_level_conditions). The
  # conjunction of the first pair is never rendered.
  module ConditionFormatter
    module_function

    # Formats +text+ as a multi-line condition list.
    #
    # @param text [String] raw condition text
    # @param args [Hash] options; :indent_width (default 0) is the number of
    #   columns every emitted line is indented by
    # @return [String] the stripped input when there is nothing to lay out,
    #   otherwise the conditions joined with newlines
    def format(text, args = {})
      indent_width = args.fetch(:indent_width, 0)
      conditions = Tokenizer.split_top_level_conditions(text)

      # A lone condition that is not itself a multi-part group needs no layout.
      return text.strip if conditions.length <= 1 && parse_condition_group(conditions.dig(0, 1)).nil?

      flattened = flatten_same_conjunction_groups(conditions)
      padding = Util.whitespace(indent_width)

      rendered = flattened.each_with_index.map do |(conjunction, condition_text), index|
        body = format_single_condition(unwrap_single_condition(condition_text), indent_width: indent_width)
        index.zero? ? "#{padding}#{body}" : "#{padding}#{conjunction} #{body}"
      end

      rendered.join("\n")
    end

    # Merges parenthesized groups into the surrounding list when every
    # conjunction involved (outer and inner) is the same one. Lists that mix
    # conjunctions are returned untouched.
    #
    # @param conditions [Array<Array>] [conjunction, condition_text] pairs
    # @return [Array<Array>] the original list or a new flattened list
    def flatten_same_conjunction_groups(conditions)
      return conditions if conditions.length <= 1

      outer_conjunction = conditions[1]&.first
      return conditions unless outer_conjunction
      return conditions if conditions.drop(1).any? { |pair| pair[0] != outer_conjunction }

      conditions.each_with_object([]) do |(conjunction, condition_text), merged|
        group = parse_condition_group(condition_text)

        if group && flattenable_into_conjunction?(group, outer_conjunction)
          flatten_inner_conditions_into!(merged, group, conjunction, outer_conjunction)
        else
          merged << [conjunction, condition_text]
        end
      end
    end

    # Re-joins a group's pairs into a single parenthesized line.
    #
    # @param inner_conditions [Array<Array>] [conjunction, condition_text] pairs
    # @return [String] e.g. "(a and b)"
    def rebuild_inline(inner_conditions)
      fragments = inner_conditions.each_with_index.map do |(conjunction, condition_text), index|
        index.positive? ? "#{conjunction} #{condition_text}" : condition_text
      end

      "(#{fragments.join(' ')})"
    end

    # Peels off outer parentheses for as long as they wrap exactly one
    # condition; parentheses around a multi-condition group are kept.
    #
    # @param condition [String]
    # @return [String] the stripped, unwrapped condition text
    def unwrap_single_condition(condition)
      current = condition.strip

      loop do
        return current unless Tokenizer.outer_parentheses_wrap_all?(current)

        peeled = Util.strip_outer_parentheses(current)
        return current if Tokenizer.split_top_level_conditions(peeled).length > 1

        current = peeled
      end
    end

    # Splits a fully parenthesized condition into its inner pairs.
    #
    # @param condition_text [String, nil]
    # @return [Array<Array>, nil] the inner pairs, or nil when the text is
    #   nil, not wrapped in outer parentheses, or holds a single condition
    def parse_condition_group(condition_text)
      candidate = condition_text&.strip
      return nil if candidate.nil? || !Tokenizer.outer_parentheses_wrap_all?(candidate)

      members = Tokenizer.split_top_level_conditions(Util.strip_outer_parentheses(candidate))
      members if members.length > 1
    end

    # Renders one condition. Non-group text is returned as given; a group is
    # returned inline when short enough, otherwise as a parenthesized block
    # whose contents are formatted four columns deeper.
    #
    # @param condition_text [String]
    # @param indent_width [Integer] column of the line holding this condition
    # @return [String]
    def format_single_condition(condition_text, indent_width:)
      group = parse_condition_group(condition_text)
      return condition_text unless group

      inline = rebuild_inline(group)
      return inline if inline.length <= SqlBeautifier.config_for(:inline_group_threshold)

      nested = format(Util.strip_outer_parentheses(condition_text.strip), indent_width: indent_width + 4)
      closing_padding = Util.whitespace(indent_width)

      "(\n#{nested}\n#{closing_padding})"
    end

    # True when every conjunction inside the group equals +outer_conjunction+.
    #
    # @param inner_conditions [Array<Array>]
    # @param outer_conjunction [Object] conjunction used by the enclosing list
    # @return [Boolean]
    def flattenable_into_conjunction?(inner_conditions, outer_conjunction)
      return false unless inner_conditions[1]&.first == outer_conjunction

      inner_conditions.drop(1).all? { |pair| pair[0] == outer_conjunction }
    end

    # Appends a group's conditions to +flattened_conditions+ (mutating it).
    # The very first entry of the result carries no conjunction; the first
    # entry of a later group inherits the group's own conjunction (falling
    # back to the outer one); all other entries use the outer conjunction.
    #
    # @param flattened_conditions [Array<Array>] accumulator, mutated in place
    # @param inner_conditions [Array<Array>] pairs of the group being merged
    # @param conjunction [Object, nil] conjunction that preceded the group
    # @param outer_conjunction [Object] conjunction of the enclosing list
    def flatten_inner_conditions_into!(flattened_conditions, inner_conditions, conjunction, outer_conjunction)
      inner_conditions.each_with_index do |inner_pair, inner_index|
        joiner =
          if flattened_conditions.empty?
            nil
          elsif inner_index.zero?
            conjunction || outer_conjunction
          else
            outer_conjunction
          end

        flattened_conditions << [joiner, inner_pair[1]]
      end
    end
  end
end
@@ -1,177 +0,0 @@
1
# frozen_string_literal: true

module SqlBeautifier
  # Parses and formats `CREATE [modifiers] TABLE [IF NOT EXISTS] name AS <query>`
  # statements. The query body is formatted through Formatter and emitted
  # inside an indented parenthesized block, followed by an optional
  # `WITH [NO] DATA` suffix.
  module CreateTableAsFormatter
    # Keywords accepted between CREATE and TABLE. Several may appear in a row
    # (e.g. `local temp`).
    MODIFIERS = %w[
      temp
      temporary
      unlogged
      local
    ].freeze

    # Matches a trailing `with data` / `with no data`; capture 1 is the suffix.
    WITH_DATA_SUFFIX_REGEX = %r{\s+(with\s+(?:no\s+)?data)\s*\z}i

    module_function

    # Formats a CREATE TABLE ... AS statement.
    #
    # @param normalized_sql [String]
    # @param _args [Hash] accepted for interface parity; unused
    # @return [String, nil] formatted statement, or nil when the SQL is not a
    #   parseable CREATE TABLE ... AS statement
    def format(normalized_sql, _args = {})
      return nil unless create_table_as_query?(normalized_sql)

      parsed = parse(normalized_sql)
      return nil unless parsed

      format_statement(parsed[:preamble], parsed[:body], parsed[:suffix])
    end

    # Cheap pre-check: only tests that the statement starts with CREATE.
    # +parse+ performs the full validation.
    def create_table_as_query?(sql)
      Tokenizer.keyword_at?(sql, 0, "create")
    end

    # Breaks the statement into its parts.
    #
    # @param sql [String]
    # @return [Hash, nil] { preamble:, body:, suffix: } or nil when any
    #   required piece (CREATE, TABLE, table name, AS, body) is missing
    def parse(sql)
      position = 0
      return nil unless Tokenizer.keyword_at?(sql, position, "create")

      position = skip_past_keyword(sql, position, "create")

      # Consume every consecutive modifier so that combinations such as
      # `local temp` reach the TABLE keyword instead of failing the parse.
      modifiers = []
      while (modifier = detect_modifier(sql, position))
        modifiers << modifier
        position = skip_past_keyword(sql, position, modifier)
      end

      return nil unless Tokenizer.keyword_at?(sql, position, "table")

      position = skip_past_keyword(sql, position, "table")

      if_not_exists = detect_if_not_exists?(sql, position)
      position = skip_past_if_not_exists(sql, position) if if_not_exists

      table_name, position = read_identifier(sql, position)
      return nil unless table_name

      position = skip_whitespace(sql, position)
      return nil unless Tokenizer.keyword_at?(sql, position, "as")

      position = skip_past_keyword(sql, position, "as")

      body_sql, suffix = extract_body(sql, position)
      return nil unless body_sql

      { preamble: build_preamble(modifiers, if_not_exists, table_name), body: body_sql, suffix: suffix }
    end

    # @return [String, nil] the first MODIFIERS entry found at +position+
    def detect_modifier(sql, position)
      MODIFIERS.find { |modifier| Tokenizer.keyword_at?(sql, position, modifier) }
    end

    # True when the keywords IF, NOT, EXISTS appear in sequence at +position+.
    def detect_if_not_exists?(sql, position)
      %w[if not exists].all? do |keyword|
        next false unless Tokenizer.keyword_at?(sql, position, keyword)

        position = skip_past_keyword(sql, position, keyword)
        true
      end
    end

    # @return [Integer] position just past `if not exists` and any whitespace
    def skip_past_if_not_exists(sql, position)
      %w[if not exists].reduce(position) { |cursor, keyword| skip_past_keyword(sql, cursor, keyword) }
    end

    # Extracts the query body that follows AS.
    #
    # @return [Array(String, String|nil), nil] [body, suffix]; nil when there
    #   is no body or a leading parenthesis is never closed
    def extract_body(sql, position)
      position = skip_whitespace(sql, position)
      return nil if position >= sql.length

      return extract_unparenthesized_body(sql[position..].strip) unless sql[position] == Constants::OPEN_PARENTHESIS

      closing = Tokenizer.find_matching_parenthesis(sql, position)
      return nil unless closing

      body = sql[(position + 1)...closing].strip
      suffix = sql[(closing + 1)..].strip.presence
      [body, suffix]
    end

    # Splits a bare (non-parenthesized) body from a trailing WITH [NO] DATA.
    #
    # @return [Array(String, String|nil), nil] nil when the body is blank
    def extract_unparenthesized_body(raw_body)
      return nil unless raw_body.present?

      match = raw_body.match(WITH_DATA_SUFFIX_REGEX)
      return [raw_body, nil] unless match

      body = raw_body[0...match.begin(0)].strip
      return nil unless body.present?

      [body, match[1]]
    end

    # Builds the `CREATE ... TABLE ... name AS` header line.
    #
    # @param modifier [String, Array<String>, nil] one modifier, several, or none
    # @param if_not_exists [Boolean]
    # @param table_name [String]
    # @return [String]
    def build_preamble(modifier, if_not_exists, table_name)
      parts = [Util.format_keyword("create")]
      Array(modifier).each { |word| parts << Util.format_keyword(word) }
      parts << Util.format_keyword("table")
      parts << "#{Util.format_keyword('if')} #{Util.format_keyword('not')} #{Util.format_keyword('exists')}" if if_not_exists
      parts << Util.format_table_name(table_name)
      parts << Util.format_keyword("as")
      parts.join(" ")
    end

    # Assembles the final statement text: the preamble, the formatted body
    # indented inside parentheses, and the optional suffix.
    #
    # @return [String] always newline-terminated; just the preamble line when
    #   Formatter yields nothing for the body
    def format_statement(preamble, body_sql, suffix)
      indent_spaces = SqlBeautifier.config_for(:indent_spaces) || 4
      formatted = Formatter.new(body_sql, depth: 0).call
      return "#{preamble}\n" unless formatted

      indentation = Util.whitespace(indent_spaces)
      # Blank lines stay blank rather than receiving trailing indentation.
      indented_lines = formatted.chomp.lines.map { |line| line.strip.empty? ? "\n" : "#{indentation}#{line}" }.join

      formatted_suffix = suffix ? " #{format_suffix(suffix)}" : ""
      "#{preamble} (\n#{indented_lines}\n)#{formatted_suffix}\n"
    end

    # Applies keyword formatting to each word of the suffix.
    def format_suffix(suffix)
      suffix.strip.split(%r{\s+}).map { |word| Util.format_keyword(word) }.join(" ")
    end

    # Reads a bare or double-quoted identifier starting at +position+
    # (leading whitespace is skipped).
    #
    # @return [Array(String, Integer), nil] [identifier, next_position], or nil
    #   when no identifier is present or a quoted identifier is unterminated
    def read_identifier(sql, position)
      position = skip_whitespace(sql, position)
      return nil if position >= sql.length
      return read_quoted_identifier(sql, position) if sql[position] == Constants::DOUBLE_QUOTE

      start = position
      position += 1 while position < sql.length && sql[position].match?(Tokenizer::IDENTIFIER_CHARACTER)
      return nil if position == start

      [sql[start...position], position]
    end

    # Reads a double-quoted identifier whose opening quote is at +start+.
    # A doubled quote ("") inside the identifier is an escaped quote and does
    # not terminate it. Returns nil when no closing quote is found, so a
    # dangling `"` or a name ending in an escaped quote is rejected.
    def read_quoted_identifier(sql, start)
      position = start + 1

      while position < sql.length
        if sql[position] != Constants::DOUBLE_QUOTE
          position += 1
        elsif sql[position + 1] == Constants::DOUBLE_QUOTE
          position += 2
        else
          finish = position + 1
          return [sql[start...finish], finish]
        end
      end

      nil
    end

    # @return [Integer] first position at or after +position+ that is not whitespace
    def skip_whitespace(sql, position)
      position += 1 while position < sql.length && sql[position].match?(Constants::WHITESPACE_CHARACTER_REGEX)
      position
    end

    # Assumes +keyword+ starts at +position+; returns the position after it
    # and any following whitespace.
    def skip_past_keyword(sql, position, keyword)
      skip_whitespace(sql, position + keyword.length)
    end
  end
end
@@ -1,192 +0,0 @@
1
# frozen_string_literal: true

module SqlBeautifier
  # Parses and formats `WITH [RECURSIVE] name [(columns)] AS [[NOT]
  # MATERIALIZED] (<query>), ... <main query>` statements. Each CTE body and
  # the main query are formatted through Formatter.
  module CteFormatter
    module_function

    # Formats a CTE statement.
    #
    # @param normalized_sql [String]
    # @param args [Hash] options; :depth (default 0) is forwarded to the
    #   Formatter used for the main query
    # @return [String, nil] formatted statement, or nil when the SQL does not
    #   start with WITH, has no parseable definition, or has no main query
    def format(normalized_sql, args = {})
      depth = args.fetch(:depth, 0)
      return nil unless cte_query?(normalized_sql)

      recursive, definitions, main_query_sql = parse(normalized_sql)
      return nil unless definitions.any? && main_query_sql.present?

      format_cte_statement(recursive, definitions, main_query_sql, depth)
    end

    # True when the statement starts with the WITH keyword.
    def cte_query?(sql)
      Tokenizer.keyword_at?(sql, 0, "with")
    end

    # Splits a statement that starts with WITH into its parts.
    #
    # @param sql [String]
    # @return [Array] [recursive flag, definitions, main_query_sql], where each
    #   definition is a Hash with :name and :body plus :column_list and
    #   :materialization when present
    def parse(sql)
      position = skip_past_keyword(sql, 0, "with")

      recursive = Tokenizer.keyword_at?(sql, position, "recursive")
      position = skip_past_keyword(sql, position, "recursive") if recursive

      definitions = []

      loop do
        definition, next_position = parse_definition(sql, position)
        break unless definition

        definitions << definition
        position = skip_whitespace(sql, next_position)

        # A comma announces another definition; anything else starts the main query.
        break unless position < sql.length && sql[position] == Constants::COMMA

        position = skip_whitespace(sql, position + 1)
      end

      [recursive, definitions, sql[position..].strip]
    end

    # Parses one `name [(columns)] AS [[NOT] MATERIALIZED] (body)` definition.
    #
    # @return [Array(Hash, Integer), nil] [definition, position after the
    #   closing parenthesis], or nil when the text at +position+ is not a
    #   complete definition
    def parse_definition(sql, position)
      name, position = read_identifier(sql, position)
      return nil unless name

      position = skip_whitespace(sql, position)

      column_list = parse_column_list(sql, position)
      position = column_list[:next_position] if column_list

      return nil unless Tokenizer.keyword_at?(sql, position, "as")

      position = skip_past_keyword(sql, position, "as")
      materialization, position = parse_materialization(sql, position)

      return nil unless position < sql.length && sql[position] == Constants::OPEN_PARENTHESIS

      closing = Tokenizer.find_matching_parenthesis(sql, position)
      return nil unless closing

      definition = { name: name, body: sql[(position + 1)...closing].strip }
      definition[:column_list] = column_list[:text] if column_list
      definition[:materialization] = materialization if materialization

      [definition, closing + 1]
    end

    # Reads an optional parenthesized column list. The parentheses only count
    # as a column list when the AS keyword follows them.
    #
    # @return [Hash, nil] { text:, next_position: } with next_position at AS
    def parse_column_list(sql, position)
      return nil unless position < sql.length && sql[position] == Constants::OPEN_PARENTHESIS

      closing = Tokenizer.find_matching_parenthesis(sql, position)
      return nil unless closing

      after_paren = skip_whitespace(sql, closing + 1)
      return nil unless Tokenizer.keyword_at?(sql, after_paren, "as")

      { text: sql[(position + 1)...closing].strip, next_position: after_paren }
    end

    # Renders the WITH clause followed by the formatted main query.
    #
    # @param recursive [Boolean] whether to emit the RECURSIVE keyword
    # @param definitions [Array<Hash>] as produced by +parse+
    # @param main_query_sql [String]
    # @param depth [Integer] forwarded to the main query's Formatter
    # @return [String]
    def format_cte_statement(recursive, definitions, main_query_sql, depth)
      keyword_width = SqlBeautifier.config_for(:keyword_column_width)
      continuation_indent = Util.continuation_padding
      last_index = definitions.length - 1

      output = +""

      definitions.each_with_index do |definition, index|
        if index.zero?
          output << Util.keyword_padding("with")
          output << "#{Util.format_keyword('recursive')} " if recursive
        else
          output << continuation_indent
        end

        output << definition_header(definition)
        # CTE bodies close at the keyword column.
        output << format_body(definition[:body], keyword_width)
        # Definitions are comma-separated; the last one is followed by a blank line.
        output << (index < last_index ? ",\n" : "\n\n")
      end

      formatted_main = Formatter.new(main_query_sql, depth: depth).call
      output << formatted_main if formatted_main

      output
    end

    # Builds `name [(columns)] AS [materialization] ` including the trailing
    # space that precedes the body's opening parenthesis.
    def definition_header(definition)
      header = String.new(definition[:name].to_s)
      header << " (#{definition[:column_list]})" if definition[:column_list]
      header << " #{Util.format_keyword('as')}"
      header << " #{format_materialization(definition[:materialization])}" if definition[:materialization]
      header << " "
    end

    # Detects an optional MATERIALIZED / NOT MATERIALIZED hint.
    #
    # @return [Array(String|nil, Integer)] ["materialized" | "not materialized"
    #   | nil, position after the hint and surrounding whitespace]
    def parse_materialization(sql, position)
      position = skip_whitespace(sql, position)
      return ["materialized", skip_past_keyword(sql, position, "materialized")] if Tokenizer.keyword_at?(sql, position, "materialized")
      return [nil, position] unless Tokenizer.keyword_at?(sql, position, "not")

      materialized_position = skip_past_keyword(sql, position, "not")
      # A NOT that is not followed by MATERIALIZED is left unconsumed.
      return [nil, position] unless Tokenizer.keyword_at?(sql, materialized_position, "materialized")

      ["not materialized", skip_past_keyword(sql, materialized_position, "materialized")]
    end

    # Keyword-formats a materialization hint; any value other than
    # "materialized" is rendered as NOT MATERIALIZED.
    def format_materialization(materialization)
      return Util.format_keyword("materialized") if materialization == "materialized"

      [Util.format_keyword("not"), Util.format_keyword("materialized")].join(" ")
    end

    # Formats a CTE body as an indented parenthesized block.
    #
    # @param body_sql [String]
    # @param base_indent [Integer] column of the closing parenthesis
    # @return [String] "(body)" verbatim when Formatter yields nothing
    def format_body(body_sql, base_indent)
      indent_spaces = SqlBeautifier.config_for(:indent_spaces) || 4
      formatted = Formatter.new(body_sql, depth: 0).call
      return "(#{body_sql})" unless formatted

      indentation = Util.whitespace(base_indent + indent_spaces)
      # Blank lines stay blank rather than receiving trailing indentation.
      indented_lines = formatted.chomp.lines.map { |line| line.strip.empty? ? "\n" : "#{indentation}#{line}" }.join

      "(\n#{indented_lines}\n#{Util.whitespace(base_indent)})"
    end

    # Reads a bare or double-quoted identifier starting at +position+
    # (leading whitespace is skipped).
    #
    # @return [Array(String, Integer), nil] [identifier, next_position], or nil
    #   when no identifier is present or a quoted identifier is unterminated
    def read_identifier(sql, position)
      position = skip_whitespace(sql, position)
      return nil if position >= sql.length
      return read_quoted_identifier(sql, position) if sql[position] == Constants::DOUBLE_QUOTE

      start = position
      position += 1 while position < sql.length && sql[position].match?(Tokenizer::IDENTIFIER_CHARACTER)
      return nil if position == start

      [sql[start...position], position]
    end

    # Reads a double-quoted identifier whose opening quote is at +start+.
    # A doubled quote ("") inside the identifier is an escaped quote and does
    # not terminate it. Returns nil when no closing quote is found, so a
    # dangling `"` or a name ending in an escaped quote is rejected.
    def read_quoted_identifier(sql, start)
      position = start + 1

      while position < sql.length
        if sql[position] != Constants::DOUBLE_QUOTE
          position += 1
        elsif sql[position + 1] == Constants::DOUBLE_QUOTE
          position += 2
        else
          finish = position + 1
          return [sql[start...finish], finish]
        end
      end

      nil
    end

    # @return [Integer] first position at or after +position+ that is not whitespace
    def skip_whitespace(sql, position)
      position += 1 while position < sql.length && sql[position].match?(Constants::WHITESPACE_CHARACTER_REGEX)
      position
    end

    # Assumes +keyword+ starts at +position+; returns the position after it
    # and any following whitespace.
    def skip_past_keyword(sql, position, keyword)
      skip_whitespace(sql, position + keyword.length)
    end
  end
end
@@ -1,113 +0,0 @@
1
# frozen_string_literal: true

module SqlBeautifier
  # Finds parenthesized SELECT subqueries in a piece of text and replaces each
  # with a multi-line, indented rendering produced by Formatter.
  module SubqueryFormatter
    module_function

    # Rewrites every subquery found in +text+, copying all other text through
    # unchanged.
    #
    # @param text [String]
    # @param base_indent [Integer] default column for a subquery's closing
    #   parenthesis
    # @return [String]
    def format(text, base_indent)
      result = +""
      cursor = 0

      while cursor < text.length
        opening = find_top_level_subquery(text, cursor)
        # No more subqueries: copy the remainder verbatim.
        return result << text[cursor..] unless opening

        result << text[cursor...opening]

        closing = Tokenizer.find_matching_parenthesis(text, opening)
        # Unbalanced parenthesis: give up on formatting and copy the rest as is.
        return result << text[opening..] unless closing

        inner_sql = text[(opening + 1)...closing].strip
        result << format_subquery(inner_sql, subquery_base_indent_for(text, opening, base_indent))
        cursor = closing + 1
      end

      result
    end

    # Scans forward from +start_position+ for the first opening parenthesis
    # that is followed by SELECT and is not inside a single-quoted string or a
    # double-quoted identifier. A doubled quote character inside a quoted
    # region is treated as an escaped quote.
    #
    # @return [Integer, nil] index of the opening parenthesis, or nil
    def find_top_level_subquery(text, start_position)
      cursor = start_position
      open_quote = nil # the quote character of the region being skipped, if any

      while cursor < text.length
        character = text[cursor]

        if open_quote
          if character != open_quote
            cursor += 1
          elsif text[cursor + 1] == open_quote
            cursor += 2
          else
            open_quote = nil
            cursor += 1
          end
          next
        end

        if character == Constants::SINGLE_QUOTE
          open_quote = Constants::SINGLE_QUOTE
        elsif character == Constants::DOUBLE_QUOTE
          open_quote = Constants::DOUBLE_QUOTE
        elsif character == Constants::OPEN_PARENTHESIS && select_follows?(text, cursor)
          return cursor
        end

        cursor += 1
      end

      nil
    end

    # Renders one subquery as a parenthesized block whose lines are indented
    # one level beyond +base_indent+ and whose closing parenthesis sits at
    # +base_indent+.
    #
    # @param inner_sql [String] subquery text without its parentheses
    # @param base_indent [Integer]
    # @return [String] "(inner_sql)" verbatim when Formatter yields nothing
    def format_subquery(inner_sql, base_indent)
      step = SqlBeautifier.config_for(:indent_spaces) || 4
      inner_indent = base_indent + step
      formatted = Formatter.new(inner_sql, depth: inner_indent).call
      return "(#{inner_sql})" unless formatted

      padding = Util.whitespace(inner_indent)
      body = formatted.chomp.lines.map do |line|
        # Blank lines stay blank rather than receiving trailing indentation.
        line.strip.empty? ? "\n" : "#{padding}#{line}"
      end

      "(\n#{body.join}\n#{Util.whitespace(base_indent)})"
    end

    # Chooses the base indent for the subquery opening at +subquery_position+.
    # When the text before it on the same line starts with WHERE, the indent
    # is shifted by that line's leading whitespace plus the configured
    # :keyword_column_width; otherwise the default is used.
    #
    # @return [Integer]
    def subquery_base_indent_for(text, subquery_position, default_base_indent)
      newline_index = text.rindex("\n", subquery_position - 1)
      line_start = newline_index.nil? ? 0 : newline_index + 1
      prefix = text[line_start...subquery_position]
      leading_width = prefix[%r{\A[[:space:]]*}].to_s.length

      if prefix.lstrip.match?(%r{\Awhere(?:[[:space:]]|$)}i)
        default_base_indent + leading_width + SqlBeautifier.config_for(:keyword_column_width)
      else
        default_base_indent
      end
    end

    # True when the text after the parenthesis at +position+ begins (after
    # optional whitespace) with SELECT followed by whitespace or "(".
    def select_follows?(text, position)
      rest = text[(position + 1)..]
      return false unless rest

      rest.match?(%r{\A[[:space:]]*select(?:[[:space:]]|\()}i)
    end
  end
end