sql_beautifier 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/README.md +156 -37
- data/lib/sql_beautifier/clauses/from.rb +8 -2
- data/lib/sql_beautifier/comment_restorer.rb +62 -0
- data/lib/sql_beautifier/comment_stripper.rb +222 -0
- data/lib/sql_beautifier/configuration.rb +7 -1
- data/lib/sql_beautifier/formatter.rb +29 -5
- data/lib/sql_beautifier/normalizer.rb +26 -63
- data/lib/sql_beautifier/statement_assembler.rb +31 -0
- data/lib/sql_beautifier/statement_splitter.rb +269 -0
- data/lib/sql_beautifier/table_registry.rb +1 -1
- data/lib/sql_beautifier/tokenizer.rb +62 -0
- data/lib/sql_beautifier/version.rb +1 -1
- data/lib/sql_beautifier.rb +29 -2
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae905b6c37cef236a53ae510743055f7c5fb5c184a1513152920aeaa23b6f79b
|
|
4
|
+
data.tar.gz: ed5e4c3e0207d1cdf9f600852661fca974bd4f435d1e6c32fd3d471ab46e6288
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ef28e0c0853fb740d677be87061c284aeae9c8e5f0ed62f4d34417577d58c5ac87bcc0072c88a6f8bcfdac52a4cae3ff335cf8f1de07a9d1c7b3ba67533ed820
|
|
7
|
+
data.tar.gz: 3f8f35358960a0c0015026251799621ee0a775aaa066a4dc53de36c4a973920f0f76052cfced812a861e4b372e88ca22430c432f77c1537642d6303c0468501a
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
## [X.X.X] - YYYY-MM-DD
|
|
4
4
|
|
|
5
|
+
## [0.6.0] - 2026-03-28
|
|
6
|
+
|
|
7
|
+
- **Breaking**: comments are now preserved by default. Set `removable_comment_types = :all` to restore previous behavior of stripping all comments
|
|
8
|
+
- Add `removable_comment_types` configuration option (default: `:none`) — controls which SQL comment types are stripped during formatting. Accepts `:none`, `:all`, or an array of specific types (`:inline`, `:separate_line`, `:blocks`)
|
|
9
|
+
- Add multi-statement support — input containing multiple statements (separated by `;` or concatenated) is split and formatted independently
|
|
10
|
+
- Add `trailing_semicolon` configuration option (default: `true`) — automatically appends `;` to each formatted statement
|
|
11
|
+
- Add per-call configuration overrides via `SqlBeautifier.call(value, trailing_semicolon: false)` — overrides take precedence over global config for the duration of the call
|
|
12
|
+
- Change `inline_group_threshold` default from `100` to `0` — parenthesized condition groups are now always expanded to multiple lines
|
|
13
|
+
- Fix `StatementSplitter` incorrectly splitting `INSERT INTO ... SELECT` as two separate statements
|
|
14
|
+
- Fix inline comments after a trailing semicolon (e.g. `SELECT 1; -- done`) being silently dropped during formatting
|
|
15
|
+
- Fix infinite loop in `Normalizer#consume_sentinel!` when a malformed sentinel prefix has no closing `*/`
|
|
16
|
+
- Fix `CommentStripper#resolve_removal_set` returning `nil` for unrecognized `removable_types` values — now raises `ArgumentError` with a descriptive message
|
|
17
|
+
- Fix `CommentStripper#resolve_removal_set` silently accepting invalid entries in Array-typed `removable_types` (e.g. `[:inlne]`) — now validates each element against known comment types
|
|
18
|
+
- Fix `CommentStripper` not inserting token-separating whitespace around sentinels when preserving block comments between adjacent tokens (e.g. `SELECT/*comment*/id`)
|
|
19
|
+
- Strengthen end-to-end specs with exact full-output assertions and add coverage for JOINs, subqueries, CTEs, CREATE TABLE AS, DISTINCT, complex WHERE conditions, and configuration variations
|
|
20
|
+
|
|
5
21
|
## [0.5.0] - 2026-03-28
|
|
6
22
|
|
|
7
23
|
- Add support for Create Table As (CTA) formatting
|
data/README.md
CHANGED
|
@@ -45,7 +45,7 @@ from Users u
|
|
|
45
45
|
|
|
46
46
|
where active = true
|
|
47
47
|
|
|
48
|
-
order by name
|
|
48
|
+
order by name;
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
Single-word keywords are lowercased and padded so their clause bodies start at an 8-character column. Multi-word clauses such as `order by` and `group by`, and short clauses like `limit`, use a single space between the keyword and the clause body instead of padding. Clause spacing is compact by default for simple one-column / one-table / one-condition queries, and otherwise uses blank lines between top-level clauses. Multi-column SELECT lists place each column on its own line with continuation indentation. Table names are PascalCased and automatically aliased.
|
|
@@ -74,7 +74,7 @@ select u.id,
|
|
|
74
74
|
|
|
75
75
|
from Users u
|
|
76
76
|
|
|
77
|
-
where u.active = true
|
|
77
|
+
where u.active = true;
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
When two tables produce the same initials, a counter is appended for disambiguation (e.g. `u1`, `u2`).
|
|
@@ -108,7 +108,7 @@ from Users u
|
|
|
108
108
|
where u.active = true
|
|
109
109
|
and o.total > 100
|
|
110
110
|
|
|
111
|
-
order by o.total desc
|
|
111
|
+
order by o.total desc;
|
|
112
112
|
```
|
|
113
113
|
|
|
114
114
|
Supported join types: `inner join`, `left join`, `right join`, `full join`, `left outer join`, `right outer join`, `full outer join`, `cross join`.
|
|
@@ -129,7 +129,7 @@ select distinct
|
|
|
129
129
|
name,
|
|
130
130
|
email
|
|
131
131
|
|
|
132
|
-
from Users u
|
|
132
|
+
from Users u;
|
|
133
133
|
```
|
|
134
134
|
|
|
135
135
|
`DISTINCT ON` preserves the full expression:
|
|
@@ -145,7 +145,7 @@ select distinct on (user_id)
|
|
|
145
145
|
id,
|
|
146
146
|
name
|
|
147
147
|
|
|
148
|
-
from Events e
|
|
148
|
+
from Events e;
|
|
149
149
|
```
|
|
150
150
|
|
|
151
151
|
### WHERE and HAVING Conditions
|
|
@@ -165,10 +165,10 @@ from Users u
|
|
|
165
165
|
|
|
166
166
|
where active = true
|
|
167
167
|
and role = 'admin'
|
|
168
|
-
and created_at > '2024-01-01'
|
|
168
|
+
and created_at > '2024-01-01';
|
|
169
169
|
```
|
|
170
170
|
|
|
171
|
-
|
|
171
|
+
Parenthesized condition groups are expanded to multiple lines with indentation:
|
|
172
172
|
|
|
173
173
|
```ruby
|
|
174
174
|
SqlBeautifier.call("SELECT * FROM users WHERE active = true AND (role = 'admin' OR role = 'moderator')")
|
|
@@ -182,7 +182,10 @@ select *
|
|
|
182
182
|
from Users u
|
|
183
183
|
|
|
184
184
|
where active = true
|
|
185
|
-
and (role = 'admin' or role = 'moderator')
|
|
185
|
+
and (
|
|
186
|
+
role = 'admin'
|
|
187
|
+
or role = 'moderator'
|
|
188
|
+
);
|
|
186
189
|
```
|
|
187
190
|
|
|
188
191
|
### GROUP BY and HAVING
|
|
@@ -206,7 +209,7 @@ from Users u
|
|
|
206
209
|
|
|
207
210
|
group by status
|
|
208
211
|
|
|
209
|
-
having count(*) > 5
|
|
212
|
+
having count(*) > 5;
|
|
210
213
|
```
|
|
211
214
|
|
|
212
215
|
### LIMIT
|
|
@@ -221,7 +224,7 @@ Produces:
|
|
|
221
224
|
select id
|
|
222
225
|
from Users u
|
|
223
226
|
order by created_at desc
|
|
224
|
-
limit 25
|
|
227
|
+
limit 25;
|
|
225
228
|
```
|
|
226
229
|
|
|
227
230
|
### String Literals
|
|
@@ -240,7 +243,7 @@ select *
|
|
|
240
243
|
from Users u
|
|
241
244
|
|
|
242
245
|
where name = 'O''Brien'
|
|
243
|
-
and status = 'Active'
|
|
246
|
+
and status = 'Active';
|
|
244
247
|
```
|
|
245
248
|
|
|
246
249
|
### Double-Quoted Identifiers
|
|
@@ -257,7 +260,7 @@ Produces:
|
|
|
257
260
|
select user_id,
|
|
258
261
|
full_name
|
|
259
262
|
|
|
260
|
-
from Users u
|
|
263
|
+
from Users u;
|
|
261
264
|
```
|
|
262
265
|
|
|
263
266
|
### Subqueries
|
|
@@ -277,17 +280,17 @@ where id in (
|
|
|
277
280
|
select user_id
|
|
278
281
|
from Orders o
|
|
279
282
|
where total > 100
|
|
280
|
-
)
|
|
283
|
+
);
|
|
281
284
|
```
|
|
282
285
|
|
|
283
286
|
Nested subqueries increase indentation at each level.
|
|
284
287
|
|
|
285
|
-
###
|
|
288
|
+
### Trailing Semicolons
|
|
286
289
|
|
|
287
|
-
|
|
290
|
+
By default, each formatted statement ends with a `;`:
|
|
288
291
|
|
|
289
292
|
```ruby
|
|
290
|
-
SqlBeautifier.call("SELECT id
|
|
293
|
+
SqlBeautifier.call("SELECT id FROM users WHERE active = true")
|
|
291
294
|
```
|
|
292
295
|
|
|
293
296
|
Produces:
|
|
@@ -295,45 +298,141 @@ Produces:
|
|
|
295
298
|
```sql
|
|
296
299
|
select id
|
|
297
300
|
from Users u
|
|
298
|
-
where active = true
|
|
301
|
+
where active = true;
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
Disable with `config.trailing_semicolon = false` to omit the trailing `;`.
|
|
305
|
+
|
|
306
|
+
### Multiple Statements
|
|
307
|
+
|
|
308
|
+
Input containing multiple SQL statements is split and formatted independently. Statements can be separated by `;` or simply concatenated:
|
|
309
|
+
|
|
310
|
+
```ruby
|
|
311
|
+
SqlBeautifier.call("SELECT id FROM constituents; SELECT id FROM departments")
|
|
299
312
|
```
|
|
300
313
|
|
|
314
|
+
Produces:
|
|
315
|
+
|
|
316
|
+
```sql
|
|
317
|
+
select id
|
|
318
|
+
from Constituents c;
|
|
319
|
+
|
|
320
|
+
select id
|
|
321
|
+
from Departments d;
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
Concatenated statements without `;` are also detected:
|
|
325
|
+
|
|
326
|
+
```ruby
|
|
327
|
+
SqlBeautifier.call("SELECT id FROM constituents SELECT id FROM departments")
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
Produces the same output. Subqueries and CTE bodies are not mistakenly split.
|
|
331
|
+
|
|
332
|
+
### Comments
|
|
333
|
+
|
|
334
|
+
By default, SQL comments are preserved in formatted output. Line comments (`--`) and block comments (`/* */`) are classified by position and passed through formatting:
|
|
335
|
+
|
|
336
|
+
```ruby
|
|
337
|
+
SqlBeautifier.call("-- Base Query\nSELECT id /* primary key */ FROM users WHERE active = true")
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Produces:
|
|
341
|
+
|
|
342
|
+
```sql
|
|
343
|
+
-- Base Query
|
|
344
|
+
select id /* primary key */
|
|
345
|
+
from Users u
|
|
346
|
+
where active = true;
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
Configure `removable_comment_types` to control which comment types are stripped. See the `removable_comment_types` configuration option for details. Comments inside string literals are always preserved regardless of configuration.
|
|
350
|
+
|
|
301
351
|
### Configuration
|
|
302
352
|
|
|
303
353
|
Customize formatting behavior with `SqlBeautifier.configure`:
|
|
304
354
|
|
|
305
355
|
```ruby
|
|
306
356
|
SqlBeautifier.configure do |config|
|
|
307
|
-
config.keyword_case = :upper
|
|
308
|
-
config.keyword_column_width = 10
|
|
309
|
-
config.indent_spaces = 4
|
|
310
|
-
config.clause_spacing_mode = :spacious
|
|
311
|
-
config.table_name_format = :lowercase
|
|
312
|
-
config.inline_group_threshold = 80
|
|
313
|
-
config.alias_strategy = :none
|
|
357
|
+
config.keyword_case = :upper
|
|
358
|
+
config.keyword_column_width = 10
|
|
359
|
+
config.indent_spaces = 4
|
|
360
|
+
config.clause_spacing_mode = :spacious
|
|
361
|
+
config.table_name_format = :lowercase
|
|
362
|
+
config.inline_group_threshold = 80
|
|
363
|
+
config.alias_strategy = :none
|
|
364
|
+
config.trailing_semicolon = false
|
|
365
|
+
config.removable_comment_types = :all
|
|
314
366
|
end
|
|
315
367
|
```
|
|
316
368
|
|
|
317
|
-
#### Clause Spacing Modes
|
|
318
|
-
|
|
319
|
-
- `:compact` (default) keeps top-level clauses on single newlines only when the query is simple:
|
|
320
|
-
- exactly one SELECT column
|
|
321
|
-
- exactly one FROM table (no JOINs)
|
|
322
|
-
- zero or one top-level WHERE condition
|
|
323
|
-
- only `select`, `from`, optional `where`, optional `order by`, and optional `limit`
|
|
324
|
-
- `:spacious` always separates top-level clauses with blank lines
|
|
325
|
-
|
|
326
369
|
Reset to defaults:
|
|
327
370
|
|
|
328
371
|
```ruby
|
|
329
372
|
SqlBeautifier.reset_configuration!
|
|
330
373
|
```
|
|
331
374
|
|
|
332
|
-
####
|
|
375
|
+
#### Per-Call Overrides
|
|
333
376
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
377
|
+
Pass configuration overrides directly to `SqlBeautifier.call` to override global settings for a single invocation:
|
|
378
|
+
|
|
379
|
+
```ruby
|
|
380
|
+
SqlBeautifier.call(query, trailing_semicolon: false, keyword_case: :upper)
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
Per-call overrides take precedence over the global `SqlBeautifier.configure` block. Any keys not included in the override hash fall back to the global configuration. The global configuration is never mutated. Unknown keys raise `ArgumentError`.
|
|
384
|
+
|
|
385
|
+
#### `keyword_case`
|
|
386
|
+
|
|
387
|
+
Controls the case of SQL keywords in the output. Default: `:lower`.
|
|
388
|
+
|
|
389
|
+
- `:lower` — lowercases all keywords (`select`, `from`, `where`, `inner join`, etc.)
|
|
390
|
+
- `:upper` — uppercases all keywords (`SELECT`, `FROM`, `WHERE`, `INNER JOIN`, etc.)
|
|
391
|
+
|
|
392
|
+
#### `keyword_column_width`
|
|
393
|
+
|
|
394
|
+
Sets the column width for single-word keyword alignment. Keywords shorter than this width are right-padded with spaces so clause bodies start at this column position. Default: `8`.
|
|
395
|
+
|
|
396
|
+
For example, with the default width of 8, `select` (6 chars) gets 2 spaces of padding, `where` (5 chars) gets 3 spaces, and `from` (4 chars) gets 4 spaces. Multi-word keywords like `order by` and `group by` use a single space instead of padding.
|
|
397
|
+
|
|
398
|
+
#### `indent_spaces`
|
|
399
|
+
|
|
400
|
+
Number of spaces used for indentation within subqueries and CTE bodies. Each nesting level adds this many spaces of indentation. Default: `4`.
|
|
401
|
+
|
|
402
|
+
#### `clause_spacing_mode`
|
|
403
|
+
|
|
404
|
+
Controls whether blank lines are inserted between top-level clauses. Default: `:compact`.
|
|
405
|
+
|
|
406
|
+
- `:compact` — omits blank lines when the query is simple (single SELECT column, single FROM table with no JOINs, at most one WHERE condition, and only basic clauses like `select`, `from`, `where`, `order by`, `limit`). Complex queries automatically get blank lines regardless.
|
|
407
|
+
- `:spacious` — always inserts blank lines between every top-level clause.
|
|
408
|
+
|
|
409
|
+
#### `table_name_format`
|
|
410
|
+
|
|
411
|
+
Controls how table names are formatted in the output. Default: `:pascal_case`.
|
|
412
|
+
|
|
413
|
+
- `:pascal_case` — capitalizes each underscore-separated segment (`users` → `Users`, `active_storage_blobs` → `Active_Storage_Blobs`)
|
|
414
|
+
- `:lowercase` — keeps table names lowercase as-is
|
|
415
|
+
|
|
416
|
+
#### `inline_group_threshold`
|
|
417
|
+
|
|
418
|
+
Maximum character length for a parenthesized condition group to remain on a single line. Groups whose inline representation exceeds this length are expanded to multiple lines with indented conditions. Default: `0` (always expand).
|
|
419
|
+
|
|
420
|
+
Set to a positive integer to allow short groups to stay inline. For example, with a threshold of `80`, the group `(role = 'admin' or role = 'moderator')` would stay on one line since it's under 80 characters.
|
|
421
|
+
|
|
422
|
+
#### `trailing_semicolon`
|
|
423
|
+
|
|
424
|
+
Controls whether a trailing `;` is appended to each formatted statement. Default: `true`.
|
|
425
|
+
|
|
426
|
+
- `true` — appends `;` at the end of each statement
|
|
427
|
+
- `false` — omits the trailing `;`
|
|
428
|
+
|
|
429
|
+
#### `alias_strategy`
|
|
430
|
+
|
|
431
|
+
Controls automatic table aliasing in FROM and JOIN clauses. Default: `:initials`.
|
|
432
|
+
|
|
433
|
+
- `:initials` — generates aliases from the first letter of each underscore-separated segment (`users` → `u`, `active_storage_blobs` → `asb`). When two tables produce the same initials, a counter is appended for disambiguation (`u1`, `u2`). All `table.column` references throughout the query are replaced with `alias.column`.
|
|
434
|
+
- `:none` — disables automatic aliasing. Explicit aliases written in the SQL are still preserved.
|
|
435
|
+
- Callable — provide a proc/lambda that receives the table name and returns a custom alias string:
|
|
337
436
|
|
|
338
437
|
```ruby
|
|
339
438
|
SqlBeautifier.configure do |config|
|
|
@@ -341,6 +440,26 @@ SqlBeautifier.configure do |config|
|
|
|
341
440
|
end
|
|
342
441
|
```
|
|
343
442
|
|
|
443
|
+
#### `removable_comment_types`
|
|
444
|
+
|
|
445
|
+
Controls which SQL comment types are stripped during formatting. Default: `:none`.
|
|
446
|
+
|
|
447
|
+
- `:none` — preserves all comments in the formatted output
|
|
448
|
+
- `:all` — strips all comments (equivalent to `[:inline, :separate_line, :blocks]`)
|
|
449
|
+
- Array of specific types — strips only the listed types, preserving the rest
|
|
450
|
+
|
|
451
|
+
The three comment types:
|
|
452
|
+
|
|
453
|
+
- `:separate_line` — `--` comments on their own line (only whitespace before `--`), including banner-style dividers
|
|
454
|
+
- `:inline` — `--` comments at the end of a line that contains SQL
|
|
455
|
+
- `:blocks` — `/* ... */` block comments (single or multi-line)
|
|
456
|
+
|
|
457
|
+
```ruby
|
|
458
|
+
SqlBeautifier.configure do |config|
|
|
459
|
+
config.removable_comment_types = [:inline, :blocks]
|
|
460
|
+
end
|
|
461
|
+
```
|
|
462
|
+
|
|
344
463
|
### Callable Interface
|
|
345
464
|
|
|
346
465
|
`SqlBeautifier.call` is the public API, making it a valid callable for Rails `normalizes` and anywhere a proc-like object is expected:
|
|
data/lib/sql_beautifier/clauses/from.rb
CHANGED
|
@@ -141,9 +141,15 @@ module SqlBeautifier
|
|
|
141
141
|
table_name = Util.first_word(table_text)
|
|
142
142
|
formatted_table_name = Util.format_table_name(table_name)
|
|
143
143
|
table_alias = @table_registry.alias_for(table_name)
|
|
144
|
-
|
|
144
|
+
trailing_sentinels = extract_trailing_sentinels(table_text)
|
|
145
145
|
|
|
146
|
-
"#{formatted_table_name} #{table_alias}"
|
|
146
|
+
formatted = table_alias ? "#{formatted_table_name} #{table_alias}" : formatted_table_name
|
|
147
|
+
trailing_sentinels.empty? ? formatted : "#{formatted} #{trailing_sentinels}"
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def extract_trailing_sentinels(text)
|
|
151
|
+
sentinels = text.scan(CommentStripper::SENTINEL_PATTERN).map { |match| "#{CommentStripper::SENTINEL_PREFIX}#{match[0]}#{CommentStripper::SENTINEL_SUFFIX}" }
|
|
152
|
+
sentinels.join(" ")
|
|
147
153
|
end
|
|
148
154
|
end
|
|
149
155
|
end
|
|
data/lib/sql_beautifier/comment_restorer.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlBeautifier
|
|
4
|
+
class CommentRestorer
|
|
5
|
+
def self.call(formatted_sql, comment_map)
|
|
6
|
+
return formatted_sql if comment_map.empty?
|
|
7
|
+
|
|
8
|
+
new(formatted_sql, comment_map).call
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def initialize(formatted_sql, comment_map)
|
|
12
|
+
@formatted_sql = formatted_sql
|
|
13
|
+
@comment_map = comment_map
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def call
|
|
17
|
+
result = @formatted_sql
|
|
18
|
+
|
|
19
|
+
@comment_map.each do |index, entry|
|
|
20
|
+
sentinel = "#{CommentStripper::SENTINEL_PREFIX}#{index}#{CommentStripper::SENTINEL_SUFFIX}"
|
|
21
|
+
|
|
22
|
+
result = begin
|
|
23
|
+
case entry[:type]
|
|
24
|
+
when :blocks
|
|
25
|
+
restore_block_comment(result, sentinel, entry[:text])
|
|
26
|
+
when :separate_line
|
|
27
|
+
restore_separate_line_comment(result, sentinel, entry[:text])
|
|
28
|
+
when :inline
|
|
29
|
+
restore_inline_comment(result, sentinel, entry[:text])
|
|
30
|
+
else
|
|
31
|
+
result
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
result
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def restore_block_comment(sql, sentinel, comment_text)
|
|
42
|
+
sql.sub(sentinel, comment_text)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def restore_separate_line_comment(sql, sentinel, comment_text)
|
|
46
|
+
sql.sub(%r{#{Regexp.escape(sentinel)}[ \n]?}, "#{comment_text}\n")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def restore_inline_comment(sql, sentinel, comment_text)
|
|
50
|
+
pattern = %r{ ?#{Regexp.escape(sentinel)}([^\n]*)}
|
|
51
|
+
sql.sub(pattern) do
|
|
52
|
+
trailing_content = Regexp.last_match(1)
|
|
53
|
+
|
|
54
|
+
if trailing_content.strip.empty?
|
|
55
|
+
" #{comment_text}"
|
|
56
|
+
else
|
|
57
|
+
"#{trailing_content.strip} #{comment_text}"
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
data/lib/sql_beautifier/comment_stripper.rb
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlBeautifier
|
|
4
|
+
class CommentStripper
|
|
5
|
+
SENTINEL_PREFIX = "/*__sqlb_"
|
|
6
|
+
SENTINEL_SUFFIX = "__*/"
|
|
7
|
+
SENTINEL_PATTERN = %r{/\*__sqlb_(\d+)__\*/}
|
|
8
|
+
|
|
9
|
+
Result = Struct.new(:stripped_sql, :comment_map)
|
|
10
|
+
|
|
11
|
+
def self.call(sql, removable_types)
|
|
12
|
+
new(sql, removable_types).call
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def initialize(sql, removable_types)
|
|
16
|
+
@sql = sql
|
|
17
|
+
@removal_set = resolve_removal_set(removable_types)
|
|
18
|
+
@output = +""
|
|
19
|
+
@comment_map = {}
|
|
20
|
+
@sentinel_index = 0
|
|
21
|
+
@position = 0
|
|
22
|
+
@pending_separate_line_comments = []
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def call
|
|
26
|
+
while @position < @sql.length
|
|
27
|
+
character = @sql[@position]
|
|
28
|
+
|
|
29
|
+
if @in_single_quoted_string
|
|
30
|
+
consume_single_quoted_character!(character)
|
|
31
|
+
elsif @in_double_quoted_identifier
|
|
32
|
+
consume_double_quoted_character!(character)
|
|
33
|
+
elsif character == Constants::SINGLE_QUOTE
|
|
34
|
+
flush_pending_separate_line_comments!
|
|
35
|
+
@in_single_quoted_string = true
|
|
36
|
+
@output << character
|
|
37
|
+
@position += 1
|
|
38
|
+
elsif character == Constants::DOUBLE_QUOTE
|
|
39
|
+
flush_pending_separate_line_comments!
|
|
40
|
+
@in_double_quoted_identifier = true
|
|
41
|
+
@output << character
|
|
42
|
+
@position += 1
|
|
43
|
+
elsif line_comment_start?
|
|
44
|
+
handle_line_comment!
|
|
45
|
+
elsif block_comment_start?
|
|
46
|
+
flush_pending_separate_line_comments!
|
|
47
|
+
handle_block_comment!
|
|
48
|
+
else
|
|
49
|
+
flush_pending_separate_line_comments! unless character == "\n"
|
|
50
|
+
@output << character
|
|
51
|
+
@position += 1
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
flush_pending_separate_line_comments!
|
|
56
|
+
|
|
57
|
+
Result.new(@output, @comment_map)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
def resolve_removal_set(removable_types)
|
|
63
|
+
case removable_types
|
|
64
|
+
when :none
|
|
65
|
+
[]
|
|
66
|
+
when :all
|
|
67
|
+
Configuration::COMMENT_TYPES.dup
|
|
68
|
+
when Array
|
|
69
|
+
invalid_types = removable_types - Configuration::COMMENT_TYPES
|
|
70
|
+
raise ArgumentError, "Unsupported removable_types entries: #{invalid_types.inspect}. Expected elements of #{Configuration::COMMENT_TYPES.inspect}" if invalid_types.any?
|
|
71
|
+
|
|
72
|
+
removable_types
|
|
73
|
+
when *Configuration::COMMENT_TYPES
|
|
74
|
+
[removable_types]
|
|
75
|
+
else
|
|
76
|
+
raise ArgumentError, "Unsupported removable_types: #{removable_types.inspect}. Expected :none, :all, an Array, or one of #{Configuration::COMMENT_TYPES.inspect}"
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def consume_single_quoted_character!(character)
|
|
81
|
+
@output << character
|
|
82
|
+
|
|
83
|
+
if character == Constants::SINGLE_QUOTE && @sql[@position + 1] == Constants::SINGLE_QUOTE
|
|
84
|
+
@position += 1
|
|
85
|
+
@output << @sql[@position]
|
|
86
|
+
elsif character == Constants::SINGLE_QUOTE
|
|
87
|
+
@in_single_quoted_string = false
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
@position += 1
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def consume_double_quoted_character!(character)
|
|
94
|
+
@output << character
|
|
95
|
+
|
|
96
|
+
if character == Constants::DOUBLE_QUOTE && @sql[@position + 1] == Constants::DOUBLE_QUOTE
|
|
97
|
+
@position += 1
|
|
98
|
+
@output << @sql[@position]
|
|
99
|
+
elsif character == Constants::DOUBLE_QUOTE
|
|
100
|
+
@in_double_quoted_identifier = false
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
@position += 1
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def line_comment_start?
|
|
107
|
+
@sql[@position] == "-" && @sql[@position + 1] == "-"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def block_comment_start?
|
|
111
|
+
@sql[@position] == "/" && @sql[@position + 1] == "*"
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def handle_line_comment!
|
|
115
|
+
comment_type = separate_line_comment? ? :separate_line : :inline
|
|
116
|
+
comment_text = extract_line_comment_text
|
|
117
|
+
|
|
118
|
+
if removable?(comment_type)
|
|
119
|
+
strip_line_comment!
|
|
120
|
+
else
|
|
121
|
+
preserve_line_comment!(comment_type, comment_text)
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def separate_line_comment?
|
|
126
|
+
line_start = @output.rindex("\n")
|
|
127
|
+
preceding_content = begin
|
|
128
|
+
if line_start
|
|
129
|
+
@output[(line_start + 1)..]
|
|
130
|
+
else
|
|
131
|
+
@output
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
preceding_content.match?(%r{\A[[:space:]]*\z})
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
def extract_line_comment_text
|
|
139
|
+
start_position = @position
|
|
140
|
+
@position += 2
|
|
141
|
+
|
|
142
|
+
@position += 1 while @position < @sql.length && @sql[@position] != "\n"
|
|
143
|
+
|
|
144
|
+
@sql[start_position...@position]
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def strip_line_comment!; end
|
|
148
|
+
|
|
149
|
+
def preserve_line_comment!(comment_type, comment_text)
|
|
150
|
+
if comment_type == :separate_line
|
|
151
|
+
@pending_separate_line_comments << comment_text
|
|
152
|
+
@position += 1 if @position < @sql.length && @sql[@position] == "\n"
|
|
153
|
+
else
|
|
154
|
+
flush_pending_separate_line_comments!
|
|
155
|
+
sentinel = build_sentinel(comment_type, comment_text)
|
|
156
|
+
@output << sentinel
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def handle_block_comment!
|
|
161
|
+
comment_text = extract_block_comment_text
|
|
162
|
+
|
|
163
|
+
if removable?(:blocks)
|
|
164
|
+
strip_block_comment!
|
|
165
|
+
else
|
|
166
|
+
sentinel = build_sentinel(:blocks, comment_text)
|
|
167
|
+
@output << " " unless @output.empty? || @output[-1] =~ Constants::WHITESPACE_CHARACTER_REGEX
|
|
168
|
+
@output << sentinel
|
|
169
|
+
|
|
170
|
+
next_character = @sql[@position]
|
|
171
|
+
@output << " " if next_character && next_character !~ Constants::WHITESPACE_CHARACTER_REGEX
|
|
172
|
+
end
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def extract_block_comment_text
|
|
176
|
+
start_position = @position
|
|
177
|
+
@position += 2
|
|
178
|
+
|
|
179
|
+
while @position < @sql.length
|
|
180
|
+
if @sql[@position] == "*" && @sql[@position + 1] == "/"
|
|
181
|
+
@position += 2
|
|
182
|
+
break
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
@position += 1
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
@sql[start_position...@position]
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def strip_block_comment!
|
|
192
|
+
@output << " " unless @output.empty? || @output[-1] =~ Constants::WHITESPACE_CHARACTER_REGEX
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
def flush_pending_separate_line_comments!
|
|
196
|
+
return if @pending_separate_line_comments.empty?
|
|
197
|
+
|
|
198
|
+
grouped_text = @pending_separate_line_comments.join("\n")
|
|
199
|
+
sentinel = build_sentinel(:separate_line, grouped_text)
|
|
200
|
+
|
|
201
|
+
@output << sentinel
|
|
202
|
+
@output << "\n"
|
|
203
|
+
|
|
204
|
+
@pending_separate_line_comments.clear
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def build_sentinel(comment_type, comment_text)
|
|
208
|
+
index = @sentinel_index
|
|
209
|
+
@sentinel_index += 1
|
|
210
|
+
@comment_map[index] = {
|
|
211
|
+
type: comment_type,
|
|
212
|
+
text: comment_text,
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
"#{SENTINEL_PREFIX}#{index}#{SENTINEL_SUFFIX}"
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def removable?(comment_type)
|
|
219
|
+
@removal_set.include?(comment_type)
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
data/lib/sql_beautifier/configuration.rb
CHANGED
|
@@ -8,10 +8,14 @@ module SqlBeautifier
|
|
|
8
8
|
indent_spaces: 4,
|
|
9
9
|
clause_spacing_mode: :compact,
|
|
10
10
|
table_name_format: :pascal_case,
|
|
11
|
-
inline_group_threshold: 100,
|
|
11
|
+
inline_group_threshold: 0,
|
|
12
12
|
alias_strategy: :initials,
|
|
13
|
+
trailing_semicolon: true,
|
|
14
|
+
removable_comment_types: :none,
|
|
13
15
|
}.freeze
|
|
14
16
|
|
|
17
|
+
COMMENT_TYPES = %i[inline separate_line blocks].freeze
|
|
18
|
+
|
|
15
19
|
attr_accessor :keyword_case
|
|
16
20
|
attr_accessor :keyword_column_width
|
|
17
21
|
attr_accessor :indent_spaces
|
|
@@ -19,6 +23,8 @@ module SqlBeautifier
|
|
|
19
23
|
attr_accessor :table_name_format
|
|
20
24
|
attr_accessor :inline_group_threshold
|
|
21
25
|
attr_accessor :alias_strategy
|
|
26
|
+
attr_accessor :trailing_semicolon
|
|
27
|
+
attr_accessor :removable_comment_types
|
|
22
28
|
|
|
23
29
|
def initialize
|
|
24
30
|
reset!
|
|
data/lib/sql_beautifier/formatter.rb
CHANGED
|
@@ -17,14 +17,17 @@ module SqlBeautifier
|
|
|
17
17
|
@normalized_value = Normalizer.call(@value)
|
|
18
18
|
return unless @normalized_value.present?
|
|
19
19
|
|
|
20
|
+
@leading_sentinels = extract_leading_sentinels!
|
|
21
|
+
return unless @normalized_value.present?
|
|
22
|
+
|
|
20
23
|
cte_result = CteFormatter.format(@normalized_value, depth: @depth)
|
|
21
|
-
return cte_result if cte_result
|
|
24
|
+
return prepend_sentinels(cte_result) if cte_result
|
|
22
25
|
|
|
23
26
|
create_table_as_result = CreateTableAsFormatter.format(@normalized_value, depth: @depth)
|
|
24
|
-
return create_table_as_result if create_table_as_result
|
|
27
|
+
return prepend_sentinels(create_table_as_result) if create_table_as_result
|
|
25
28
|
|
|
26
29
|
first_clause_position = Tokenizer.first_clause_position(@normalized_value)
|
|
27
|
-
return "#{@normalized_value}\n" if first_clause_position.nil? || first_clause_position.positive?
|
|
30
|
+
return prepend_sentinels("#{@normalized_value}\n") if first_clause_position.nil? || first_clause_position.positive?
|
|
28
31
|
|
|
29
32
|
@clauses = Tokenizer.split_into_clauses(@normalized_value)
|
|
30
33
|
@table_registry = TableRegistry.new(@clauses[:from]) if @clauses[:from].present?
|
|
@@ -39,15 +42,36 @@ module SqlBeautifier
|
|
|
39
42
|
append_clause!(:limit, Clauses::Limit)
|
|
40
43
|
|
|
41
44
|
output = @parts.join(clause_separator)
|
|
42
|
-
return "#{@normalized_value}\n" if output.empty?
|
|
45
|
+
return prepend_sentinels("#{@normalized_value}\n") if output.empty?
|
|
43
46
|
|
|
44
47
|
output = SubqueryFormatter.format(output, @depth)
|
|
45
48
|
output = @table_registry.apply_aliases(output) if @table_registry
|
|
46
|
-
"#{output}\n"
|
|
49
|
+
prepend_sentinels("#{output}\n")
|
|
47
50
|
end
|
|
48
51
|
|
|
49
52
|
private
|
|
50
53
|
|
|
54
|
+
def extract_leading_sentinels!
|
|
55
|
+
leading_sentinel_text = +""
|
|
56
|
+
remaining_value = @normalized_value
|
|
57
|
+
|
|
58
|
+
while remaining_value.match?(%r{\A#{CommentStripper::SENTINEL_PATTERN}[[:space:]]*})
|
|
59
|
+
match = remaining_value.match(%r{\A(#{CommentStripper::SENTINEL_PATTERN}[[:space:]]*)})
|
|
60
|
+
leading_sentinel_text << match[1]
|
|
61
|
+
remaining_value = remaining_value[match[1].length..]
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
@normalized_value = remaining_value
|
|
65
|
+
|
|
66
|
+
leading_sentinel_text
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def prepend_sentinels(output)
|
|
70
|
+
return output if @leading_sentinels.empty?
|
|
71
|
+
|
|
72
|
+
"#{@leading_sentinels}#{output}"
|
|
73
|
+
end
|
|
74
|
+
|
|
51
75
|
def append_clause!(clause_key, formatter_class)
|
|
52
76
|
value = @clauses[clause_key]
|
|
53
77
|
return unless value.present?
|
|
data/lib/sql_beautifier/normalizer.rb
CHANGED
|
@@ -18,7 +18,6 @@ module SqlBeautifier
|
|
|
18
18
|
@source = @value.strip
|
|
19
19
|
return unless @source.present?
|
|
20
20
|
|
|
21
|
-
@source = strip_comments(@source)
|
|
22
21
|
@source = strip_trailing_semicolons(@source)
|
|
23
22
|
@source = @source.strip
|
|
24
23
|
return unless @source.present?
|
|
@@ -34,6 +33,14 @@ module SqlBeautifier
|
|
|
34
33
|
when Constants::DOUBLE_QUOTE
|
|
35
34
|
consume_quoted_identifier!
|
|
36
35
|
|
|
36
|
+
when "/"
|
|
37
|
+
if sentinel_at_position?
|
|
38
|
+
consume_sentinel!
|
|
39
|
+
else
|
|
40
|
+
@output << current_character.downcase
|
|
41
|
+
@position += 1
|
|
42
|
+
end
|
|
43
|
+
|
|
37
44
|
when Constants::WHITESPACE_CHARACTER_REGEX
|
|
38
45
|
collapse_whitespace!
|
|
39
46
|
|
|
@@ -52,6 +59,24 @@ module SqlBeautifier
|
|
|
52
59
|
@source[@position]
|
|
53
60
|
end
|
|
54
61
|
|
|
62
|
+
# True when the source text at the current scan position starts with
# `CommentStripper::SENTINEL_PREFIX`.
def sentinel_at_position?
  prefix = CommentStripper::SENTINEL_PREFIX
  @source[@position, prefix.length] == prefix
end
|
|
65
|
+
|
|
66
|
+
# Copies the sentinel starting at `@position` to `@output` verbatim (no
# case-folding or whitespace collapsing) and moves `@position` just past it.
#
# The end of the sentinel is located with `CommentStripper::SENTINEL_SUFFIX`,
# the same constant `Tokenizer.sentinel_end_position` uses, instead of a
# hard-coded "*/" and length of 2.
# NOTE(review): equivalent to the previous behavior as long as the suffix ends
# in "*/" — confirm against CommentStripper.
#
# When no closing suffix is found, the text is not a complete sentinel: only
# the current character is emitted (downcased, like ordinary characters) and
# scanning continues with the next one.
def consume_sentinel!
  suffix = CommentStripper::SENTINEL_SUFFIX
  suffix_start = @source.index(suffix, @position + CommentStripper::SENTINEL_PREFIX.length)

  unless suffix_start
    @output << current_character.downcase
    @position += 1
    return
  end

  end_position = suffix_start + suffix.length
  @output << @source[@position...end_position]
  @position = end_position
end
|
|
79
|
+
|
|
55
80
|
def collapse_whitespace!
|
|
56
81
|
@output << " "
|
|
57
82
|
@position += 1
|
|
@@ -119,67 +144,5 @@ module SqlBeautifier
|
|
|
119
144
|
# Removes all statement-terminating semicolons from the end of `sql`, together
# with any whitespace between or after them ("select 1;; ;\n" => "select 1").
#
# Previously only the last semicolon was removed, so input ending in ";;"
# kept a stray ";". Semicolons elsewhere in the string are not touched, and a
# string without a trailing semicolon is returned unchanged (trailing
# whitespace included).
def strip_trailing_semicolons(sql)
  sql.sub(%r{(?:;[[:space:]]*)+\z}, "")
end
|
|
122
|
-
|
|
123
|
-
def strip_comments(sql)
|
|
124
|
-
output = +""
|
|
125
|
-
position = 0
|
|
126
|
-
in_single_quoted_string = false
|
|
127
|
-
in_double_quoted_identifier = false
|
|
128
|
-
|
|
129
|
-
while position < sql.length
|
|
130
|
-
character = sql[position]
|
|
131
|
-
|
|
132
|
-
if in_single_quoted_string
|
|
133
|
-
output << character
|
|
134
|
-
|
|
135
|
-
if character == Constants::SINGLE_QUOTE && sql[position + 1] == Constants::SINGLE_QUOTE
|
|
136
|
-
position += 1
|
|
137
|
-
output << sql[position]
|
|
138
|
-
elsif character == Constants::SINGLE_QUOTE
|
|
139
|
-
in_single_quoted_string = false
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
position += 1
|
|
143
|
-
next
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
if in_double_quoted_identifier
|
|
147
|
-
output << character
|
|
148
|
-
|
|
149
|
-
if character == Constants::DOUBLE_QUOTE && sql[position + 1] == Constants::DOUBLE_QUOTE
|
|
150
|
-
position += 1
|
|
151
|
-
output << sql[position]
|
|
152
|
-
elsif character == Constants::DOUBLE_QUOTE
|
|
153
|
-
in_double_quoted_identifier = false
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
position += 1
|
|
157
|
-
next
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
if character == Constants::SINGLE_QUOTE
|
|
161
|
-
in_single_quoted_string = true
|
|
162
|
-
output << character
|
|
163
|
-
position += 1
|
|
164
|
-
elsif character == Constants::DOUBLE_QUOTE
|
|
165
|
-
in_double_quoted_identifier = true
|
|
166
|
-
output << character
|
|
167
|
-
position += 1
|
|
168
|
-
elsif character == "-" && sql[position + 1] == "-"
|
|
169
|
-
position += 2
|
|
170
|
-
position += 1 while position < sql.length && sql[position] != "\n"
|
|
171
|
-
elsif character == "/" && sql[position + 1] == "*"
|
|
172
|
-
output << " " unless output.empty? || output[-1] =~ Constants::WHITESPACE_CHARACTER_REGEX
|
|
173
|
-
position += 2
|
|
174
|
-
position += 1 while position < sql.length && !(sql[position] == "*" && sql[position + 1] == "/")
|
|
175
|
-
position += 2 if position < sql.length
|
|
176
|
-
else
|
|
177
|
-
output << character
|
|
178
|
-
position += 1
|
|
179
|
-
end
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
output
|
|
183
|
-
end
|
|
184
147
|
end
|
|
185
148
|
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlBeautifier
  # Runs the whole pipeline for one input string: strip comments, split the
  # result into statements, format each statement, join them back together,
  # then hand the joined text and the comment map to CommentRestorer.
  class StatementAssembler
    # Convenience entry point: `StatementAssembler.call(sql)`.
    def self.call(value)
      new(value).call
    end

    # @param value the raw SQL text to beautify
    def initialize(value)
      @value = value
    end

    # Returns the result of CommentRestorer for the assembled output, or nil
    # when no statement produced any formatted output.
    def call
      stripped = CommentStripper.call(@value, SqlBeautifier.config_for(:removable_comment_types))

      formatted = StatementSplitter
        .split(stripped.stripped_sql)
        .filter_map { |statement| Formatter.call(statement)&.chomp }
      return if formatted.empty?

      # Each statement ends with ";" only when the :trailing_semicolon option
      # is truthy; statements are separated by one blank line either way.
      statement_end = SqlBeautifier.config_for(:trailing_semicolon) ? ";" : ""
      assembled = "#{formatted.join("#{statement_end}\n\n")}#{statement_end}\n"

      CommentRestorer.call(assembled, stripped.comment_map)
    end
  end
end
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlBeautifier
  # Splits a SQL string into individual statements.
  #
  # Splitting happens in two passes: first on top-level semicolons, then each
  # resulting chunk is checked for statements that were concatenated without a
  # semicolon between them. Both passes ignore anything inside single-quoted
  # strings, double-quoted identifiers, dollar-quoted strings, comment
  # sentinels (as recognized by Tokenizer.sentinel_at?) and parentheses.
  module StatementSplitter
    # Keywords that may open a statement.
    STATEMENT_KEYWORDS = %w[select with create insert update delete].freeze
    # Clause keywords; seeing one means the current statement has a body, so a
    # later statement keyword can be treated as the start of a new statement.
    BOUNDARY_KEYWORDS = %w[from where having limit into set values].freeze
    # opening keyword => statement keyword that continues the same statement
    # (a `select` after `insert` does not start a new statement).
    CONTINUATION_PAIRS = { "insert" => "select" }.freeze

    module_function

    # @param sql [String]
    # @return [Array<String>] the non-empty statements, in order, without the
    #   separating semicolons
    def split(sql)
      semicolon_chunks = split_on_semicolons(sql)
      statements = semicolon_chunks.flat_map { |chunk| split_concatenated_statements(chunk) }.reject(&:empty?)
      merge_trailing_sentinel_segments(statements)
    end

    # Cuts `sql` at every semicolon found outside quotes, dollar-quoted
    # strings, sentinels and parentheses. Segments are stripped and empty ones
    # are dropped; the semicolons themselves are not kept.
    def split_on_semicolons(sql)
      segments = []
      current_segment = +""
      inside_string_literal = false
      inside_quoted_identifier = false
      inside_dollar_quoted_string = false
      dollar_quote_delimiter = nil
      parenthesis_depth = 0
      position = 0

      while position < sql.length
        character = sql[position]

        if inside_string_literal
          current_segment << character

          if character == Constants::SINGLE_QUOTE && sql[position + 1] == Constants::SINGLE_QUOTE
            # Doubled quote is an escaped quote: copy both, stay in the string.
            position += 1
            current_segment << sql[position]
          elsif character == Constants::SINGLE_QUOTE
            inside_string_literal = false
          end

          position += 1
          next
        end

        if inside_dollar_quoted_string
          if sql[position, dollar_quote_delimiter.length] == dollar_quote_delimiter
            current_segment << dollar_quote_delimiter
            position += dollar_quote_delimiter.length
            inside_dollar_quoted_string = false
            dollar_quote_delimiter = nil
          else
            current_segment << character
            position += 1
          end
          next
        end

        if inside_quoted_identifier
          current_segment << character

          if character == Constants::DOUBLE_QUOTE && sql[position + 1] == Constants::DOUBLE_QUOTE
            position += 1
            current_segment << sql[position]
          elsif character == Constants::DOUBLE_QUOTE
            inside_quoted_identifier = false
          end

          position += 1
          next
        end

        # Sentinels are copied through as opaque units.
        if Tokenizer.sentinel_at?(sql, position)
          end_position = Tokenizer.sentinel_end_position(sql, position)
          current_segment << sql[position...end_position]
          position = end_position
          next
        end

        delimiter = Tokenizer.dollar_quote_delimiter_at(sql, position)

        if delimiter
          inside_dollar_quoted_string = true
          dollar_quote_delimiter = delimiter
          current_segment << delimiter
          position += delimiter.length
          next
        end

        case character
        when Constants::SINGLE_QUOTE
          inside_string_literal = true
          current_segment << character
        when Constants::DOUBLE_QUOTE
          inside_quoted_identifier = true
          current_segment << character
        when Constants::OPEN_PARENTHESIS
          parenthesis_depth += 1
          current_segment << character
        when Constants::CLOSE_PARENTHESIS
          # Clamp at zero so an unbalanced ")" cannot push the depth negative.
          parenthesis_depth = [parenthesis_depth - 1, 0].max
          current_segment << character
        when ";"
          if parenthesis_depth.zero?
            stripped_segment = current_segment.strip
            segments << stripped_segment unless stripped_segment.empty?
            current_segment = +""
          else
            current_segment << character
          end
        else
          current_segment << character
        end

        position += 1
      end

      stripped_segment = current_segment.strip
      segments << stripped_segment unless stripped_segment.empty?
      segments
    end

    # Splits one semicolon-free chunk at the boundaries reported by
    # detect_statement_boundaries.
    #
    # The first statement always starts at the beginning of the chunk, so text
    # that precedes the first statement keyword (for example a leading comment
    # sentinel) stays attached to it instead of being dropped.
    def split_concatenated_statements(sql)
      boundaries = detect_statement_boundaries(sql)
      return [sql.strip] if boundaries.length <= 1

      later_starts = boundaries.drop(1)
      segment_starts = [0, *later_starts]

      segment_starts.each_with_index.filter_map do |start_position, index|
        end_position = segment_starts.fetch(index + 1, sql.length)
        statement = sql[start_position...end_position].strip
        statement unless statement.empty?
      end
    end

    # Returns the positions at which statements start inside `sql`.
    #
    # The first top-level statement keyword is always a boundary. A later
    # statement keyword is a boundary only when a clause keyword has been seen
    # since the previous boundary and the keyword is not a registered
    # continuation of the current statement (see CONTINUATION_PAIRS).
    def detect_statement_boundaries(sql)
      boundaries = []
      clause_seen = false
      current_statement_keyword = nil
      inside_string_literal = false
      inside_quoted_identifier = false
      inside_dollar_quoted_string = false
      dollar_quote_delimiter = nil
      parenthesis_depth = 0
      position = 0

      while position < sql.length
        character = sql[position]

        if inside_string_literal
          if character == Constants::SINGLE_QUOTE && sql[position + 1] == Constants::SINGLE_QUOTE
            position += 2
          else
            inside_string_literal = false if character == Constants::SINGLE_QUOTE
            position += 1
          end
          next
        end

        if inside_dollar_quoted_string
          if sql[position, dollar_quote_delimiter.length] == dollar_quote_delimiter
            position += dollar_quote_delimiter.length
            inside_dollar_quoted_string = false
            dollar_quote_delimiter = nil
          else
            position += 1
          end
          next
        end

        if inside_quoted_identifier
          if character == Constants::DOUBLE_QUOTE && sql[position + 1] == Constants::DOUBLE_QUOTE
            position += 2
          else
            inside_quoted_identifier = false if character == Constants::DOUBLE_QUOTE
            position += 1
          end
          next
        end

        if Tokenizer.sentinel_at?(sql, position)
          position = Tokenizer.sentinel_end_position(sql, position)
          next
        end

        delimiter = Tokenizer.dollar_quote_delimiter_at(sql, position)

        if delimiter
          inside_dollar_quoted_string = true
          dollar_quote_delimiter = delimiter
          position += delimiter.length
          next
        end

        case character
        when Constants::SINGLE_QUOTE
          inside_string_literal = true
        when Constants::DOUBLE_QUOTE
          inside_quoted_identifier = true
        when Constants::OPEN_PARENTHESIS
          parenthesis_depth += 1
        when Constants::CLOSE_PARENTHESIS
          parenthesis_depth = [parenthesis_depth - 1, 0].max
        else
          # Keywords only count at the top level, never inside parentheses.
          if parenthesis_depth.zero?
            matched_statement_keyword = keyword_match_at(sql, position, STATEMENT_KEYWORDS)

            if matched_statement_keyword
              if clause_seen && !continuation_keyword?(current_statement_keyword, matched_statement_keyword)
                boundaries << position
                clause_seen = false
                current_statement_keyword = matched_statement_keyword
              elsif boundaries.empty?
                boundaries << position
                current_statement_keyword = matched_statement_keyword
              end

              position += matched_statement_keyword.length
              next
            end

            matched_boundary_keyword = keyword_match_at(sql, position, BOUNDARY_KEYWORDS)

            if matched_boundary_keyword
              clause_seen = true
              position += matched_boundary_keyword.length
              next
            end
          end
        end

        position += 1
      end

      boundaries
    end

    # Appends every segment that consists only of sentinels to the statement
    # before it (joined with a single space), so it does not become a
    # statement of its own. A sentinel-only segment with nothing before it is
    # kept as-is.
    def merge_trailing_sentinel_segments(statements)
      return statements if statements.length <= 1

      statements.each_with_object([]) do |statement, merged|
        if sentinel_only?(statement) && merged.any?
          merged[-1] = "#{merged[-1]} #{statement}"
        else
          merged << statement
        end
      end
    end

    # True when nothing but whitespace is left after removing all sentinels.
    def sentinel_only?(segment)
      segment.gsub(CommentStripper::SENTINEL_PATTERN, "").strip.empty?
    end

    # True when `next_keyword` continues the statement opened by
    # `current_keyword` rather than starting a new one.
    def continuation_keyword?(current_keyword, next_keyword)
      CONTINUATION_PAIRS[current_keyword] == next_keyword
    end

    # Returns the first of `keywords` that Tokenizer.keyword_at? reports at
    # `position`, or nil when none matches.
    def keyword_match_at(sql, position, keywords)
      keywords.find { |keyword| Tokenizer.keyword_at?(sql, position, keyword) }
    end
  end
end
|
|
@@ -216,7 +216,7 @@ module SqlBeautifier
|
|
|
216
216
|
end
|
|
217
217
|
|
|
218
218
|
def extract_explicit_alias(table_specification)
|
|
219
|
-
words = table_specification.strip.split(Constants::WHITESPACE_REGEX)
|
|
219
|
+
words = table_specification.strip.split(Constants::WHITESPACE_REGEX).grep_v(CommentStripper::SENTINEL_PATTERN)
|
|
220
220
|
return nil if words.length < 2
|
|
221
221
|
|
|
222
222
|
if words[1] == "as"
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
module SqlBeautifier
|
|
4
4
|
module Tokenizer
|
|
5
5
|
IDENTIFIER_CHARACTER = %r{[[:alnum:]_$]}
|
|
6
|
+
SENTINEL_MAX_LOOKBACK = 20
|
|
6
7
|
|
|
7
8
|
module_function
|
|
8
9
|
|
|
@@ -16,6 +17,11 @@ module SqlBeautifier
|
|
|
16
17
|
|
|
17
18
|
match_position = match.begin(0)
|
|
18
19
|
|
|
20
|
+
if inside_sentinel?(sql, match_position)
|
|
21
|
+
search_position = sentinel_end_position(sql, match_position) || (match_position + 1)
|
|
22
|
+
next
|
|
23
|
+
end
|
|
24
|
+
|
|
19
25
|
previous_character = character_before(sql, match_position)
|
|
20
26
|
next_character = character_after(sql, match_position, keyword.length)
|
|
21
27
|
|
|
@@ -104,6 +110,13 @@ module SqlBeautifier
|
|
|
104
110
|
next
|
|
105
111
|
end
|
|
106
112
|
|
|
113
|
+
if sentinel_at?(text, position)
|
|
114
|
+
end_position = sentinel_end_position(text, position)
|
|
115
|
+
current_segment << text[position...end_position]
|
|
116
|
+
position = end_position
|
|
117
|
+
next
|
|
118
|
+
end
|
|
119
|
+
|
|
107
120
|
case character
|
|
108
121
|
when Constants::SINGLE_QUOTE
|
|
109
122
|
inside_string_literal = true
|
|
@@ -201,6 +214,11 @@ module SqlBeautifier
|
|
|
201
214
|
next
|
|
202
215
|
end
|
|
203
216
|
|
|
217
|
+
if sentinel_at?(text, position)
|
|
218
|
+
position = sentinel_end_position(text, position)
|
|
219
|
+
next
|
|
220
|
+
end
|
|
221
|
+
|
|
204
222
|
case character
|
|
205
223
|
when Constants::SINGLE_QUOTE
|
|
206
224
|
inside_string_literal = true
|
|
@@ -252,6 +270,40 @@ module SqlBeautifier
|
|
|
252
270
|
text[position] == Constants::DOUBLE_QUOTE && text[position + 1] == Constants::DOUBLE_QUOTE
|
|
253
271
|
end
|
|
254
272
|
|
|
273
|
+
# Returns the dollar-quote delimiter that starts exactly at `position` in
# `text` — either "$$" or "$tag$", where the tag is a letter or underscore
# followed by letters, digits or underscores — or nil when there is none
# (for example at a positional parameter such as "$1").
#
# The pattern is anchored at `position` with \G, so a lone "$" no longer
# triggers a scan to the next "$" somewhere later in the text.
def dollar_quote_delimiter_at(text, position)
  match = %r{\G\$(?:[[:alpha:]_][[:alnum:]_]*)?\$}.match(text, position)
  match && match[0]
end
|
|
286
|
+
|
|
287
|
+
# True when `text` has `CommentStripper::SENTINEL_PREFIX` starting exactly at
# `position`.
def sentinel_at?(text, position)
  prefix = CommentStripper::SENTINEL_PREFIX
  text[position, prefix.length] == prefix
end
|
|
290
|
+
|
|
291
|
+
# Returns the index just past the sentinel that starts at `position`: the
# first `CommentStripper::SENTINEL_SUFFIX` after the prefix, plus the suffix
# length. When no suffix follows, returns `position + 1` so callers still
# advance by one character.
def sentinel_end_position(text, position)
  suffix = CommentStripper::SENTINEL_SUFFIX
  suffix_start = text.index(suffix, position + CommentStripper::SENTINEL_PREFIX.length)

  suffix_start ? suffix_start + suffix.length : position + 1
end
|
|
297
|
+
|
|
298
|
+
# True when `position` falls inside a sentinel, i.e. a sentinel prefix starts
# at most SENTINEL_MAX_LOOKBACK characters before `position` (or at it) and
# that sentinel ends after `position`.
#
# Only the lookback window is searched. The previous implementation used
# `rindex` over the whole text and discarded far-away hits afterwards, which
# walked back to the start of the text on every call whenever no sentinel was
# nearby. The result is the same.
def inside_sentinel?(text, position)
  prefix = CommentStripper::SENTINEL_PREFIX
  window_start = [position - SENTINEL_MAX_LOOKBACK, 0].max

  # Wide enough to hold a complete prefix that starts exactly at `position`.
  window = text[window_start, position - window_start + prefix.length]
  return false unless window

  offset_in_window = window.rindex(prefix)
  return false unless offset_in_window

  position < sentinel_end_position(text, window_start + offset_in_window)
end
|
|
306
|
+
|
|
255
307
|
def top_level?(sql, target_position)
|
|
256
308
|
parenthesis_depth = 0
|
|
257
309
|
inside_string_literal = false
|
|
@@ -285,6 +337,11 @@ module SqlBeautifier
|
|
|
285
337
|
next
|
|
286
338
|
end
|
|
287
339
|
|
|
340
|
+
if sentinel_at?(sql, position)
|
|
341
|
+
position = sentinel_end_position(sql, position)
|
|
342
|
+
next
|
|
343
|
+
end
|
|
344
|
+
|
|
288
345
|
case character
|
|
289
346
|
when Constants::SINGLE_QUOTE
|
|
290
347
|
inside_string_literal = true
|
|
@@ -337,6 +394,11 @@ module SqlBeautifier
|
|
|
337
394
|
next
|
|
338
395
|
end
|
|
339
396
|
|
|
397
|
+
if sentinel_at?(text, position)
|
|
398
|
+
position = sentinel_end_position(text, position)
|
|
399
|
+
next
|
|
400
|
+
end
|
|
401
|
+
|
|
340
402
|
case character
|
|
341
403
|
when Constants::SINGLE_QUOTE
|
|
342
404
|
inside_string_literal = true
|
data/lib/sql_beautifier.rb
CHANGED
|
@@ -7,8 +7,11 @@ require_relative "sql_beautifier/constants"
|
|
|
7
7
|
require_relative "sql_beautifier/util"
|
|
8
8
|
require_relative "sql_beautifier/configuration"
|
|
9
9
|
|
|
10
|
+
require_relative "sql_beautifier/comment_stripper"
|
|
11
|
+
require_relative "sql_beautifier/comment_restorer"
|
|
10
12
|
require_relative "sql_beautifier/normalizer"
|
|
11
13
|
require_relative "sql_beautifier/tokenizer"
|
|
14
|
+
require_relative "sql_beautifier/statement_splitter"
|
|
12
15
|
require_relative "sql_beautifier/table_registry"
|
|
13
16
|
require_relative "sql_beautifier/condition_formatter"
|
|
14
17
|
require_relative "sql_beautifier/subquery_formatter"
|
|
@@ -24,16 +27,19 @@ require_relative "sql_beautifier/clauses/order_by"
|
|
|
24
27
|
require_relative "sql_beautifier/clauses/having"
|
|
25
28
|
require_relative "sql_beautifier/clauses/limit"
|
|
26
29
|
require_relative "sql_beautifier/formatter"
|
|
30
|
+
require_relative "sql_beautifier/statement_assembler"
|
|
27
31
|
|
|
28
32
|
module SqlBeautifier
|
|
29
33
|
class Error < StandardError; end
|
|
30
34
|
|
|
31
35
|
module_function
|
|
32
36
|
|
|
33
|
-
def call(value)
|
|
37
|
+
# Beautifies `value`, applying `config` as overrides for the duration of this
# call only (see with_configuration). Returns nil when `value` is not present;
# otherwise returns whatever StatementAssembler.call returns.
def call(value, config = {})
  return unless value.present?

  with_configuration(config) { StatementAssembler.call(value) }
end
|
|
38
44
|
|
|
39
45
|
def configuration
|
|
@@ -45,10 +51,31 @@ module SqlBeautifier
|
|
|
45
51
|
end
|
|
46
52
|
|
|
47
53
|
# Looks up a single configuration value. An override stored for the current
# thread under :sql_beautifier_config wins when it contains `key` (even if the
# stored value is nil or false); otherwise the value is read from the
# `configuration` object.
def config_for(key)
  thread_overrides = Thread.current[:sql_beautifier_config]
  overridden = thread_overrides&.key?(key)

  overridden ? thread_overrides[key] : configuration.public_send(key)
end
|
|
50
59
|
|
|
51
60
|
# Discards the current configuration object by replacing it with a newly
# constructed Configuration, which is also the return value. Thread-local
# overrides are not touched here.
def reset_configuration!
  @configuration = Configuration.new
end
|
|
63
|
+
|
|
64
|
+
# Runs the given block with `config` applied as overrides for the current
# thread, and returns the block's result.
#
# - An empty Hash applies nothing and simply yields.
# - Overrides are layered on top of any overrides that are already active, so
#   a nested call only changes the keys it names. Previously a nested
#   non-empty override replaced the outer overrides entirely, while a nested
#   empty override inherited them.
# - The active overrides are a merged copy; the caller's Hash is not stored.
# - The overrides that were active before are restored when the block
#   finishes, whether it returns normally or raises.
#
# Raises ArgumentError when `config` is not a Hash, or (through
# validate_configuration_keys!) when it contains unknown keys. Nothing is
# changed in either case.
def with_configuration(config)
  raise ArgumentError, "Expected a Hash for configuration overrides, got #{config.class}" unless config.is_a?(Hash)

  return yield if config.empty?

  validate_configuration_keys!(config)

  previous = Thread.current[:sql_beautifier_config]
  Thread.current[:sql_beautifier_config] = (previous || {}).merge(config)

  begin
    yield
  ensure
    Thread.current[:sql_beautifier_config] = previous
  end
end
|
|
76
|
+
|
|
77
|
+
# Ensures every key of `config` is one of `Configuration::DEFAULTS`' keys.
# Returns nil when they all are; raises ArgumentError listing the unknown keys
# otherwise.
def validate_configuration_keys!(config)
  unknown_keys = config.keys - Configuration::DEFAULTS.keys
  return if unknown_keys.empty?

  raise ArgumentError, "Unknown configuration keys: #{unknown_keys.join(', ')}"
end
|
|
54
81
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sql_beautifier
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kinnell Shah
|
|
@@ -46,6 +46,8 @@ files:
|
|
|
46
46
|
- lib/sql_beautifier/clauses/order_by.rb
|
|
47
47
|
- lib/sql_beautifier/clauses/select.rb
|
|
48
48
|
- lib/sql_beautifier/clauses/where.rb
|
|
49
|
+
- lib/sql_beautifier/comment_restorer.rb
|
|
50
|
+
- lib/sql_beautifier/comment_stripper.rb
|
|
49
51
|
- lib/sql_beautifier/condition_formatter.rb
|
|
50
52
|
- lib/sql_beautifier/configuration.rb
|
|
51
53
|
- lib/sql_beautifier/constants.rb
|
|
@@ -53,6 +55,8 @@ files:
|
|
|
53
55
|
- lib/sql_beautifier/cte_formatter.rb
|
|
54
56
|
- lib/sql_beautifier/formatter.rb
|
|
55
57
|
- lib/sql_beautifier/normalizer.rb
|
|
58
|
+
- lib/sql_beautifier/statement_assembler.rb
|
|
59
|
+
- lib/sql_beautifier/statement_splitter.rb
|
|
56
60
|
- lib/sql_beautifier/subquery_formatter.rb
|
|
57
61
|
- lib/sql_beautifier/table_registry.rb
|
|
58
62
|
- lib/sql_beautifier/tokenizer.rb
|