tsql_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ddacc151a5ff2930d1bf0e7501a53711503811c123fdcc1fa74a5e3926435c94
4
+ data.tar.gz: 3ac0fa643de5be751c55544bb2b226c8e8e5b062f56d8910db01f853c5400597
5
+ SHA512:
6
+ metadata.gz: eb1cd578f4346f3ed104d5a177ee1f7a3d67f5030fa748df64a85156a364ad38e0efe0e9fc48770d5f216d5a0857c2d65250caee3e28974392fd864a47873227
7
+ data.tar.gz: ace5d1a98579b17d3b48447691c61c3a0d346a090ba5462b54c6fa44c5ea7da1093825b71a6fb9b28e51f5e93785915f8e358e895011c236bf7ca3121865a84e
@@ -0,0 +1,195 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/formatter.rb
14
+ # object:
15
+ # TSqlParser::Parsing::Formatter
16
+
17
+ module TSqlParser::Parsing
18
+ require_relative "iterator"
19
+ require_relative "parser"
20
+ require_relative "text_formatter"
21
+ require_relative "model/sql_container"
22
+ require_relative "model/flat_sql_container"
23
+
24
# Turns a categorized token stream (see Tokenizer) into formatted T-SQL text.
#
# The pipeline is: group tokens into containers, render one line per
# container, normalise whitespace, indent, insert blank lines, and finally
# hand the text to TextFormatter for statement-specific layout.
class Formatter
  # First words that get a blank line inserted before them.
  NEWLINE_BEFORE = %w[IF RETURN INSERT DELETE WHILE].freeze
  # First words after which the indentation level grows by one.
  INDENT_AFTER = %w[CASE BEGIN SELECT].freeze
  # First words that shrink the indentation level before being emitted.
  DEDENT_BEFORE = %w[END GO FROM].freeze
  # Ordered literal replacements applied by fix_whitespace.
  WHITESPACE_FIXES = [
    [" , ", ", "],
    [" )", ")"],
    ["( ", "("],
    ["AS(", "AS ("],
    ["IN(", "IN ("],
    [",(", ", ("],
    ["[ ", "["],
    [" ]", "]"],
    ["] .", "]."],
    [". [", ".["],
    [" ;", ";"]
  ].freeze

  # Formats a list of token hashes.
  #
  # @param tokens [Array<Hash>] token hashes carrying at least :value.
  # @param tab_count [Integer] initial indentation level.
  # @param tab [String] the string used for one indentation level.
  # @return [String] the formatted SQL text.
  def self.format(tokens, tab_count = 0, tab = " ")
    containers = as_containers(tokens)
    lines = combine_containers(containers)
    lines = cleanup_whitespace(lines)
    lines = insert_indentation(lines, tab_count, tab)
    lines = insert_newlines(lines)
    text = lines.join("\n")
    text = TextFormatter.format_inserts(text, tab)
    text = TextFormatter.format_updates(text, tab)
    text = TextFormatter.format_joins(text, tab)
    text = TextFormatter.format_wheres(text, tab)
    text = TextFormatter.format_selects(text, tab)
    TextFormatter.format_sets(text, tab)
  end

  # NOTE: the helpers below are internal. A bare `private` does not apply to
  # `def self.` methods, so they have always been publicly callable and are
  # kept that way for backward compatibility.

  # Inserts an empty line before statements listed in NEWLINE_BEFORE and
  # before block comments; blank input lines are emitted as "".
  #
  # @param lines [Array<String>]
  # @return [Array<String>]
  def self.insert_newlines(lines)
    lines.each_with_object([]) do |line, out|
      first = line.strip.split(" ").first
      if first.nil?
        out << ""
        next
      end
      out << "" if NEWLINE_BEFORE.include?(first) || first.start_with?("/*")
      out << line
    end
  end

  # Prefixes every line with the indentation implied by the preceding lines.
  #
  # @param lines [Array<String>] lines, each possibly containing "\n".
  # @param tab_count [Integer] initial indentation level.
  # @param tab [String] the string used for one indentation level.
  # @return [Array<String>] the indented single lines.
  def self.insert_indentation(lines, tab_count = 0, tab = " ")
    indented_lines = []
    work_lines = lines.flat_map { |line| line.split("\n") }
    # true while the single statement following a BEGIN-less IF is pending
    sub_one = false
    last = ""

    work_lines.each_with_index do |line, index|
      first = line.strip.split(" ").first

      if INDENT_AFTER.include?(first) || line.strip.start_with?("CREATE PROCEDURE")
        indented_lines << "#{tab * tab_count}#{line}"
        tab_count += 1
      elsif DEDENT_BEFORE.include?(first) && last != "DELETE"
        # "DELETE FROM" keeps FROM at the level of DELETE
        tab_count -= 1 if tab_count > 0
        indented_lines << "#{tab * tab_count}#{line}"
      elsif first == "IF"
        indented_lines << "#{tab * tab_count}#{line}"
        # nil when the IF is the last line; previously this raised
        # NoMethodError because of an off-by-one bounds check.
        next_line = work_lines[index + 1]
        sub_one = true unless next_line&.start_with?("BEGIN")
        tab_count += 1 if sub_one
        last = first
        next
      else
        indented_lines << "#{tab * tab_count}#{line}"
      end

      if sub_one
        sub_one = false
        tab_count -= 1
      end
      last = first
    end

    indented_lines
  end

  # Applies safe_ws_cleanup to every line.
  #
  # @param combined [Array<String>]
  # @return [Array<String>]
  def self.cleanup_whitespace(combined)
    combined.map { |line| safe_ws_cleanup(line) }
  end

  # Renders each container as one space-joined line; comment siblings are
  # pushed onto their own line by prefixing them with "\n".
  #
  # @param containers [Array<#get_token, #has_siblings?, #get_siblings>]
  # @return [Array<String>]
  def self.combine_containers(containers)
    containers.map do |container|
      builder = [container.get_token[:value]]

      if container.has_siblings?
        container.get_siblings.each do |sibling|
          st = sibling.get_token
          builder << (st[:comment] ? "\n#{st[:value]}" : st[:value])
        end
      end

      builder.join(" ")
    end
  end

  # Groups tokens into containers, starting a new container at every
  # new-node keyword.
  #
  # Tokens that appear before the first new-node keyword used to be dropped
  # silently; they now open a container of their own so no input is lost.
  #
  # @param tokens [Array<Hash>]
  # @return [Array] the result of FlatSqlContainer.flatten_containers.
  def self.as_containers(tokens)
    containers = []
    container = nil

    tokens.each do |t|
      if container.nil? || Parser.is_new_node_keyword?(t[:value])
        containers << container unless container.nil?
        container = SqlContainer.new(t)
      else
        container.add t
      end
    end

    containers << container unless container.nil?
    FlatSqlContainer.flatten_containers(containers)
  end

  # Normalises whitespace outside of single-quoted string literals only.
  #
  # @param line [String]
  # @return [String]
  def self.safe_ws_cleanup(line)
    parts = []
    builder = String.new
    in_string = false

    line.each_char do |c|
      if c != "'"
        builder << c
      elsif in_string
        # closing quote: the literal is complete
        in_string = false
        builder << c
        parts << builder
        builder = String.new
      else
        # opening quote: flush the code seen so far
        in_string = true
        parts << builder unless builder.empty?
        builder = String.new(c)
      end
    end
    parts << builder unless builder.empty?

    parts.map { |part| part.start_with?("'") && part.end_with?("'") ? part : fix_whitespace(part) }.join
  end

  # Applies the WHITESPACE_FIXES replacements in order.
  #
  # @param line [String]
  # @return [String]
  def self.fix_whitespace(line)
    WHITESPACE_FIXES.reduce(line) { |text, (from, to)| text.gsub(from, to) }
  end
end
195
+ end
@@ -0,0 +1,54 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/iterator.rb
14
+ # object:
15
+ # TSqlParser::Parsing::TokenIterator
16
+
17
+ module TSqlParser::Parsing
18
# Forward cursor over an array of token hashes.
#
# The cursor starts before the first token; call next! to advance.
class TokenIterator
  # @param tokens [Array<Hash>] token hashes carrying at least :value.
  def initialize(tokens)
    @tokens = tokens
    @size = tokens.size
    @iter = -1
  end

  # @return [Boolean] true when next! would return another token.
  def has_next?
    @iter < @size - 1
  end

  # @return [Hash, nil] the current token, or nil before the first next!
  #   (previously index -1 wrapped around to the last token).
  def get!
    token_at(@iter)
  end

  # @return [Hash, nil] the token after the current one, without advancing.
  def peek!
    token_at(@iter + 1)
  end

  # @param length [Integer] offset from the current position.
  # @return [Hash, nil] the token at that offset, without advancing.
  def peek_ahead!(length)
    token_at(@iter + length)
  end

  # @return [String, nil] :value of the next token, nil at the end.
  def peek_value!
    token = peek!
    token && token[:value]
  end

  # @param length [Integer] offset from the current position.
  # @return [String, nil] :value of the token at that offset, nil when out of range.
  def peek_ahead_value!(length)
    token = peek_ahead!(length)
    token && token[:value]
  end

  # Advances the cursor by one.
  #
  # @return [Hash, nil] the new current token.
  def next!
    @iter += 1
    get!
  end

  private

  # Array#[] treats negative indexes as offsets from the end; a cursor
  # position before the start must yield nil instead.
  def token_at(index)
    return nil if index.negative?

    @tokens[index]
  end
end
54
+ end
@@ -0,0 +1,93 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/keyword.rb
14
+ # object:
15
+ # TSqlParser::Parsing::Keyword
16
+
17
+ module TSqlParser::Parsing
18
# Static T-SQL vocabulary used by Parser to classify tokens.
#
# The lists are built once as frozen constants instead of being re-allocated
# and re-concatenated on every lookup. The getters therefore return shared
# frozen arrays: read them, do not mutate them.
class Keyword
  RESERVED_KEYWORDS = %w[
    ADD ALL ALTER AND ANY AS ASC AUTHORIZATION BACKUP BEGIN BETWEEN BREAK BROWSE BULK BY
    CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COMMIT COMPUTE
    CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE CONVERT CREATE CROSS CURRENT CURRENT_DATE
    CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE DBCC DEALLOCATE DECLARE
    DEFAULT DELETE DENY DESC DISK DISTINCT DISTRIBUTED DOUBLE DROP DUMP ELSE END ERRLVL
    ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL FETCH FILE FILLFACTOR FOR FOREIGN
    FREETEXT FREETEXTTABLE FROM FULL FUNCTION GOTO GRANT GROUP HAVING HOLDLOCK IDENTITY
    IDENTITY_INSERT IDENTITYCOL IF IN INDEX INNER INSERT INTERSECT INTO IS JOIN KEY KILL
    LEFT LIKE LINENO LOAD MERGE NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF OF OFF
    OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION OR ORDER OUTER OVER
    PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC PROCEDURE PUBLIC RAISERROR READ
    READTEXT RECONFIGURE REFERENCES REPLICATION RESTORE RESTRICT RETURN REVERT REVOKE
    RIGHT ROLLBACK ROWCOUNT ROWGUIDCOL RULE SAVE SCHEMA SECURITYAUDIT SELECT
    SEMANTICKEYPHRASETABLE SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE
    SESSION_USER SET SETUSER SHUTDOWN SOME STATISTICS SYSTEM_USER TABLE TABLESAMPLE
    TEXTSIZE THEN TO TOP TRAN TRANSACTION TRIGGER TRUNCATE TRY_CONVERT TSEQUAL UNION
    UNIQUE UNPIVOT UPDATE UPDATETEXT USE USER VALUES VARYING VIEW WAITFOR WHEN WHERE
    WHILE WITH WITHIN WRITETEXT
  ].freeze

  SPECIAL_VARIABLES = %w[
    @@ERROR @@FETCH_STATUS @@IDENTITY @@LOCK_TIMEOUT @@NESTLEVEL @@ROWCOUNT @@SERVERNAME
    @@SPID @@SQLSTATUS @@TRANCOUNT @@VERSION
  ].freeze

  TYPES = %w[
    BIGINT BINARY BIT CHAR CURSOR DATE DATETIME DATETIME2 DATETIMEOFFSET DECIMAL FLOAT
    HIERARCHYID IMAGE INT MONEY NCHAR NTEXT NUMERIC NVARCHAR REAL ROWVERSION SMALLDATETIME
    SMALLINT SMALLMONEY SQL_VARIANT TABLE TEXT TIME TINYINT UNIQUEIDENTIFIER VARBINARY
    VARCHAR XML
  ].freeze

  MATH_FUNCTIONS = %w[
    ABS ACOS ASIN ATAN ATN2 CEILING COS COT DEGREES EXP FLOOR LOG LOG10 PI POWER RADIANS
    RAND ROUND SIGN SIN SQRT SQUARE TAN
  ].freeze

  CONVERSION_FUNCTIONS = %w[CAST CONVERT PARSE TRY_CAST TRY_CONVERT TRY_PARSE].freeze

  STRING_FUNCTIONS = %w[
    ASCII CHAR CHARINDEX CONCAT CONCAT_WS DIFFERENCE FORMAT LEFT LEN LOWER LTRIM NCHAR
    PATINDEX QUOTENAME REPLACE REPLICATE REVERSE RIGHT RTRIM SOUNDEX SPACE STR STRING_AGG
    STRING_ESCAPE STRING_SPLIT STUFF SUBSTRING TRANSLATE TRIM UNICODE UPPER
  ].freeze

  AGGREGATE_FUNCTIONS = %w[
    APPROX_COUNT_DISTINCT AVG CHECKSUM_AGG COUNT COUNT_BIG GROUPING GROUPING_ID MAX MIN
    STDEV STDEVP STRING_AGG SUM VAR VARP
  ].freeze

  DATE_FUNCTIONS = %w[
    CURRENT_TIMESTAMP CURRENT_TIMEZONE CURRENT_TIMEZONE_ID DATE_BUCKET DATEADD DATEDIFF
    DATEDIFF_BIG DATEFROMPARTS DATENAME DATEPART DATETIME2FROMPARTS DATETIMEFROMPARTS
    DATETIMEOFFSETFROMPARTS DATETRUNC DAY EOMONTH FORMAT GETDATE GETUTCDATE ISDATE MONTH
    SMALLDATETIMEFROMPARTS SWITCHOFFSET SYSDATETIME SYSDATETIMEOFFSET SYSUTCDATETIME
    TIMEFROMPARTS TODATETIMEOFFSET YEAR
  ].freeze

  # Same concatenation order as before: math, conversion, string, aggregate, date.
  FUNCTIONS = (
    MATH_FUNCTIONS + CONVERSION_FUNCTIONS + STRING_FUNCTIONS + AGGREGATE_FUNCTIONS + DATE_FUNCTIONS
  ).freeze

  # Same concatenation order as before; entries shared between lists are kept.
  KEYWORDS = (RESERVED_KEYWORDS + SPECIAL_VARIABLES + FUNCTIONS + TYPES).freeze

  # WHERE and SET are listed and then subtracted, so they do not start a new
  # node and stay on the line of the statement they belong to.
  NEW_NODE_KEYWORDS = (
    %w[
      CREATE ALTER DROP RENAME SELECT INSERT UPDATE DELETE WHILE IF ELSE DECLARE SET WITH
      BEGIN FROM WHERE INNER LEFT JOIN END GO GROUP ORDER CASE PRINT RETURN
    ] - %w[WHERE SET]
  ).freeze

  JOIN_KEYWORDS = %w[INNER OUTER LEFT RIGHT FULL CROSS JOIN].freeze

  JOIN_TYPE_KEYWORDS = %w[INNER LEFT RIGHT CROSS FULL].freeze

  NEWLINE_KEYWORDS = %w[
    INSERT UPDATE DELETE SELECT SET DECLARE CREATE FROM INNER FULL OUTER LEFT RIGHT CROSS
    JOIN IF BEGIN END RETURN WHERE PRINT GROUP ORDER WHILE
  ].freeze

  # @return [Array<String>] reserved words, special variables, functions and types.
  def self.get_keywords
    KEYWORDS
  end

  # @return [Array<String>] keywords that open a new container in the formatter.
  def self.get_new_node_keywords
    NEW_NODE_KEYWORDS
  end

  # @return [String]
  def self.get_begin_keyword
    "BEGIN"
  end

  # @return [String]
  def self.get_end_keyword
    "END"
  end

  # @return [Array<String>]
  def self.get_join_keywords
    JOIN_KEYWORDS
  end

  # @return [Array<String>]
  def self.get_join_type_keywords
    JOIN_TYPE_KEYWORDS
  end

  # @return [Array<String>]
  def self.get_newline_keywords
    NEWLINE_KEYWORDS
  end

  # @return [Array<String>]
  def self.get_reserved_keywords
    RESERVED_KEYWORDS
  end

  # @return [Array<String>]
  def self.get_special_variables
    SPECIAL_VARIABLES
  end

  # @return [Array<String>]
  def self.get_types
    TYPES
  end

  # @return [Array<String>] all function names, grouped by category.
  def self.get_functions
    FUNCTIONS
  end

  # Functions

  # @return [Array<String>]
  def self.get_math_functions
    MATH_FUNCTIONS
  end

  # @return [Array<String>]
  def self.get_conversion_functions
    CONVERSION_FUNCTIONS
  end

  # @return [Array<String>]
  def self.get_string_functions
    STRING_FUNCTIONS
  end

  # @return [Array<String>]
  def self.get_aggregate_functions
    AGGREGATE_FUNCTIONS
  end

  # @return [Array<String>]
  def self.get_date_functions
    DATE_FUNCTIONS
  end
end
93
+ end
@@ -0,0 +1,77 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/model/flat_sql_container.rb
14
+ # object:
15
+ # TSqlParser::Parsing::FlatSqlContainer
16
+
17
+ module TSqlParser::Parsing
18
# Flattened view of a container: the wrapped container plus a copy of its
# direct nodes exposed as "siblings".
class FlatSqlContainer
  # @param token [#has_nodes?, #get_nodes, #get_token, nil] the container to
  #   wrap (the formatter passes SqlContainer instances).
  def initialize(token = nil)
    @token = token
    @children = []
    @siblings = []
    @siblings.concat(token.get_nodes) if !token.nil? && token.has_nodes?
  end

  # Wraps every container in a FlatSqlContainer.
  #
  # @param containers [Array]
  # @return [Array<FlatSqlContainer>]
  def self.flatten_containers(containers)
    containers.map { |container| FlatSqlContainer.new(container) }
  end

  # @param token [Object] replaces the wrapped container.
  def set_token(token)
    @token = token
  end

  # @param token [Object] appended to the sibling list as-is.
  def add_sibling(token)
    @siblings << token
  end

  # @param token [Hash] wrapped in a new SqlContainer and appended to the children.
  def add_child(token)
    @children << SqlContainer.new(token)
  end

  # @return [Boolean]
  def has_siblings?
    !@siblings.empty?
  end

  # @return [Boolean]
  def has_children?
    !@children.empty?
  end

  # @return [Array]
  def get_siblings
    @siblings
  end

  # @return [Array]
  def get_children
    @children
  end

  # @return [Hash, nil] the wrapped container's token; nil when nothing is
  #   wrapped (this used to raise NoMethodError).
  def get_token
    @token.nil? ? nil : @token.get_token
  end

  # @return [String] the token value, or "" when nothing is wrapped.
  def to_s
    token = get_token
    token.nil? ? "" : token[:value].to_s
  end
end
77
+ end
@@ -0,0 +1,48 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/model/sql_container.rb
14
+ # object:
15
+ # TSqlParser::Parsing::SqlContainer
16
+
17
+ module TSqlParser::Parsing
18
# A token together with the tokens that follow it inside the same statement
# part; each added token is wrapped in its own SqlContainer.
class SqlContainer
  # @param token [Hash, nil] token hash carrying at least :value.
  def initialize(token = nil)
    @token = token
    @nodes = []
  end

  # @param token [Hash, nil] replaces the container's own token.
  def set_token(token)
    @token = token
  end

  # Wraps the token in a new SqlContainer and appends it to the nodes.
  #
  # @param token [Hash]
  # @return [Array<SqlContainer>] the node list after the append.
  def add(token)
    @nodes << SqlContainer.new(token)
  end

  # @return [Boolean]
  def has_nodes?
    !@nodes.empty?
  end

  # @return [Array<SqlContainer>]
  def get_nodes
    @nodes
  end

  # @return [Hash, nil]
  def get_token
    @token
  end

  # @return [String] the token value, or "" for an empty container
  #   (previously nil, which is not a valid to_s result).
  def to_s
    @token.nil? ? "" : @token[:value].to_s
  end
end
48
+ end
@@ -0,0 +1,145 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/parser.rb
14
+ # object:
15
+ # TSqlParser::Parsing::Parser
16
+
17
+ module TSqlParser::Parsing
18
+ require_relative "keyword"
19
+
20
# Stateless predicates that classify characters and token strings.
class Parser
  ONE_CHAR_OPERATORS = %w[= + - % / * < >].freeze
  TWO_CHAR_OPERATORS = %w[<> != <= >= == !< !> += -= *= /= %=].freeze
  OPERATORS = (TWO_CHAR_OPERATORS + ONE_CHAR_OPERATORS).freeze
  PARENTHESES = %w[( )].freeze
  BRACKETS = ["[", "]"].freeze
  # Optional sign, then digits with at most one decimal point; at least one
  # digit is required (accepts "1", "-1.5", ".5", "1.").
  NUMERIC_PATTERN = /\A-?+(?=.??\d)\d*\.?\d*\z/.freeze

  # @param c [String] current character.
  # @param next_c [String, nil] following character.
  # @return [Boolean] true for "/*".
  def self.is_multiline_comment_start?(c, next_c)
    c == "/" && next_c == "*"
  end

  # @return [Boolean] true for "*/".
  def self.is_multiline_comment_end?(c, next_c)
    c == "*" && next_c == "/"
  end

  # @return [Boolean] true for "--".
  def self.is_comment_start?(c, next_c)
    c == "-" && next_c == "-"
  end

  # @param s [String]
  # @return [Boolean] true when s is a one- or two-character operator.
  def self.is_operator?(s)
    OPERATORS.include?(s)
  end

  # @return [Boolean]
  def self.is_one_char_op?(c)
    ONE_CHAR_OPERATORS.include?(c)
  end

  # @return [Boolean] true when the two characters together form an operator.
  def self.is_two_char_op?(c, next_c)
    TWO_CHAR_OPERATORS.include?("#{c}#{next_c}")
  end

  # @return [Boolean]
  def self.is_numeric?(s)
    s.match?(NUMERIC_PATTERN)
  end

  # @return [Boolean] true when s starts with "@".
  def self.is_variable?(s)
    s.start_with?("@")
  end

  # @return [Boolean] true when s starts with "#".
  def self.is_temp_table?(s)
    s.start_with?("#")
  end

  # @return [Boolean] true when s ends with ":".
  def self.is_label?(s)
    s.end_with?(":")
  end

  # @return [Boolean]
  def self.is_parenthesis?(s)
    PARENTHESES.include?(s)
  end

  # @return [Boolean]
  def self.is_open_parenthesis?(s)
    s == "("
  end

  # @return [Boolean]
  def self.is_close_parenthesis?(s)
    s == ")"
  end

  # @return [Boolean]
  def self.is_bracket?(s)
    BRACKETS.include?(s)
  end

  # @return [Boolean]
  def self.is_open_bracket?(s)
    s == "["
  end

  # @return [Boolean]
  def self.is_close_bracket?(s)
    s == "]"
  end

  # @return [Boolean]
  def self.is_string_mark?(s)
    s == "'"
  end

  # @return [Boolean]
  def self.is_comma?(s)
    s == ","
  end

  # @return [Boolean] true for a closed block comment or a line comment.
  def self.is_comment?(s)
    (s.start_with?("/*") && s.end_with?("*/")) || s.start_with?("--")
  end

  # @return [Boolean]
  def self.is_keyword?(s)
    Keyword.get_keywords.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_begin?(s)
    Keyword.get_begin_keyword == s.upcase
  end

  # @return [Boolean]
  def self.is_end?(s)
    Keyword.get_end_keyword == s.upcase
  end

  # @return [Boolean]
  def self.is_join?(s)
    Keyword.get_join_keywords.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_join_type?(s)
    Keyword.get_join_type_keywords.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_function?(s)
    Keyword.get_functions.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_type?(s)
    Keyword.get_types.include?(s.upcase)
  end

  # True when s is one of the known @@ variables (case-insensitive).
  #
  # An earlier `start_with? "@@"` definition of this method was dead code:
  # this later definition replaced it at load time, so only this one is kept.
  #
  # @return [Boolean]
  def self.is_special_variable?(s)
    Keyword.get_special_variables.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_newline_required?(s)
    Keyword.get_newline_keywords.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_new_node_keyword?(s)
    Keyword.get_new_node_keywords.include?(s.upcase)
  end

  # @return [Boolean]
  def self.is_terminator?(s)
    s == ";"
  end
end
145
+ end
@@ -0,0 +1,247 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/text_formatter.rb
14
+ # object:
15
+ # TSqlParser::Parsing::TextFormatter
16
+
17
+ module TSqlParser::Parsing
18
# Line-oriented layout passes applied to already indented SQL text.
#
# Every public pass takes the whole text, rewrites the lines that start with
# the statement it handles and returns the text joined with "\n". Lines a
# pass cannot handle are returned unchanged.
class TextFormatter
  # Puts every top-level assignment of a `SET a = 1, b = 2` line on its own
  # line (`SET @var` lines are left alone) and moves an inline " SET " onto
  # a new line.
  #
  # @param text [String]
  # @param tab [String] the string used for one indentation level.
  # @return [String]
  def self.format_sets(text, tab = " ")
    formatted = []
    text.split("\n").each do |line|
      stripped = line.strip
      first = stripped.split(" ").first
      if first == "SET" && !stripped.start_with?("SET @")
        set = stripped[first.size + 1..]
        new_set = format_set(set, get_tab_count(line, tab), tab)
        # block form: the replacement is inserted literally, backslashes included
        formatted << (new_set.nil? ? line : line.sub(set) { new_set })
      elsif first != "SET" && line.include?(" SET ")
        parts = stripped.split(" SET ")
        indent = tab * get_tab_count(line, tab)
        formatted << "#{indent}#{parts[0]}\n"
        parts[1..].each { |part| formatted << "#{indent}SET #{part}" }
      else
        formatted << line
      end
    end
    formatted.join("\n")
  end

  # Collapses whitespace inside INNER/LEFT JOIN and moves an inline " WHERE "
  # onto its own line at the same indentation.
  #
  # @param text [String]
  # @param tab [String]
  # @return [String]
  def self.format_joins(text, tab = " ")
    text = text.gsub(/INNER\s+JOIN/, "INNER JOIN")
               .gsub(/LEFT\s+JOIN/, "LEFT JOIN")

    new_text = text.split("\n").map do |line|
      first = line.strip.split(" ").first
      if line.include?(" WHERE ") && first != "WHERE" && !first.start_with?("--") && !first.start_with?("/*")
        indent = tab * get_tab_count(line, tab)
        # limit 2: everything after the first WHERE stays in the predicate
        # instead of being dropped when " WHERE " occurs again
        before, predicate = line.strip.split(" WHERE ", 2)
        "#{indent}#{before}\n#{indent}WHERE #{predicate}"
      else
        line
      end
    end

    new_text.join("\n")
  end

  # Splits `UPDATE t SET ... WHERE ...` lines into table / SET / WHERE lines.
  #
  # @param text [String]
  # @param tab [String]
  # @return [String]
  def self.format_updates(text, tab = " ")
    rewrite_statement_lines(text, "UPDATE", tab) do |rest, tab_count|
      format_update(rest, tab_count, tab)
    end
  end

  # Lays out `INSERT INTO t (cols) VALUES (vals)` lines over several lines.
  # INSERT lines that do not start with "INSERT INTO " are left unchanged;
  # slicing them at the fixed "INSERT INTO" offset used to corrupt them.
  #
  # @param text [String]
  # @param tab [String]
  # @return [String]
  def self.format_inserts(text, tab = " ")
    rewrite_statement_lines(text, "INSERT", tab, prefix: "INSERT INTO") do |rest, tab_count|
      format_insert(rest, tab_count, tab)
    end
  end

  # Puts every top-level column of a SELECT list on its own line.
  #
  # @param text [String]
  # @param tab [String]
  # @return [String]
  def self.format_selects(text, tab = " ")
    rewrite_statement_lines(text, "SELECT", tab) do |rest, tab_count|
      format_select(rest, tab_count, tab)
    end
  end

  # Breaks WHERE predicates before every AND / OR.
  #
  # @param text [String]
  # @param tab [String]
  # @return [String]
  def self.format_wheres(text, tab = " ")
    rewrite_statement_lines(text, "WHERE", tab) do |rest, tab_count|
      format_predicate(rest, tab_count, tab)
    end
  end

  # NOTE: the format_* singular helpers and get_tab_count below are internal.
  # A bare `private` does not apply to `def self.` methods, so they have always
  # been publicly callable and are kept that way for backward compatibility.

  # @param s [String, nil] the text after "SET ".
  # @return [String, nil] one assignment per line, or nil when s is blank.
  def self.format_set(s, tab_count = 0, tab = " ")
    return nil if s.nil? || s.strip.empty?

    indent = tab * (tab_count + 1)
    "\n#{split_top_level(s).map { |part| "#{indent}#{part.strip}" }.join(",\n")}"
  end

  # @param s [String, nil] the text after "UPDATE ".
  # @return [String, nil] nil when s has no " SET " clause.
  def self.format_update(s, tab_count = 0, tab = " ")
    return nil if s.nil?

    table, assignments = s.split(" SET ", 2)
    return nil if assignments.nil?

    set_clause, where_clause = assignments.split(" WHERE ", 2)
    formatted = []
    formatted << "\n#{tab * (tab_count + 1)}#{table}"
    formatted << "#{tab * tab_count}SET #{set_clause}"
    # emitted only when a WHERE clause exists (it used to be emitted always)
    formatted << "#{tab * tab_count}WHERE #{where_clause}" unless where_clause.nil?
    formatted.join("\n")
  end

  # @param s [String, nil] the text after "INSERT INTO ".
  # @return [String, nil] nil when s is not of the form
  #   `table (columns) VALUES (values)` with an optional trailing ";".
  def self.format_insert(s, tab_count = 0, tab = " ")
    separator = ") VALUES ("
    return nil if s.nil? || !s.include?(separator)

    head, tail = s.split(separator, 2)
    paren = head.index("(")
    return nil if paren.nil?

    table = head[0...paren].rstrip
    columns = head[paren + 1..]
    # keep a statement terminator outside of the value list
    terminator = tail.end_with?(";") ? ";" : ""
    tail = tail.chomp(";")
    return nil unless tail.end_with?(")")

    values = tail[..-2]
    [
      "\n#{tab * (tab_count + 1)}#{table}",
      "#{tab * (tab_count + 2)}(#{columns})",
      "#{tab * (tab_count + 1)}VALUES",
      "#{tab * (tab_count + 2)}(#{values})#{terminator}"
    ].join("\n")
  end

  # @param s [String, nil] the text after "SELECT ".
  # @return [String, nil] one column per line, or nil when s is blank.
  def self.format_select(s, tab_count = 0, tab = " ")
    return nil if s.nil? || s.strip.empty?

    indent = tab * (tab_count + 1)
    "\n#{split_top_level(s).map { |column| "#{indent}#{column.strip}" }.join(",\n")}"
  end

  # @param s [String, nil] the text after "WHERE ".
  # @return [String, nil] one condition per line; the indentation follows
  #   the count of parentheses opened and closed by the preceding conditions.
  def self.format_predicate(s, tab_count = 0, tab = " ")
    return nil if s.nil? || s.strip.empty?

    formatted = []
    builder = []
    split_words(s).each do |word|
      if %w[AND OR].include?(word)
        formatted << builder.join(" ") unless builder.empty?
        builder = [word]
      else
        builder << word
      end
    end
    formatted << builder.join(" ")

    level = tab_count
    indented = formatted.map do |condition|
      # clamp: more ")" than "(" must not lead to a negative repeat count
      text = "#{tab * [level + 1, 0].max}#{condition}"
      level += condition.count("(") - condition.count(")")
      text
    end

    "\n#{indented.join("\n")}"
  end

  # @param line [String]
  # @param tab [String]
  # @return [Integer] how many times tab is repeated at the start of line;
  #   0 for an empty tab (which used to loop forever).
  def self.get_tab_count(line, tab = " ")
    return 0 if tab.empty?

    tab_count = 0
    tab_count += 1 while line.start_with?(tab * (tab_count + 1))
    tab_count
  end

  # Rewrites every line whose first word is keyword. The block receives the
  # text after "<prefix> " and the line's indentation level; a nil result
  # keeps the line unchanged.
  def self.rewrite_statement_lines(text, keyword, tab, prefix: keyword)
    text.split("\n").map do |line|
      stripped = line.strip
      next line unless stripped.split(" ").first == keyword
      next line unless prefix == keyword || stripped.start_with?("#{prefix} ")

      rest = stripped[prefix.size + 1..]
      next line if rest.nil? || rest.empty?

      replacement = yield(rest, get_tab_count(line, tab))
      # block form: the replacement is inserted literally, backslashes included
      replacement.nil? ? line : line.sub(rest) { replacement }
    end.join("\n")
  end
  private_class_method :rewrite_statement_lines

  # Splits on commas that are outside of parentheses and outside of
  # single-quoted string literals.
  def self.split_top_level(s)
    parts = []
    builder = String.new
    depth = 0
    in_string = false

    s.each_char do |c|
      if c == "'"
        in_string = !in_string
      elsif !in_string
        depth += 1 if c == "("
        depth -= 1 if c == ")"
        if c == "," && depth <= 0
          parts << builder
          builder = String.new
          next
        end
      end
      builder << c
    end

    parts << builder unless builder.empty?
    parts
  end
  private_class_method :split_top_level

  # Splits on whitespace that is outside of single-quoted string literals,
  # so literal contents are never re-spaced or broken across lines.
  def self.split_words(s)
    words = []
    builder = String.new
    in_string = false

    s.each_char do |c|
      in_string = !in_string if c == "'"
      if !in_string && c.match?(/\s/)
        words << builder unless builder.empty?
        builder = String.new
      else
        builder << c
      end
    end

    words << builder unless builder.empty?
    words
  end
  private_class_method :split_words
end
247
+ end
@@ -0,0 +1,151 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # parsing/tokenizer.rb
14
+ # object:
15
+ # TSqlParser::Parsing::Tokenizer
16
+
17
+ module TSqlParser::Parsing
18
+ require_relative "parser"
19
+
20
# Splits T-SQL text into token strings and tags each one with the categories
# reported by Parser.
class Tokenizer
  # Characters that end the current token and are tokens themselves.
  CHAR_DELIMITERS = ["(", ",", ")", "=", "+", "-", "%", "/", "*", "<", "!", ">", "'", "[", "]", ";"].freeze
  # Characters that end the current token and are discarded. "\r" is included
  # so CRLF input does not leave a carriage return glued to a token.
  SKIP_DELIMITERS = [" ", "\n", "\t", "\r"].freeze

  # @param tsql_string [String] the SQL text.
  # @return [Array<Hash>] one categorized token hash per token.
  def self.tokenize(tsql_string)
    basic_tokenize(tsql_string, CHAR_DELIMITERS, SKIP_DELIMITERS).map { |token| categorize(token) }
  end

  # Builds the token hash for one token string. Only the flags that apply are
  # set; keyword, function and type values are upper-cased.
  #
  # @param s [String]
  # @return [Hash] :value plus boolean category flags.
  def self.categorize(s)
    data = {}
    data[:value] = s
    data[:keyword] = true if Parser.is_keyword? s
    data[:operator] = true if Parser.is_operator? s
    data[:function] = true if Parser.is_function? s
    data[:type] = true if Parser.is_type? s
    data[:comment] = true if Parser.is_comment? s
    data[:numeric] = true if Parser.is_numeric? s
    data[:special_variable] = true if Parser.is_special_variable? s
    data[:variable] = true if Parser.is_variable? s
    data[:temporary_table] = true if Parser.is_temp_table? s
    data[:label] = true if Parser.is_label? s
    data[:parenthesis] = true if Parser.is_parenthesis? s
    data[:open_parenthesis] = true if Parser.is_open_parenthesis? s
    data[:close_parenthesis] = true if Parser.is_close_parenthesis? s
    data[:bracket] = true if Parser.is_bracket? s
    data[:open_bracket] = true if Parser.is_open_bracket? s
    data[:close_bracket] = true if Parser.is_close_bracket? s
    data[:string_mark] = true if Parser.is_string_mark? s
    data[:comma] = true if Parser.is_comma? s
    data[:join] = true if Parser.is_join? s
    data[:join_type] = true if Parser.is_join_type? s
    data[:begin] = true if Parser.is_begin? s
    data[:end] = true if Parser.is_end? s
    data[:terminator] = true if Parser.is_terminator? s
    data[:value] = data[:value].upcase if data[:keyword] || data[:function] || data[:type]
    data[:needs_newline] = true if data[:keyword] && Parser.is_newline_required?(s)
    data
  end

  # Splits the text into raw token strings.
  #
  # Block comments, line comments and single-quoted string literals are each
  # returned as ONE token. While inside one of them nothing else is
  # recognised, so comment markers, operators and delimiters that appear in a
  # literal or comment no longer tear it apart, and a doubled quote ('')
  # inside a literal is kept as an escaped quote.
  #
  # @param tsql_string [String]
  # @param char_delimiters [Array<String>] delimiters kept as tokens.
  # @param skip_delimiters [Array<String>] delimiters that are discarded.
  # @return [Array<String>]
  def self.basic_tokenize(tsql_string, char_delimiters, skip_delimiters)
    specific_tokens = []
    delimiters = [char_delimiters, skip_delimiters].flatten
    builder = String.new
    tsql_chars = tsql_string.split("")
    multiline_comment = false
    comment = false
    string = false
    skip_count = 0

    tsql_chars.each_with_index do |c, i|
      if skip_count > 0
        skip_count -= 1
        next
      end

      next_c = tsql_chars[i + 1]

      if multiline_comment
        if Parser.is_multiline_comment_end?(c, next_c)
          skip_count = 1
          multiline_comment = false
          builder << c << next_c
          specific_tokens << builder
          builder = String.new
        else
          builder << c
        end
        next
      end

      if comment
        if c == "\n"
          # a line comment ends at the newline; drop the CR of a CRLF pair
          builder = builder.chomp("\r")
          specific_tokens << builder unless builder.empty?
          builder = String.new
          comment = false
        else
          builder << c
        end
        next
      end

      if string
        if c == "'" && next_c == "'"
          # escaped quote: stays part of the literal
          builder << c << next_c
          skip_count = 1
        elsif c == "'"
          string = false
          builder << c
          specific_tokens << builder
          builder = String.new
        else
          builder << c
        end
        next
      end

      if Parser.is_multiline_comment_start?(c, next_c)
        multiline_comment = true
        # consume "/*" as a unit so that "/*/" does not close the comment
        skip_count = 1
        specific_tokens << builder unless builder.empty?
        builder = String.new("/*")
        next
      end

      if Parser.is_comment_start?(c, next_c)
        comment = true
        skip_count = 1
        specific_tokens << builder unless builder.empty?
        builder = String.new("--")
        next
      end

      if c == "'"
        string = true
        specific_tokens << builder unless builder.empty?
        builder = String.new(c)
        next
      end

      if Parser.is_two_char_op?(c, next_c)
        skip_count = 1
        specific_tokens << builder unless builder.empty?
        specific_tokens << "#{c}#{next_c}"
        builder = String.new
        next
      end

      if delimiters.include?(c)
        specific_tokens << builder unless builder.empty?
        specific_tokens << c unless skip_delimiters.include?(c)
        builder = String.new
        next
      end

      builder << c
    end

    specific_tokens << builder unless builder.empty?
    specific_tokens
  end
end
151
+ end
@@ -0,0 +1,38 @@
1
+ # __ .__
2
+ # _/ |_ ___________| | ___________ _______ ______ ___________
3
+ # \ __\/ ___/ ____/ | ______ \____ \__ \\_ __ \/ ___// __ \_ __ \
4
+ # | | \___ < <_| | |__ /_____/ | |_> > __ \| | \/\___ \\ ___/| | \/
5
+ # |__| /____ >__ |____/ | __(____ /__| /____ >\___ >__|
6
+ # \/ |__| |__| \/ \/ \/
7
+ #
8
+ # A very light-weight and opinionated T-SQL parser and formatter.
9
+ #
10
+ # github.com/scstauf
11
+ #
12
+ # path:
13
+ # tsql_parser.rb
14
+ # object:
15
+ # TSqlParser
16
+
17
module TSqlParser
  class << self
    # Tokenizes a SQL string and renders it as formatted text.
    #
    # The formatter is loaded on first use.
    #
    # @param sql [String] the SQL string to format.
    # @param tab_count [Integer] the number of tabs to start with.
    # @param tab [String] the tab string.
    # @return [String] the formatted SQL string.
    def format(sql, tab_count = 0, tab = " ")
      require_relative "parsing/formatter"
      Parsing::Formatter.format(parse(sql), tab_count, tab)
    end

    # Tokenizes a SQL string into token hashes.
    #
    # The tokenizer is loaded on first use.
    #
    # @param sql [String] the SQL string to parse.
    # @return [Array] the token hashes.
    def parse(sql)
      require_relative "parsing/tokenizer"
      Parsing::Tokenizer.tokenize(sql)
    end
  end
end
+ end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tsql_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Scott Stauffer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-03-27 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A very light-weight and opinionated T-SQL parser and formatter.
14
+ email: scott@fuseraft.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/parsing/formatter.rb
20
+ - lib/parsing/iterator.rb
21
+ - lib/parsing/keyword.rb
22
+ - lib/parsing/model/flat_sql_container.rb
23
+ - lib/parsing/model/sql_container.rb
24
+ - lib/parsing/parser.rb
25
+ - lib/parsing/text_formatter.rb
26
+ - lib/parsing/tokenizer.rb
27
+ - lib/tsql_parser.rb
28
+ homepage: https://rubygems.org/gems/tsql_parser
29
+ licenses:
30
+ - MIT
31
+ metadata:
32
+ source_code_uri: https://github.com/scstauf/tsql_parser
33
+ documentation_uri: https://www.rubydoc.info/github/scstauf/tsql_parser
34
+ post_install_message:
35
+ rdoc_options: []
36
+ require_paths:
37
+ - lib
38
+ required_ruby_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.7.0
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubygems_version: 3.3.5
50
+ signing_key:
51
+ specification_version: 4
52
+ summary: A very light-weight and opinionated T-SQL parser and formatter.
53
+ test_files: []