anbt-sql-formatter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,73 @@
1
+ require "pp"
2
+
3
# Minimal LIFO stack backed by an Array.
#
# Includes Enumerable; iteration visits elements in insertion order
# (oldest pushed element first, most recently pushed element last).
class Stack
  include Enumerable

  def initialize
    @arr = []
  end

  # Yields every element in insertion order.
  #
  # Returns an Enumerator when called without a block, so that calls such as
  # stack.each.with_index work instead of raising LocalJumpError.
  def each(&block)
    return enum_for(:each) unless block_given?

    @arr.each(&block)
  end

  # Removes all elements.
  def clear
    @arr.clear
  end

  # Puts +o+ on top of the stack.
  def push(o)
    @arr.push o
  end

  # Removes and returns the top element, or nil when the stack is empty.
  def pop
    @arr.pop
  end
end
28
+
29
+
30
# Java-flavoured convenience methods on String, used by the code ported
# from the Java implementation.
class String
  # True when the string ends with +c+.
  #
  # Works for suffixes of any length (previously only a single character
  # was compared). An empty or non-String +c+ never matches.
  def endsWith(c)
    c.is_a?(String) && !c.empty? && end_with?(c)
  end

  # True when the string starts with +c+.
  #
  # Works for prefixes of any length (previously only a single character
  # was compared). An empty or non-String +c+ never matches.
  def startsWith(c)
    c.is_a?(String) && !c.empty? && start_with?(c)
  end

  # Returns the one-character string at index +n+.
  # Yields "" when +n+ equals the length and nil when +n+ is beyond it.
  def charAt(n)
    slice(n..n)
  end

  # Plain equality, kept for parity with the Java API.
  def equals(str)
    self == str
  end

  # Case-insensitive equality (both sides are upcased before comparing).
  # Returns false instead of raising when +other+ cannot be upcased (e.g. nil).
  def equalsIgnoreCase(other)
    other.respond_to?(:upcase) && upcase == other.upcase
  end

  # Returns a copy with leading and trailing whitespace removed.
  def trim
    strip
  end
end
55
+
56
+
57
# Java-List-flavoured convenience methods on Array, used by the code ported
# from the Java implementation.
class Array
  # Deletes the element at index +n+ and returns it (nil if out of range).
  def remove(n)
    delete_at(n)
  end

  # Strict element access: returns the element at index +n+.
  #
  # Unlike Array#[], negative indexes and indexes past the end are rejected.
  # Raises IndexOutOfBoundsException when the project defines that constant;
  # otherwise raises IndexError (previously an undefined constant produced an
  # unrelated NameError).
  def get(n)
    if n >= size || n <= -1
      error_class = defined?(IndexOutOfBoundsException) ? IndexOutOfBoundsException : IndexError
      raise error_class, "index #{n} out of bounds for length #{size}"
    end

    self[n]
  end

  # Inserts +o+ before index +n+ and returns the array.
  def add(n, o)
    insert(n, o)
  end
end
@@ -0,0 +1,327 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "pp"
4
+
5
+ require "anbt-sql-formatter/token"
6
+ require "anbt-sql-formatter/constants"
7
+ require "anbt-sql-formatter/helper"
8
+ require "anbt-sql-formatter/coarse-tokenizer"
9
+
10
+ class AnbtSql
11
+ class Parser
12
+
13
# Creates a parser that uses +rule+ (read later for its kw_multi_words list).
def initialize(rule)
  # Operators made of two characters that must stay together as one symbol.
  # Note that "||" is string concatenation.
  @two_character_symbol = [ "<>", "<=", ">=", "||" ]

  @rule = rule

  @before = nil     # string currently being analysed
  @pos = nil        # current position within @before
  @char = nil       # character currently being analysed
  @token_pos = nil  # index into @tokens used by next_token
end
31
+
32
+
33
##
# True when +c+ is treated as whitespace: space, tab, LF, CR, or the
# integer 65535.
#
# 2005.07.26:: Tosiki Iga - \r has to be covered as well.
# 2005.08.12:: Tosiki Iga - 65535 (originally -1) is treated as whitespace.
def space?(c)
  case c
  when ' ', "\t", "\n", "\r", 65535 then true
  else false
  end
end
43
+
44
+
45
##
# Decides whether +c+ may be treated as a letter.
# Anything that is not whitespace, a digit or a symbol counts, so
# full-width (multi-byte) characters are accepted as letters too.
def letter?(c)
  !(space?(c) || digit?(c) || symbol?(c))
end
55
+
56
+
57
# True when +c+ sorts between "0" and "9" (string comparison).
#
# Non-String input such as nil returns false; previously the comparison
# of a String with a non-String raised.
def digit?(c)
  return false unless c.is_a?(String)

  "0" <= c && c <= '9'
end
60
+
61
+
62
##
# True when +c+ is one of the characters tokenized as a symbol.
#
# "#" is considered part of a word, and the underscore is not a symbol.
# How to treat any further characters is left undecided.
def symbol?(c)
  [
    '"', "?", "%", "&", "'", "(", ")", "|", "*", "+",
    ",", "-", ".", "/", ":", ";", "<", "=", ">"
  ].include?(c)
end
70
+
71
+
72
##
# Reads the next token from @before, starting at @pos.
#
# Advances @pos past the token that was read and returns an AnbtSql::Token
# of type SPACE, SYMBOL, VALUE, KEYWORD, NAME or UNKNOWN. Returns nil (after
# still incrementing @pos) once @pos has reached the end of @before.
# No grammar checking is done here.
def next_sql_token
  $stderr.puts "next_token #{@pos} <#{@before}> #{@before.length}" if $DEBUG

  start_pos = @pos

  if @pos >= @before.length
    @pos += 1
    return nil
  end

  @char = @before.charAt(@pos)

  if space?(@char)
    # A run of consecutive whitespace characters becomes a single SPACE token.
    loop do
      @char = @before.charAt(@pos + 1)
      @pos += 1
      break if !space?(@char) || @pos >= @before.length
    end
    return AnbtSql::Token.new(AnbtSql::TokenConstants::SPACE,
                              @before[start_pos...@pos], start_pos)

  elsif @char == ";"
    @pos += 1
    # 2005.07.26 Tosiki Iga: a semicolon is not treated as end of input.
    return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                              ";", start_pos)

  elsif digit?(@char)
    rest = @before[@pos..-1]
    # Both patterns are anchored with \A so that only a literal starting at
    # the current position is consumed (an unanchored hex pattern used to
    # pick up a hex literal appearing later in the text). The decimal
    # pattern always matches here because rest starts with a digit.
    num = rest[/\A0x[0-9a-fA-F]+/] ||        # hex
          rest[/\A\d+(\.\d+(e-?\d+)?)?/]     # float or scientific
    @pos += num.length
    return AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                              num, start_pos)

  elsif letter?(@char)
    # A dot inside a word is treated as part of the word.
    while letter?(@char) || digit?(@char) || @char == '.'
      @pos += 1
      break if @pos >= @before.length

      @char = @before.charAt(@pos)
    end
    word = @before[start_pos...@pos]

    upper_word = word.upcase
    reserved = AnbtSql::Constants::SQL_RESERVED_WORDS.any? { |w| w.upcase == upper_word }
    token_type = reserved ? AnbtSql::TokenConstants::KEYWORD : AnbtSql::TokenConstants::NAME
    return AnbtSql::Token.new(token_type, word, start_pos)

  elsif symbol?(@char)
    sym = @char.dup
    @pos += 1
    if @pos >= @before.length
      return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                                sym, start_pos)
    end

    # Check whether this and the following character form a two-character symbol.
    ch2 = @before.charAt(@pos)
    if @two_character_symbol.any? { |two| two.charAt(0) == @char && two.charAt(1) == ch2 }
      @pos += 1
      sym += ch2
    end

    if @char == "-"
      # A minus sign immediately followed by a number is one negative VALUE.
      # \A (not ^) is required: ^ would also match after a later newline.
      num = @before[@pos..-1][/\A\d+(\.\d+(e-?\d+)?)?/] # float or scientific
      if num
        @pos += num.length
        return AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                                  sym + num, start_pos)
      end
    end

    return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                              sym, start_pos)

  else
    @pos += 1
    return AnbtSql::Token.new(AnbtSql::TokenConstants::UNKNOWN,
                              "" + @char,
                              start_pos)
  end
end
204
+
205
+
206
# Converts coarse tokens into the fine-grained token list stored in @tokens.
#
# Quoted strings and comments become a single token each; :plain text is
# split further with next_sql_token. An END_OF_SQL token is appended last.
def prepare_tokens(coarse_tokens)
  @tokens = []

  (0...coarse_tokens.size).each do |index|
    coarse = coarse_tokens[index]

    case coarse._type
    when :quote_single
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                                    coarse.string)
    when :quote_double
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::NAME,
                                    coarse.string)
    when :comment_single
      # The trailing line break is dropped from single-line comments.
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::COMMENT,
                                    coarse.string.chomp)
    when :comment_multi
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::COMMENT,
                                    coarse.string)
    when :plain
      @before = coarse.string
      @pos = 0
      emitted = 0
      loop do
        sql_token = next_sql_token
        pp "@" * 64, emitted, sql_token, sql_token.class if $DEBUG

        # nil means the plain text has been consumed.
        break if sql_token == nil

        @tokens.push sql_token
        emitted += 1
      end
    end
  end

  @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::END_OF_SQL,
                                "")
end
253
+
254
+
255
##
# Merges consecutive tokens that spell a multi-word keyword into one token.
#
#   ["a", " ", "group", " ", "by", " ", "b"]
#   => ["a", " ", "group by", " ", "b"]
#
# tokens:: token list, modified in place. The first token of a match keeps
#          its object and receives the joined text (the first whitespace run
#          in each token is normalised to a single space); the remaining
#          tokens of the match are deleted.
def concat_multiwords_keyword(tokens)
  keyword_words = @rule.kw_multi_words.map { |kw| kw.split(" ") }

  # Try keywords with more words first.
  keyword_words.sort { |a, b| b.size <=> a.size }.each do |words|
    next if words.empty?

    # N words are separated by N-1 tokens, so a match spans 2N-1 tokens.
    span = words.size * 2 - 1

    # Anchored and escaped: the window must equal the keyword exactly.
    # An unanchored pattern also matched e.g. "mygroup by_col".
    pattern = /\A#{Regexp.escape(words.join(" "))}\z/i

    index = 0
    while index <= tokens.size - span
      joined = tokens[index, span].map { |t| t.string.sub(/\s+/, " ") }.join

      if pattern =~ joined
        tokens[index].string = joined
        # Delete from the back so earlier offsets stay valid.
        (span - 1).downto(1) { |offset| tokens.delete_at(index + offset) }
      end

      index += 1
    end
  end
end
285
+
286
+
287
# Returns the token at the current @token_pos in @tokens
# (nil when the position is past the end).
def next_token
  @tokens.at(@token_pos)
end
290
+
291
+
292
##
# Converts an SQL string into an array of tokens and returns it.
#
# The trailing END_OF_SQL token is not included in the result, and
# multi-word keywords are merged into single tokens.
#
# sql_str:: the SQL text before conversion
def parse(sql_str)
  prepare_tokens(CoarseTokenizer.new.tokenize(sql_str))

  result = []
  @token_pos = 0
  loop do
    current = next_token

    # result.size equals the number of tokens collected so far.
    pp "=" * 64, result.size, current, current.class if $DEBUG

    break if current._type == AnbtSql::TokenConstants::END_OF_SQL

    result.push current
    @token_pos += 1
  end

  concat_multiwords_keyword(result)

  result
end
326
+ end
327
+ end
@@ -0,0 +1,121 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "pp"
4
+
5
+ =begin
6
+ AnbtSqlFormatter: SQL整形ツール. SQL文を決められたルールに従い整形します。
7
+
8
+ フォーマットを実施するためには、入力されるSQLがSQL文として妥当であることが前提条件となります。
9
+
10
+ このクラスが準拠するSQL整形のルールについては、下記URLを参照ください。
11
+ http://homepage2.nifty.com/igat/igapyon/diary/2005/ig050613.html
12
+
13
+ このクラスは SQLの変換規則を表します。
14
+
15
+ @author WATANABE Yoshinori (a-san) : original version at 2005.07.04.
16
+ @author IGA Tosiki : merge into blanc Framework at 2005.07.04
17
+ @author sonota : porting to Ruby 2009-2010
18
+ =end
19
+
20
class AnbtSql
  # Holds the conversion rules applied when formatting SQL: keyword case,
  # indent string, keyword groups that control line breaks and indentation,
  # and the list of names recognised as functions.
  class Rule
    attr_accessor :keyword, :indent_string, :function_names, :space_after_comma
    attr_accessor :kw_multi_words

    # nl: New Line
    # x: the keyword
    attr_accessor :kw_plus1_indent_x_nl
    attr_accessor :kw_minus1_indent_nl_x_plus1_indent
    attr_accessor :kw_nl_x
    attr_accessor :kw_nl_x_plus1_indent

    # Keyword conversion rule: leave keywords untouched.
    KEYWORD_NONE = 0

    # Keyword conversion rule: convert keywords to upper case.
    KEYWORD_UPPER_CASE = 1

    # Keyword conversion rule: convert keywords to lower case.
    KEYWORD_LOWER_CASE = 2

    def initialize
      # Keyword conversion rule.
      @keyword = KEYWORD_UPPER_CASE

      # Indent string; any value may be set (typically spaces or "\t").
      # NOTE(review): the listing this was taken from may have collapsed
      # runs of spaces - confirm the intended default width.
      @indent_string = " "

      @space_after_comma = false

      # __foo
      # ____KW
      @kw_plus1_indent_x_nl = %w(INSERT INTO CREATE DROP TRUNCATE TABLE CASE)

      # ____foo
      # __KW
      # ____bar
      @kw_minus1_indent_nl_x_plus1_indent = %w(FROM WHERE SET HAVING)
      @kw_minus1_indent_nl_x_plus1_indent.concat ["ORDER BY", "GROUP BY"]

      # __foo
      # ____KW
      @kw_nl_x_plus1_indent = %w(ON USING)

      # __foo
      # __KW
      @kw_nl_x = %w(OR THEN ELSE)
      # @kw_nl_x = %w(OR WHEN ELSE)

      @kw_multi_words = ["ORDER BY", "GROUP BY"]

      # Function names. (The Java version defaults to null.)
      @function_names =
        [
          # getNumericFunctions
          "ABS", "ACOS", "ASIN", "ATAN", "ATAN2", "BIT_COUNT", "CEILING",
          "COS", "COT", "DEGREES", "EXP", "FLOOR", "LOG", "LOG10",
          "MAX", "MIN", "MOD", "PI", "POW", "POWER", "RADIANS", "RAND",
          "ROUND", "SIN", "SQRT", "TAN", "TRUNCATE",
          # getStringFunctions
          # ("HEX" and "INSERT" used to be fused into the single bogus entry
          # "HEX,INSERT"; repeated LOCATE/SUBSTRING entries were dropped.)
          "ASCII", "BIN", "BIT_LENGTH", "CHAR", "CHARACTER_LENGTH",
          "CHAR_LENGTH", "CONCAT", "CONCAT_WS", "CONV", "ELT",
          "EXPORT_SET", "FIELD", "FIND_IN_SET", "HEX", "INSERT", "INSTR",
          "LCASE", "LEFT", "LENGTH", "LOAD_FILE", "LOCATE",
          "LOWER", "LPAD", "LTRIM", "MAKE_SET", "MATCH", "MID", "OCT",
          "OCTET_LENGTH", "ORD", "POSITION", "QUOTE", "REPEAT",
          "REPLACE", "REVERSE", "RIGHT", "RPAD", "RTRIM", "SOUNDEX",
          "SPACE", "STRCMP", "SUBSTRING",
          "SUBSTRING_INDEX", "TRIM", "UCASE", "UPPER",
          # getSystemFunctions
          "DATABASE", "USER", "SYSTEM_USER", "SESSION_USER", "PASSWORD",
          "ENCRYPT", "LAST_INSERT_ID", "VERSION",
          # getTimeDateFunctions
          "DAYOFWEEK", "WEEKDAY", "DAYOFMONTH", "DAYOFYEAR", "MONTH",
          "DAYNAME", "MONTHNAME", "QUARTER", "WEEK", "YEAR", "HOUR",
          "MINUTE", "SECOND", "PERIOD_ADD", "PERIOD_DIFF", "TO_DAYS",
          "FROM_DAYS", "DATE_FORMAT", "TIME_FORMAT", "CURDATE",
          "CURRENT_DATE", "CURTIME", "CURRENT_TIME", "NOW", "SYSDATE",
          "CURRENT_TIMESTAMP", "UNIX_TIMESTAMP", "FROM_UNIXTIME",
          "SEC_TO_TIME", "TIME_TO_SEC"
        ]
    end

    # True when +name+ matches one of function_names, ignoring case.
    #
    # Returns false when function_names is nil or +name+ is nil. Uses only
    # built-in String methods, so it does not depend on the
    # String#equalsIgnoreCase extension being loaded.
    def function?(name)
      return false if @function_names.nil? || name.nil?

      wanted = name.upcase
      @function_names.any? { |fn| fn.upcase == wanted }
    end
  end
end