anbt-sql-formatter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ require "pp"
2
+
3
# Minimal last-in/first-out stack backed by an Array.
#
# Includes Enumerable, so every Enumerable method (map, select, to_a, ...)
# is available and iterates from the oldest element to the newest.
class Stack
  include Enumerable

  def initialize
    @arr = []
  end

  # Yields each element, oldest first.
  #
  # Returns an Enumerator when called without a block, so that a bare
  # +each+ does not raise LocalJumpError on a non-empty stack.
  def each(&block)
    return to_enum(:each) unless block_given?

    @arr.each(&block)
  end

  # Removes every element.
  def clear
    @arr.clear
  end

  # Puts +o+ on top of the stack.
  def push(o)
    @arr.push o
  end

  # Removes and returns the top element, or nil when the stack is empty.
  def pop
    @arr.pop
  end
end
28
+
29
+
30
# Java-String-style convenience methods added to the core String class.
class String
  # Returns true when the string ends with +c+.
  #
  # +c+ may be any length (the previous implementation compared only the
  # last character, so a multi-character suffix never matched).
  # Non-string arguments simply yield false instead of raising.
  def endsWith(c)
    c.respond_to?(:to_str) && end_with?(c)
  end

  # Returns true when the string starts with +c+.
  #
  # +c+ may be any length; non-string arguments yield false.
  def startsWith(c)
    c.respond_to?(:to_str) && start_with?(c)
  end

  # Returns the one-character substring at index +n+.
  # Yields "" when +n+ equals the length and nil when it is further out of
  # range (standard behaviour of String#[] with a range).
  def charAt(n)
    self[n..n]
  end

  # Returns true when the string has the same content as +str+.
  def equals(str)
    self == str
  end

  # Compares with +other+ after upcasing both sides.
  def equalsIgnoreCase(other)
    upcase == other.upcase
  end

  # Returns a copy with leading and trailing whitespace removed.
  def trim
    strip
  end
end
55
+
56
+
57
# Java-List-style convenience methods added to the core Array class.
class Array
  # Inserts +o+ before index +n+ (delegates to Array#insert).
  def add(n, o)
    insert(n, o)
  end

  # Returns the element at index +n+.
  #
  # Raises IndexOutOfBoundsException when +n+ is at or past the size, or is
  # -1 or lower.
  # NOTE(review): IndexOutOfBoundsException is not defined in the visible
  # code; presumably it is declared elsewhere in the project - confirm.
  def get(n)
    raise IndexOutOfBoundsException if n >= size || n <= -1

    self[n]
  end

  # Deletes the element at index +n+ and returns it (delegates to
  # Array#delete_at).
  def remove(n)
    delete_at(n)
  end
end
@@ -0,0 +1,327 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "pp"
4
+
5
+ require "anbt-sql-formatter/token"
6
+ require "anbt-sql-formatter/constants"
7
+ require "anbt-sql-formatter/helper"
8
+ require "anbt-sql-formatter/coarse-tokenizer"
9
+
10
+ class AnbtSql
11
+ class Parser
12
+
13
# Creates a parser that uses +rule+ (stored in @rule).
def initialize(rule)
  # Two-character symbols. Note that "||" is string concatenation.
  @two_character_symbol = [ "<>", "<=", ">=", "||" ]

  @rule = rule

  # Scanner state, all unset until parsing starts:
  #   @before    - the string being tokenized
  #   @pos       - current position within @before
  #   @char      - character currently being examined
  #   @token_pos - index of the current token
  @before = @pos = @char = @token_pos = nil
end
31
+
32
+
33
##
# Returns true when +c+ counts as whitespace: a space, tab, newline,
# carriage return, or the integer 65535.
#
# 2005.07.26:: Tosiki Iga - "\r" has to be covered as well.
# 2005.08.12:: Tosiki Iga - 65535 (originally -1) is treated as whitespace.
def space?(c)
  [' ', "\t", "\n", "\r", 65535].any? { |candidate| c == candidate }
end
43
+
44
+
45
##
# Returns true when +c+ may be treated as a letter, i.e. when it is not
# whitespace, not a digit and not a symbol.
# Full-width characters and the like are accepted as letters too.
def letter?(c)
  !(space?(c) || digit?(c) || symbol?(c))
end
55
+
56
+
57
# Returns true when +c+ lies between "0" and "9" inclusive, using String
# comparison with "0" as the receiver of the first check.
def digit?(c)
  return false unless "0" <= c

  c <= "9"
end
60
+
61
+
62
##
# Returns true when +c+ is one of the characters treated as a symbol.
#
# "#" is regarded as part of a string and the underscore is not a symbol,
# so neither is listed. How to treat further characters is left open.
def symbol?(c)
  [
    '"', "?", "%", "&", "'", "(", ")", "|", "*", "+",
    ",", "-", ".", "/", ":", ";", "<", "=", ">"
  ].include?(c)
end
70
+
71
+
72
##
# Scans one token from @before starting at @pos and moves @pos past it.
#
# The character at @pos selects the branch, tried in this order:
# whitespace run, ";", number, word, symbol. No SQL grammar checking is
# done here.
#
# Returns an AnbtSql::Token built from (type, text, start index in
# @before), or nil once @pos has reached the end of @before (@pos is still
# incremented by one in that case).
def next_sql_token
  $stderr.puts "next_token #{@pos} <#{@before}> #{@before.length}" if $DEBUG

  start_pos = @pos

  if @pos >= @before.length
    @pos += 1
    return nil
  end

  @char = @before[@pos, 1]

  if space?(@char)
    # Collapse the whole whitespace run into a single SPACE token.
    # At the end of input @before[@pos, 1] is "", which is not whitespace,
    # so the loop stops there.
    spaces = ""
    while space?(@char)
      spaces += @char
      @pos += 1
      @char = @before[@pos, 1]
    end
    return AnbtSql::Token.new(AnbtSql::TokenConstants::SPACE,
                              spaces, start_pos)
  end

  if @char == ";"
    # 2005.07.26 Tosiki Iga: a semicolon is not treated as the end of input.
    @pos += 1
    return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                              ";", start_pos)
  end

  if digit?(@char)
    rest = @before[@pos..-1]
    # Both patterns are anchored with \A so that only text starting at the
    # current position can match. Unanchored, a hex literal appearing later
    # in the string (e.g. "1 + 0xFF") was returned in place of the number
    # actually at @pos. The decimal pattern always matches here because
    # @char is a digit.
    num = rest[/\A0x[0-9a-fA-F]+/] ||          # hex
          rest[/\A\d+(?:\.\d+(?:e-?\d+)?)?/]   # integer, decimal or scientific
    @pos += num.length
    return AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                              num, start_pos)
  end

  if letter?(@char)
    # A dot inside a word is kept as part of the word.
    word = ""
    while letter?(@char) || digit?(@char) || @char == "."
      word += @char
      @pos += 1
      break if @pos >= @before.length

      @char = @before[@pos, 1]
    end

    # Case-insensitive reserved-word lookup without building an upcased
    # copy of the whole list for every token.
    upper = word.upcase
    reserved = AnbtSql::Constants::SQL_RESERVED_WORDS.any? { |w| w.upcase == upper }
    type = reserved ? AnbtSql::TokenConstants::KEYWORD : AnbtSql::TokenConstants::NAME
    return AnbtSql::Token.new(type, word, start_pos)
  end

  if symbol?(@char)
    sym = @char.dup
    @pos += 1
    if @pos >= @before.length
      return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                                sym, start_pos)
    end

    # Check whether this is a two-character symbol.
    ch2 = @before[@pos, 1]
    if @two_character_symbol.any? { |two| two[0, 1] == @char && two[1, 1] == ch2 }
      @pos += 1
      sym += ch2
    end

    if @char == "-"
      # A minus sign immediately followed by digits is a negative number.
      # \A (not ^) is required: ^ also matches after a newline, which made
      # "-\n5" produce "-5" while leaving @pos in front of the "5".
      num = @before[@pos..-1][/\A\d+(?:\.\d+(?:e-?\d+)?)?/]
      if num
        @pos += num.length
        return AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                                  sym + num, start_pos)
      end
    end

    return AnbtSql::Token.new(AnbtSql::TokenConstants::SYMBOL,
                              sym, start_pos)
  end

  # Fallback for a character that none of the predicates above accepted.
  @pos += 1
  AnbtSql::Token.new(AnbtSql::TokenConstants::UNKNOWN,
                     "" + @char,
                     start_pos)
end
204
+
205
+
206
# Builds @tokens from the coarse tokens.
#
# Quoted strings and comments are turned into one token each; a :plain
# coarse token is split further by repeatedly calling next_sql_token until
# it returns nil. Coarse tokens of any other type are skipped.
# An END_OF_SQL token with an empty string is appended last.
#
# Returns @tokens.
def prepare_tokens(coarse_tokens)
  @tokens = []

  coarse_tokens.each do |coarse|
    case coarse._type
    when :plain
      @before = coarse.string
      @pos = 0
      count = 0
      loop do
        token = next_sql_token
        pp "@" * 64, count, token, token.class if $DEBUG

        # nil signals that the plain text has been fully consumed.
        break if token.nil?

        @tokens << token
        count += 1
      end
    when :comment_multi
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::COMMENT,
                                    coarse.string)
    when :comment_single
      # The trailing line break is dropped from a single-line comment.
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::COMMENT,
                                    coarse.string.chomp)
    when :quote_double
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::NAME,
                                    coarse.string)
    when :quote_single
      @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::VALUE,
                                    coarse.string)
    end
  end

  @tokens << AnbtSql::Token.new(AnbtSql::TokenConstants::END_OF_SQL,
                                "")
end
253
+
254
+
255
##
# Merges consecutive tokens that together spell a multi-word keyword from
# @rule.kw_multi_words into a single token, modifying +tokens+ in place.
#
#   ["a", " ", "group", " ", "by", " ", "b"]
#   => ["a", " ", "group by", " ", "b"]
#
# For a keyword of N words a window of N * 2 - 1 tokens (words with the
# separators between them) is examined. The first whitespace run of each
# token in the window is reduced to one space before comparing, and the
# first token of a matching window receives the joined text.
#
# The comparison is case-insensitive and must cover the whole window.
# Without the \A / \z anchors a window that merely contained the keyword
# text (for example the tokens "'group by'", " ", "as") was merged as well.
# The keyword is still interpolated into a Regexp as before.
def concat_multiwords_keyword(tokens)
  keyword_word_lists = @rule.kw_multi_words.map { |kw| kw.split(" ") }

  # Keywords with more words are handled first.
  keyword_word_lists.sort { |a, b| b.size <=> a.size }.each do |words|
    window_size = words.size * 2 - 1
    # Built once per keyword rather than once per window position.
    pattern = /\A#{words.join(" ")}\z/i

    index = 0
    while index <= tokens.size - window_size
      joined = tokens[index, window_size].map { |t| t.string.sub(/\s+/, " ") }.join

      if pattern =~ joined
        tokens[index].string = joined
        # Drop the tokens that were folded into tokens[index].
        tokens.slice!(index + 1, window_size - 1)
      end

      index += 1
    end
  end
end
285
+
286
+
287
# Returns the entry of @tokens at index @token_pos.
def next_token
  @tokens.slice(@token_pos)
end
290
+
291
+
292
##
# Converts an SQL string into an array of tokens and returns it.
#
# The string goes through CoarseTokenizer and prepare_tokens; the resulting
# tokens are then collected up to, but not including, the END_OF_SQL token,
# and multi-word keywords are merged by concat_multiwords_keyword.
#
# sql_str:: the SQL text before conversion
def parse(sql_str)
  prepare_tokens(CoarseTokenizer.new.tokenize(sql_str))

  collected = []
  count = 0
  @token_pos = 0

  loop do
    token = next_token
    pp "=" * 64, count, token, token.class if $DEBUG

    break if token._type == AnbtSql::TokenConstants::END_OF_SQL

    collected << token
    count += 1
    @token_pos += 1
  end

  concat_multiwords_keyword(collected)
  collected
end
326
+ end
327
+ end
@@ -0,0 +1,121 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "pp"
4
+
5
+ =begin
6
+ AnbtSqlFormatter: SQL整形ツール. SQL文を決められたルールに従い整形します。
7
+
8
+ フォーマットを実施するためには、入力されるSQLがSQL文として妥当であることが前提条件となります。
9
+
10
+ このクラスが準拠するSQL整形のルールについては、下記URLを参照ください。
11
+ http://homepage2.nifty.com/igat/igapyon/diary/2005/ig050613.html
12
+
13
+ このクラスは SQLの変換規則を表します。
14
+
15
+ @author WATANABE Yoshinori (a-san) : original version at 2005.07.04.
16
+ @author IGA Tosiki : merge into blanc Framework at 2005.07.04
17
+ @author sonota : porting to Ruby 2009-2010
18
+ =end
19
+
20
class AnbtSql
  # Holds the conversion rules used when formatting SQL: keyword case
  # handling, the indent string, keyword groups that drive line breaks and
  # indentation, multi-word keywords and the known function names.
  class Rule
    attr_accessor :keyword, :indent_string, :function_names, :space_after_comma
    attr_accessor :kw_multi_words

    # nl: New Line
    # x: the keyword
    attr_accessor :kw_plus1_indent_x_nl
    attr_accessor :kw_minus1_indent_nl_x_plus1_indent
    attr_accessor :kw_nl_x
    attr_accessor :kw_nl_x_plus1_indent

    # Keyword conversion rule: leave keywords untouched
    KEYWORD_NONE = 0

    # Keyword conversion rule: convert to upper case
    KEYWORD_UPPER_CASE = 1

    # Keyword conversion rule: convert to lower case
    KEYWORD_LOWER_CASE = 2

    def initialize
      # Keyword conversion rule.
      @keyword = KEYWORD_UPPER_CASE

      # Indent string. Any string may be set.
      # NOTE(review): whitespace inside string literals cannot be told apart
      # in the reviewed copy of this file; the value is reproduced as seen
      # there (a single space) - confirm against the intended default.
      @indent_string = " "

      @space_after_comma = false

      # __foo
      # ____KW
      @kw_plus1_indent_x_nl = %w(INSERT INTO CREATE DROP TRUNCATE TABLE CASE)

      # ____foo
      # __KW
      # ____bar
      @kw_minus1_indent_nl_x_plus1_indent = %w(FROM WHERE SET HAVING)
      @kw_minus1_indent_nl_x_plus1_indent.concat ["ORDER BY", "GROUP BY"]

      # __foo
      # ____KW
      @kw_nl_x_plus1_indent = %w(ON USING)

      # __foo
      # __KW
      @kw_nl_x = %w(OR THEN ELSE)
      # @kw_nl_x = %w(OR WHEN ELSE)

      @kw_multi_words = ["ORDER BY", "GROUP BY"]

      # Function names.
      # (The Java version defaults to null.)
      # "HEX" and "INSERT" are separate entries; they used to be fused into
      # the single string "HEX,INSERT", so neither name was recognised.
      # Repeated entries (LOCATE, SUBSTRING) are listed once.
      @function_names =
        [
         # getNumericFunctions
         "ABS", "ACOS", "ASIN", "ATAN", "ATAN2", "BIT_COUNT", "CEILING",
         "COS", "COT", "DEGREES", "EXP", "FLOOR", "LOG", "LOG10",
         "MAX", "MIN", "MOD", "PI", "POW", "POWER", "RADIANS", "RAND",
         "ROUND", "SIN", "SQRT", "TAN", "TRUNCATE",
         # getStringFunctions
         "ASCII", "BIN", "BIT_LENGTH", "CHAR", "CHARACTER_LENGTH",
         "CHAR_LENGTH", "CONCAT", "CONCAT_WS", "CONV", "ELT",
         "EXPORT_SET", "FIELD", "FIND_IN_SET", "HEX", "INSERT", "INSTR",
         "LCASE", "LEFT", "LENGTH", "LOAD_FILE", "LOCATE",
         "LOWER", "LPAD", "LTRIM", "MAKE_SET", "MATCH", "MID", "OCT",
         "OCTET_LENGTH", "ORD", "POSITION", "QUOTE", "REPEAT",
         "REPLACE", "REVERSE", "RIGHT", "RPAD", "RTRIM", "SOUNDEX",
         "SPACE", "STRCMP", "SUBSTRING",
         "SUBSTRING_INDEX", "TRIM", "UCASE", "UPPER",
         # getSystemFunctions
         "DATABASE", "USER", "SYSTEM_USER", "SESSION_USER", "PASSWORD",
         "ENCRYPT", "LAST_INSERT_ID", "VERSION",
         # getTimeDateFunctions
         "DAYOFWEEK", "WEEKDAY", "DAYOFMONTH", "DAYOFYEAR", "MONTH",
         "DAYNAME", "MONTHNAME", "QUARTER", "WEEK", "YEAR", "HOUR",
         "MINUTE", "SECOND", "PERIOD_ADD", "PERIOD_DIFF", "TO_DAYS",
         "FROM_DAYS", "DATE_FORMAT", "TIME_FORMAT", "CURDATE",
         "CURRENT_DATE", "CURTIME", "CURRENT_TIME", "NOW", "SYSDATE",
         "CURRENT_TIMESTAMP", "UNIX_TIMESTAMP", "FROM_UNIXTIME",
         "SEC_TO_TIME", "TIME_TO_SEC"
        ]
    end

    # Returns true when +name+ equals one of @function_names, ignoring case
    # (both sides are upcased before comparing).
    # Returns false when @function_names is nil.
    #
    # The comparison is written out here with String#upcase so that this
    # class does not depend on a String extension it never requires.
    def function?(name)
      return false if @function_names.nil?

      @function_names.any? { |function_name| function_name.upcase == name.upcase }
    end
  end
end