tsql_shparser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/tsql_shparser.rb +198 -0
- data/lib/tsql_shparser/tsql_stmt.rb +673 -0
- data/lib/tsql_shparser/tsql_tokenizer.rb +375 -0
- data/test/tst_tokenizer.rb +211 -0
- metadata +40 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
#
|
2
|
+
# tsql_shparser.rb: Shallow Parser for t-SQL
|
3
|
+
# Copyright (c) 2005-2006 Shashank Date (shanko_date@yahoo.com)
|
4
|
+
#
|
5
|
+
# License: Ruby's
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
10
|
+
#
|
11
|
+
#
|
12
|
+
require 'tsql_shparser/tsql_tokenizer'
|
13
|
+
require 'tsql_shparser/tsql_stmt'
|
14
|
+
|
15
|
+
## Shallow Parser for t-SQL: Creates an array of arrays of Token objects for each
|
16
|
+
## t-SQL Statement parsed as input and generates an array of TSQLStmt objects.
|
17
|
+
class TSQL_ShParser
  VERSION = "0.0.1"

  # Keywords that open a statement this parser extracts (see #parse).
  @@start = ["SELECT","INSERT","UPDATE","DELETE","DROP","ALTER","CREATE","TRUNCATE"]

  # Keywords that may begin a new t-SQL statement; used by #parse_stmt to
  # detect where the statement currently being collected ends.
  @@first = ["ALTER", "BACKUP", "BEGIN", "BREAK", "CHECKPOINT", "CONTINUE", "DENY", "GRANT", "REVOKE",
             "BULK", "CLOSE", "COMMIT", "CREATE","DBCC", "DEALLOCATE",
             "DECLARE", "DELETE","DROP", "DUMP", "ERRLVL", "EXEC", "EXECUTE", "EXIT", "FETCH",
             "GO", "GOTO", "IF", "INSERT","KILL", "LOAD", "OPEN", "OPENDATASOURCE",
             "OPENQUERY", "OPENROWSET","PRINT","RAISERROR", "READTEXT", "RECONFIGURE",
             "RESTORE","RETURN", "ROLLBACK", "SAVE", "SELECT", "SET", "SETUSER", "SHUTDOWN",
             "STATISTICS","TRUNCATE", "UPDATE", "UPDATETEXT", "USE","WAITFOR", "WHILE", "WRITETEXT"]

  ## Create a parser. When +file+ is given it is handed to the Tokenizer and
  ## tokenized immediately; otherwise pass the text to #parse later.
  def initialize(file=nil)
    @input_file = file
    @tok = Tokenizer.new(file)
    @tok.tokenize_file if file
  end

  ## Return nil if it is not a sub-select
  ## Return relative index (w.r.t SELECT) of the token
  ## prior to the leftmost LEFT_PARAN otherwise
  def is_sub_select?
    ss = nil

    # Make sure that we are indeed looking at the sub-SELECT
    # The SELECT token is already consumed, so we have to look back to verify
    curr = @tok.look_back(1)
    # Nothing consumed yet (nil) cannot be a sub-SELECT either.
    return ss unless curr && curr.token_value == 'SELECT'

    # If there is no token before SELECT then this is not a sub-SELECT
    prev = @tok.look_back(2)
    return ss unless prev

    n = 3

    # Take care of the arbitrary/redundant nesting of expressions in ( )
    while prev && prev.token_value == LEFT_PARAN
      prev = @tok.look_back(n)
      n += 1
    end

    # Check the token before the left most LEFT_PARAN
    follow = ['SELECT','DISTINCT','PERCENT','JOIN','WHERE',
              'BY','IN','ANY','ALL','EXISTS','UNION','FROM',
              '<','>','=','>=','<=','<>',',']

    prev_prev = @tok.look_back(n)

    ss = n if prev && follow.include?(prev.token_value)

    # Consider the case: SELECT TOP 10 (select ...)
    if prev && (prev.token_value =~ /^\d+$/)
      ss = n if prev_prev && prev_prev.token_value == 'TOP'
    end

    # Consider the case UNION ALL
    if prev && prev.token_value == 'ALL'
      ss = n if prev_prev && prev_prev.token_value == 'UNION'
    end

    ## Finally, consider the case of SELECT being part of the INSERT statement
    #
    # skip the (optional) list of column names that may
    # follow the name of the table in an INSERT, for e.g:
    #   INSERT INTO Table1 (COL1, COL2, COL3)
    #   SELECT 'A1','B2','C3';
    if prev && prev.token_value == RIGHT_PARAN
      prev = @tok.look_back(n)

      # Rewind to the matching LEFT_PARAN
      # We are more strict here and verify that all tokens
      # are either Id or Comma: not considering the possibility of a Dot
      # NOTE(review): the first pass re-reads look_back(n) before n advances;
      # kept exactly as written -- confirm against the Tokenizer before changing.
      while prev && (prev.token_type == :Id || prev.token_type == :Comma)
        prev = @tok.look_back(n)
        n += 1
      end

      # This must be the LEFT_PARAN
      prev = @tok.look_back(n) if prev && prev.token_value == LEFT_PARAN
    end

    # Skip the (possibly dotted) table name.
    while prev && (prev.token_type == :Id || prev.token_type == :Dot)
      prev = @tok.look_back(n)
      n += 1
    end

    # ??? prev = @tok.look_back(n) ???
    ss = n if prev && prev.token_value == 'INSERT'

    prev_prev = @tok.look_back(n)
    ss = n+1 if prev && prev.token_value == 'INTO' && prev_prev && prev_prev.token_value == 'INSERT'

    ss
  end

  ## Keep consuming tokens till you detect the end of the t-SQL statement.
  ## It is not very accurate about determining the end especially if the
  ## statements are redundantly nested inside parentheses or are immediately
  ## followed by user-defined functions or stored procs.
  ## It takes in as its argument a category which is one of the symbols in
  ## the @@start array. It returns an array (possibly empty) of Token objects.
  ## An empty array is returned when the token stream is exhausted or when the
  ## next token is not +category+ (that token stays consumed).
  def parse_stmt(category="SELECT")
    stmt = []

    curr = @tok.get_next_token
    # get_next_token yields nil at end of input; guard before dereferencing.
    return stmt if curr.nil? || curr.token_value != category

    stmt << curr

    found_set = false   # only consulted for UPDATE: its first SET belongs to the statement
    case_end = 0        # CASE/END balance; negative means an END closing an outer block

    prev = curr.token_value

    # Loop to find the end of the stmt
    loop do
      curr = @tok.get_next_token
      break unless curr

      curr_tok = curr.token_value
      case_end += 1 if curr_tok == 'CASE'
      case_end -= 1 if curr_tok == 'END'

      starts_new_stmt = !is_sub_select? &&
                        (@@first.include?(curr_tok) ||
                         curr.token_type == :Label ||
                         curr_tok == ';' ||
                         (case_end < 0 && curr_tok == 'END'))

      if starts_new_stmt
        # Handle TRIGGER Syntax
        trigger_event = ['OF','FOR','AFTER', ',','IF','AND','OR'].include?(prev) &&
                        ['INSERT','UPDATE','DELETE'].include?(curr_tok)
        unless trigger_event
          if category == 'UPDATE' && !found_set && curr_tok == 'SET'
            found_set = true
          else
            # The token belongs to the next statement: hand it back.
            @tok.unget_token
            break
          end
        end
      end

      stmt << curr
      prev = curr_tok
    end

    stmt
  end

  ## Parse the string passed as the argument (str). If no string is passed,
  ## it is assumed that the parser constructor was invoked with a file-name
  ## which was used to open an existing file and was parsed successfully.
  ## If no file-name or string was given, an empty array is returned.
  ## Otherwise an array of TSQLStmt objects is returned, one object per t-SQL
  ## statement that starts with the symbol in @@start.
  def parse(str=nil)
    stmts = []

    @tok.tokenize_string(str) if str
    t = @tok.get_next_token
    while t
      category = t.token_value

      if @@start.include?(category)
        # Push the keyword back so parse_stmt sees it as the first token.
        @tok.unget_token
        s = parse_stmt(category)
        stmts << s unless s.empty?
      end

      t = @tok.get_next_token
    end

    stmts.map { |st| TSQLStmt.new(st) }
  end
end
|
198
|
+
|
@@ -0,0 +1,673 @@
|
|
1
|
+
#
|
2
|
+
# tsql_stmt.rb: Shallow Parser for t-SQL
|
3
|
+
# Copyright (c) 2005-2006 Shashank Date (shanko_date@yahoo.com)
|
4
|
+
#
|
5
|
+
# License: Ruby's
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
10
|
+
#
|
11
|
+
#
|
12
|
+
|
13
|
+
# Token value of an opening parenthesis (identifier spelling kept for compatibility).
LEFT_PARAN = "(".freeze
# Token value of a closing parenthesis.
RIGHT_PARAN = ")".freeze
|
15
|
+
|
16
|
+
## A facade class which adds helper methods to the Array class
|
17
|
+
## and converts it into a TokenStream
|
18
|
+
class TokenStream < Array
  VERSION = "0.0.1"

  ## Build a stream from an array of token objects (anything responding to
  ## +token_value+). A nil argument yields an empty stream.
  def initialize(arr=[])
    super(arr || [])
  end

  ## Concatenation / difference that keep the TokenStream class.
  def +(arr); TokenStream.new(super); end
  def -(arr); TokenStream.new(super); end

  ## Sub-stream for +range+ (a Range). An out-of-bounds range gives an empty
  ## stream instead of failing on the nil that Array#slice returns.
  def slice(range); TokenStream.new(super || []); end

  ## Array of the +token_value+ of every token, in order.
  def to_arr
    # Read from self rather than a cached ivar so that instances which were
    # not built through #initialize still work.
    map { |t| t.token_value }
  end

  ## Starting at index +from+, which must hold a LEFT_PARAN, return the
  ## offset (relative to +from+) of the matching RIGHT_PARAN.
  ## Returns 0 when +from+ is not a LEFT_PARAN or no match exists.
  def find_matching_paran(from)
    list = slice(from..-1).to_arr
    return 0 unless list[0] == LEFT_PARAN

    level = 0
    list.each_index do |i|
      level += 1 if list[i] == LEFT_PARAN
      level -= 1 if list[i] == RIGHT_PARAN
      return i if level == 0
    end

    0
  end

  ## Search, from index +from+, for the first of +words+ that occurs outside
  ## any parentheses. +words+ may be a single String or an Array of Strings;
  ## they are tried in the given order and the first one found wins.
  ## Returns the offset relative to +from+, or 0 when none is found -- a word
  ## sitting exactly at +from+ is therefore treated as not found.
  def find_matching_word(words,from=0)
    list = slice(from..-1).to_arr

    # Array() accepts the bare String callers pass; String has no #each.
    Array(words).each do |word|
      level = 0
      list.each_index do |i|
        level += 1 if list[i] == LEFT_PARAN
        level -= 1 if list[i] == RIGHT_PARAN
        if list[i] == word && level == 0
          return i if i > 0
          break   # found at offset 0: does not count, try the next word
        end
        p [i,level,0] if $DEBUG
      end
    end

    0
  end
end
|
65
|
+
|
66
|
+
## The main class which identifies the various clauses for the t-SQL Statement.
|
67
|
+
## It constructs the TSQLStmt object from the array of tokens parsed by the Parser.
|
68
|
+
## Generates a TokenStream object out of this array and provides methods to
|
69
|
+
## query the TokenStream.
|
70
|
+
class TSQLStmt
  VERSION = "0.0.1"

  ## Hash used in determining which clauses follow the clause indicated by the key
  @@follow = {
    'SELECT' => ['INTO','FROM','JOIN','WHERE','GROUP','HAVING','UNION','ORDER'],
    'FROM'   => ['JOIN','WHERE','GROUP','HAVING','UNION','ORDER'],
    'JOIN'   => ['WHERE','GROUP','HAVING','UNION','ORDER'],
    'WHERE'  => ['GROUP','HAVING','UNION','ORDER'],
    'GROUP'  => ['HAVING','UNION','ORDER'],
    'HAVING' => ['UNION','ORDER'],
    'UNION'  => ['ORDER'],
    'SET'    => ['FROM','JOIN','WHERE','UNION'], # Part of the UPDATE
  }

  # Keywords that may directly precede JOIN (optionally followed by OUTER).
  JOIN_MODIFIERS = ['INNER','LEFT','RIGHT','CROSS','FULL'].freeze

  # Keywords that may sit between SELECT and the first select expression.
  SELECT_HEADER_WORDS = ["SELECT","ALL", "DISTINCT", "TOP", "PERCENT", "WITH", "TIES"].freeze

  attr_reader :typ
  attr_reader :line
  attr_reader :col

  ## Constructor: takes in array of tokens created by the ShParser.
  ## Each array has tokens of exactly one SQL statement. Sometimes the array
  ## may have some extra tokens towards the end, since the parser could not
  ## correctly identify the end of the statement.
  ## +typ+ is the first token's value, except that a SELECT containing a
  ## top-level INTO is typed 'INTO'.
  def initialize(st)
    @stmt = st
    if @stmt && @stmt[0]
      @typ = @stmt[0].token_value
      @typ = 'INTO' if @typ == 'SELECT' && TokenStream.new(@stmt).find_matching_word(['INTO']) > 0
      @line = @stmt[0].line
      @col  = @stmt[0].col
    end
  end

  # ---------------------
  ## Leading run of :KeyWord tokens, if the statement type equals +start+.
  def hdr(start)
    hdr = TokenStream.new
    return hdr unless typ == start

    @stmt.each do |tok|
      break unless tok.token_type == :KeyWord
      hdr += [tok]
    end

    hdr
  end

  private :hdr

  # ---------------------
  ## Collect the :Id tokens of +list+ that are not followed by "(" or "."
  ## and for which the optional block (index, previous token, next token)
  ## returns true. On reaching a SELECT, the sub-statement is delegated to
  ## the method named +msg+ on a new TSQLStmt (skipped when +msg+ is empty).
  def name_picker(list,msg="",&block)
    cols, i, n = [], 0, list.length

    while i < n
      tok = list[i]
      if tok.token_value == 'SELECT'
        close = 0
        close = list.find_matching_paran(i-1) if i > 0 && list[i-1].token_value == LEFT_PARAN

        if close > 0
          # Parenthesised sub-SELECT: take the tokens between the parens.
          st = list.slice(i...(i+close-1))
          i += close
        else
          # Bare SELECT, or an unmatched "(": the sub-statement runs to the
          # end. (Advancing by 0 here used to loop forever.)
          st = list.slice(i..-1)
          i = n
        end

        cols += TSQLStmt.new(st).send(msg) if msg.length > 0
      else
        nxt = list[i+1]
        cols << tok if tok.token_type == :Id &&
                       (nxt.nil? || ![LEFT_PARAN,"."].include?(nxt.token_value)) &&
                       (block_given? ? block.call(i,list[i-1],nxt) : true)
        i += 1
      end
    end

    TokenStream.new(cols)
  end

  private :name_picker

  # ---------------------
  ## Returns [clause tokens, index of keyword]. The clause starts at the
  ## top-level +keyword+ and ends before the first top-level word of +follow+
  ## (default: @@follow[keyword]). Index 0 means "not found".
  def clause(keyword,follow=nil,st=nil)
    klause = TokenStream.new
    return [klause,0] unless @stmt

    follow ||= @@follow[keyword]
    st ||= self.stmt

    # Word lists are passed as Arrays: String has no #each.
    n = st.find_matching_word([keyword])

    if n > 0
      m = st.find_matching_word(Array(follow),n)
      klause = st.slice(n..((m == 0) ? -1 : n+m-1))
    end

    [klause,n]
  end

  private :clause

  # ---------------------

  ## Convert the array of tokens to a TokenStream object
  def stmt; TokenStream.new(@stmt); end

  ## List all the expressions in the SELECT part of the statement
  ## Returns an array of TokenStream objects. Each object in the
  ## array is exactly one expression of the SELECT (a separating comma stays
  ## at the end of the expression it closes). Errors are printed and the
  ## expressions collected so far are returned.
  def list_select_expressions
    columns = []
    return columns unless @stmt && (typ == 'SELECT' || typ == 'INTO')

    begin
      # Skip the header part of the SELECT
      prev, n = nil, nil
      @stmt.each_with_index do |tok,i|
        if SELECT_HEADER_WORDS.include?(tok.token_value)
          prev = tok
          next
        end
        next if (tok.token_value =~ /^\d+$/) && prev && prev.token_value == 'TOP'
        n = i
        break
      end

      # Find the end of the expression list.
      list = []
      if n
        m = self.stmt.find_matching_word(@@follow['SELECT'],n)
        list = self.stmt.slice(n..((m == 0) ? -1 : n+m-1))
      end

      i = 0
      column = TokenStream.new

      while i < list.length
        tok = list[i]
        # Step over a whole parenthesised group so its commas are ignored.
        k = (tok.token_value == LEFT_PARAN) ? list.find_matching_paran(i) : 1
        raise "I cannot find matching paran in:\n #{list.slice(i..-1).to_arr.inspect}\n" if k == 0

        column += list.slice(i...(i+k))
        if list[i+k-1] && list[i+k-1].token_value == ","
          columns << column
          column = TokenStream.new
        end
        i += k
      end
      columns << column
    rescue StandardError => e
      puts e.to_s
    end

    columns
  end

  ## Leading keywords of a SELECT, including the number that follows TOP.
  def hdr_select
    hdr = TokenStream.new
    return hdr unless typ == 'SELECT' || typ == 'INTO'

    prev = nil
    @stmt.each do |tok|
      if tok.token_type == :KeyWord
        hdr += [tok]
      elsif tok.token_type == :Number && prev && prev.token_value == 'TOP'
        hdr += [tok]
      else
        break
      end
      prev = tok
    end
    hdr
  end

  def hdr_insert; hdr('INSERT'); end
  def hdr_update; hdr('UPDATE'); end
  def hdr_delete; hdr('DELETE'); end

  ## Not implemented: always returns an empty TokenStream.
  def columns_delete(all=false)
    TokenStream.new
  end

  ## Columns named in the column list of an INSERT; with +all+, also every
  ## column referenced in the remainder of the statement.
  def columns_insert(all=false)
    cols = TokenStream.new
    return cols unless typ == 'INSERT'

    j = 0
    i = (@stmt[1] && @stmt[1].token_value == 'INTO') ? 3 : 2

    return cols if @stmt[i].nil? || ['EXECUTE','EXEC'].include?(@stmt[i].token_value)

    j = self.stmt.find_matching_paran(i) if @stmt[i].token_value == LEFT_PARAN
    j += i

    cols += name_picker(self.stmt.slice(i..j)) if j > i
    cols += name_picker(self.stmt.slice((j+1)..-1),"columns_all") if all
    cols
  end

  ## Columns referenced by the select expressions (aliases excluded).
  def columns_select(all=false)
    cols = TokenStream.new
    self.list_select_expressions.each do |list|
      cols += name_picker(list,(all ? "columns_all" : "columns_select")) { |i,prev,nxt|
        (nxt.nil? || nxt.token_value != '=') &&
        (i < 1 || !['AS',RIGHT_PARAN,'END'].include?(prev.token_value)) &&
        (i < 1 || ![:Id,:String,:Number,:HostVariable].include?(prev.token_type))
      }
    end
    cols
  end

  ## Every column referenced anywhere in the statement, UNION branches included.
  def columns_all
    all_cols = TokenStream.new
    return all_cols if ['DROP','TRUNCATE'].include?(typ)

    all_cols += columns_select(true) + columns_insert(true) +
                columns_update(true) + columns_from(true) +
                columns_join_on(true) + columns_where(true) +
                columns_group_by(true) + columns_having(true) +
                columns_order_by()

    list_union.each do |st|
      ss = union_select(st)
      all_cols += ss.columns_select(true) + ss.columns_from(true) +
                  ss.columns_join_on(true) + ss.columns_where(true) +
                  ss.columns_group_by(true) + ss.columns_having(true) +
                  ss.columns_order_by()
    end
    all_cols
  end

  def table_into;     (typ == 'INTO')     ? token_list(clause_into[1]) : TokenStream.new; end
  def table_update;   (typ == 'UPDATE')   ? token_list(@stmt[1]) : TokenStream.new; end
  def table_truncate; (typ == 'TRUNCATE') ? token_list(@stmt[2]) : TokenStream.new; end

  ## Table named by CREATE TABLE. (The second token is inspected, exactly
  ## like #table_alter; comparing +typ+ with 'TABLE' could never match.)
  def table_create
    return TokenStream.new unless typ == 'CREATE' && @stmt[1] && @stmt[1].token_value == 'TABLE'
    token_list(@stmt[2])
  end

  def table_alter
    return TokenStream.new unless typ == 'ALTER' && @stmt[1] && @stmt[1].token_value == 'TABLE'
    token_list(@stmt[2])
  end

  ## All identifiers of a DROP TABLE statement.
  def tables_drop
    tbls = TokenStream.new
    if typ == 'DROP' && @stmt[1] && @stmt[1].token_value == 'TABLE'
      tbls = TokenStream.new(@stmt.select { |tok| tok.token_type == :Id })
    end
    tbls
  end

  ## Tables named in the FROM clause (aliases excluded).
  def tables_from(all=false)
    name_picker(clause_from,(all ? "tables_all" : "tables_from")) { |i,prev|
      (i < 1 || !['AS',RIGHT_PARAN].include?(prev.token_value)) &&
      (i < 1 || prev.token_type != :Id)
    }
  end

  ## Tables referenced by sub-selects of the WHERE clause.
  def tables_where(all=false)
    tbls = TokenStream.new
    list_sub_selects(clause_where).each do |ss|
      # Strip the enclosing parentheses before re-parsing.
      tbls += TSQLStmt.new(ss.slice(1...-1)).tables_all
    end
    tbls
  end

  def table_insert
    return TokenStream.new unless typ == 'INSERT'
    into = @stmt[1] && @stmt[1].token_value == 'INTO'
    token_list(into ? @stmt[2] : @stmt[1])
  end

  def table_delete
    return TokenStream.new unless typ == 'DELETE'
    from = @stmt[1] && @stmt[1].token_value == 'FROM'
    token_list(from ? @stmt[2] : @stmt[1])
  end

  ## Tables named in JOIN clauses, i.e. identifiers before the ON keyword.
  def tables_join(all=false)
    tbls = TokenStream.new
    clause_join.each do |list|
      # nil when the join has no ON (e.g. CROSS JOIN): every position qualifies.
      on_at = list.to_arr.index('ON')
      tbls += name_picker(list,(all ? "tables_all" : "tables_join")) { |i,prev,nxt|
        (i < 1 || !['AS',RIGHT_PARAN].include?(prev.token_value)) &&
        (i < 1 || prev.token_type != :Id) &&
        (on_at.nil? || i < on_at)
      }
    end
    tbls
  end

  ## All tables of the statement. +kind+ 'mod' restricts the result to the
  ## modified tables, any other non-nil value to the referred ones.
  def tables_all(kind=nil)
    mod_tbls = table_insert + table_delete + table_update + table_into + table_create +
               tables_drop + table_truncate + table_alter
    ref_tbls = tables_where + tables_from(true) + tables_join(true)
    list_union.each do |st|
      ss = union_select(st)
      ref_tbls += ss.tables_from(true) + ss.tables_join(true)
    end
    all_tbls = mod_tbls + ref_tbls
    kind ? ((kind == 'mod') ? mod_tbls : ref_tbls) : all_tbls
  end

  def tables_referred; tables_all('ref'); end
  def tables_modified; tables_all('mod'); end

  ## The first INTO token and the token after it.
  def clause_into
    into = TokenStream.new

    if @stmt
      n = @stmt.index { |tok| tok.token_value == 'INTO' }
      into = self.stmt.slice(n..(n+1)) if n
    end

    into
  end

  ## Everything from the top-level ORDER keyword to the end.
  def clause_order_by
    order_by = TokenStream.new
    return order_by unless @stmt

    n = self.stmt.find_matching_word(['ORDER'])
    order_by = self.stmt.slice(n..-1) if n > 0
    order_by
  end

  ## The FROM clause, without the join modifiers that belong to a following JOIN.
  def clause_from
    k, n = clause('FROM')
    if n > 0
      k.pop if k[-1].token_value == 'OUTER'
      k.pop if JOIN_MODIFIERS.include?(k[-1].token_value)
    end
    k
  end

  ## Columns of sub-selects in the FROM clause (only when +all+ is set);
  ## plain identifiers in FROM are tables and are never picked.
  def columns_from(all=false)
    name_picker(clause_from,(all ? "columns_all" : "")) { |i,prev| false }
  end

  ## Identifiers in the select expressions that are not columns.
  def alias_columns(all=false)
    cols = TokenStream.new
    self.list_select_expressions.each { |list| cols += name_picker(list) }
    cols - columns_select
  end

  ## Identifiers in the FROM clause that are not tables.
  def alias_from(all=false)
    tbls = name_picker(clause_from)
    tbls - tables_from(all)
  end

  ## Hash of table name => alias for the "FROM" clause, the "JOIN" clauses,
  ## or both ("ALL"). Raises ArgumentError for any other +of+.
  def alias_hash(of="ALL")
    case of.upcase
    when "FROM"
      name_alias = name_picker(clause_from).to_arr
      names = tables_from.to_arr
    when "JOIN"
      name_alias = []
      clause_join.each { |list| name_alias += name_picker(list).to_arr }
      names = tables_join.to_arr
    when "ALL"
      name_alias = name_picker(clause_from).to_arr
      clause_join.each { |list| name_alias += name_picker(list).to_arr }
      names = (tables_from + tables_join).to_arr
    else
      raise ArgumentError, "alias_hash expects FROM, JOIN or ALL, got #{of.inspect}"
    end

    a_hash = {}

    names.each do |nm|
      i = name_alias.index(nm)
      a = nil
      # The identifier right after a table name is its alias, unless it is
      # itself a table name.
      a = name_alias[i+1] if i && !names.include?(name_alias[i+1])
      a_hash[nm] = a if a
    end

    a_hash
  end

  ## Split +join_cl+ (tokens starting at the first JOIN) into one TokenStream
  ## per JOIN. +prev+ holds the (up to two) tokens preceding the first JOIN so
  ## that modifiers such as LEFT OUTER are attached to the join they belong to.
  def list_join(join_cl,prev)
    join_lyst = []
    return join_lyst if join_cl.length == 0

    n = join_cl.find_matching_word(['JOIN'],1)

    while n > 0
      j = join_cl.slice(0..n)
      if prev.length > 0 && prev[-1].token_value == 'OUTER'
        j.unshift(prev[-1])
        prev = prev.slice(0...-1)
      end

      if prev.length > 0 && JOIN_MODIFIERS.include?(prev[-1].token_value)
        j.unshift(prev[-1])
      end

      # The tail of this chunk carries the modifiers of the next JOIN.
      prev = j

      j = j.slice(0...-1) if j[-1].token_value == 'OUTER'
      j = j.slice(0...-1) if JOIN_MODIFIERS.include?(j[-1].token_value)
      join_lyst << TokenStream.new(j)

      join_cl = join_cl.slice((n+1)..-1)
      n = join_cl.find_matching_word(['JOIN'],1)
    end

    if prev.length > 0 && prev[-1].token_value == 'OUTER'
      join_cl.unshift(prev[-1])
      prev = prev.slice(0...-1)
    end

    if prev.length > 0 && JOIN_MODIFIERS.include?(prev[-1].token_value)
      join_cl.unshift(prev[-1])
    end

    join_lyst << join_cl
    join_lyst
  end

  ## Array of TokenStreams, one per JOIN of the statement.
  def clause_join
    return [] unless @stmt

    join, n = clause('JOIN')
    prev = (n > 1) ? @stmt.slice((n-2)..(n-1)) : []
    list_join(join,prev)
  end

  ## Columns used in the ON conditions of the JOIN clauses.
  def columns_join_on(all=false)
    cols = TokenStream.new
    clause_join.each do |list|
      # nil when the join has no ON: then there are no ON columns to pick.
      on_at = list.to_arr.index('ON')
      cols += name_picker(list,(all ? "columns_all" : "columns_join_on")) { |i,prev|
        (i < 1 || !['JOIN','AS',RIGHT_PARAN].include?(prev.token_value)) &&
        (i < 1 || prev.token_type != :Id) &&
        (!on_at.nil? && i > on_at)
      }
    end
    cols
  end

  ## Identifiers in the JOIN clauses that are neither tables nor ON columns.
  def alias_join(all=false)
    tbls = TokenStream.new
    clause_join.each { |list| tbls += name_picker(list) }
    tbls - tables_join(all) - columns_join_on(all)
  end

  def clause_where; clause('WHERE').first; end
  def columns_where(all=false); name_picker(clause_where,(all ? "columns_all" : "columns_where")); end

  def clause_group_by; clause('GROUP').first; end
  def columns_group_by(all=false); name_picker(clause_group_by,(all ? "columns_all" : "")); end

  def clause_having; clause('HAVING').first; end
  def columns_having(all=false); name_picker(clause_having,(all ? "columns_all" : "")); end

  def clause_set; clause('SET').first; end

  ## Columns assigned by an UPDATE; with +all+, every column in the SET clause.
  def columns_update(all=false)
    cols = TokenStream.new
    return cols unless typ == 'UPDATE'

    if all
      cols += name_picker(clause_set,"columns_all")
    else
      cols += name_picker(clause_set,"columns_update") { |i,prev,nxt|
        (i < 1 || ['SET',','].include?(prev.token_value)) &&
        (nxt.nil? || nxt.token_value == '=')
      }
    end

    cols
  end

  ## Array of TokenStreams, one per UNION branch (each starting at its UNION
  ## keyword); a trailing ORDER BY is cut off the last branch.
  def list_union
    return [] unless @stmt

    union_lyst = []

    n = self.stmt.find_matching_word(['UNION'])
    return union_lyst if n == 0

    union_cl = self.stmt.slice(n..-1)
    n = union_cl.find_matching_word(['UNION'],1)
    while n > 0
      union_lyst << union_cl.slice(0..n)
      union_cl = union_cl.slice((n+1)..-1)
      n = union_cl.find_matching_word(['UNION'],1)
    end

    n = union_cl.find_matching_word(['ORDER'],1)
    union_lyst << ((n > 0) ? union_cl.slice(0..n) : union_cl)

    union_lyst
  end

  ## Identifiers of the ORDER BY clause.
  def columns_order_by
    # Does not handle case ... end
    clause_order_by.select { |tok|
      !['ORDER','BY',',','ASC','DESC'].include?(tok.token_value) && tok.token_type == :Id
    }
  end

  ## Array of TokenStreams, one per parenthesised SELECT found at the outermost
  ## nesting of +st+ (default: the whole statement), parentheses included.
  def list_sub_selects(st=nil)
    return [] unless @stmt

    sub_selects = []
    ss = st || self.stmt

    loop do
      # Find the first occurrence of "( SELECT"
      n = (1...ss.length).find { |i|
        ss[i].token_value == 'SELECT' && ss[i-1].token_value == LEFT_PARAN
      }
      break unless n

      m = ss.find_matching_paran(n-1)
      sub_selects << ss.slice((n-1)...(n+m))
      ss = ss.slice((n+m)..-1)
    end

    sub_selects
  end

  ## Accept the singular/plural variant of an existing public method name
  ## (e.g. +columns_selects+ for +columns_select+). Any other unknown method
  ## raises NoMethodError as usual instead of silently returning nil.
  def method_missing(methId,*args,&block)
    alt = alternate_name(methId)
    return self.send(alt,*args,&block) if self.class.public_method_defined?(alt)
    super
  end

  def respond_to_missing?(methId,include_private=false)
    self.class.public_method_defined?(alternate_name(methId)) || super
  end

  ## Method name with a trailing "s" removed, or added when absent.
  def alternate_name(methId)
    str = methId.to_s
    str.end_with?('s') ? str.chop : (str + 's')
  end

  ## The statement of one UNION branch, starting at its first SELECT.
  def union_select(st)
    n = st.index { |tok| tok.token_value == 'SELECT' } || 0
    TSQLStmt.new(st.slice(n..-1))
  end

  ## TokenStream of the given tokens, ignoring missing (nil) ones.
  def token_list(*toks)
    TokenStream.new(toks.compact)
  end

  private :alternate_name, :union_select, :token_list

  alias column_aliases   alias_columns
  alias from_aliases     alias_from
  alias join_aliases     alias_join

  alias into             clause_into
  alias from             clause_from
  alias join_on          clause_join
  alias where            clause_where
  alias group_by         clause_group_by
  alias having           clause_having
  alias order_by         clause_order_by
  alias set              clause_set

  alias into_table       table_into
  alias delete_table     table_delete
  alias insert_table     table_insert
  alias update_table     table_update
  alias truncate_table   table_truncate
  alias create_table     table_create
  alias alter_table      table_alter

  alias drop_tables      tables_drop
  alias from_tables      tables_from
  alias join_tables      tables_join
  alias all_tables       tables_all
  alias where_tables     tables_where

  alias referred_tables  tables_referred
  alias modified_tables  tables_modified

  alias all_columns      columns_all
  alias select_columns   columns_select
  alias where_columns    columns_where
  alias group_by_columns columns_group_by
  alias having_columns   columns_having
  alias order_by_columns columns_order_by
  alias join_columns     columns_join_on
  alias insert_columns   columns_insert
  alias update_columns   columns_update
end
|
670
|
+
|
671
|
+
|
672
|
+
|
673
|
+
|