tsql_shparser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tsql_shparser.rb +198 -0
- data/lib/tsql_shparser/tsql_stmt.rb +673 -0
- data/lib/tsql_shparser/tsql_tokenizer.rb +375 -0
- data/test/tst_tokenizer.rb +211 -0
- metadata +40 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
#
|
2
|
+
# tsql_shparser.rb: Shallow Parser for t-SQL
|
3
|
+
# Copyright (c) 2005-2006 Shashank Date (shanko_date@yahoo.com)
|
4
|
+
#
|
5
|
+
# License: Ruby's
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
10
|
+
#
|
11
|
+
#
|
12
|
+
require 'tsql_shparser/tsql_tokenizer'
|
13
|
+
require 'tsql_shparser/tsql_stmt'
|
14
|
+
|
15
|
+
## Shallow Parser for t-SQL: Creates an array of arrays of Token objects for each
|
16
|
+
## t-SQL Statement parsed as input and generates an array of TSQLStmt objects.
|
17
|
+
class TSQL_ShParser
|
18
|
+
VERSION = "0.0.1"
|
19
|
+
|
20
|
+
@@start = ["SELECT","INSERT","UPDATE","DELETE","DROP","ALTER","CREATE","TRUNCATE"]
|
21
|
+
@@first = ["ALTER", "BACKUP", "BEGIN", "BREAK", "CHECKPOINT", "CONTINUE", "DENY", "GRANT", "REVOKE",
|
22
|
+
"BULK", "CLOSE", "COMMIT", "CREATE","DBCC", "DEALLOCATE",
|
23
|
+
"DECLARE", "DELETE","DROP", "DUMP", "ERRLVL", "EXEC", "EXECUTE", "EXIT", "FETCH",
|
24
|
+
"GO", "GOTO", "IF", "INSERT","KILL", "LOAD", "OPEN", "OPENDATASOURCE",
|
25
|
+
"OPENQUERY", "OPENROWSET","PRINT","RAISERROR", "READTEXT", "RECONFIGURE",
|
26
|
+
"RESTORE","RETURN", "ROLLBACK", "SAVE", "SELECT", "SET", "SETUSER", "SHUTDOWN",
|
27
|
+
"STATISTICS","TRUNCATE", "UPDATE", "UPDATETEXT", "USE","WAITFOR", "WHILE", "WRITETEXT"]
|
28
|
+
|
29
|
+
## Builds a parser around a fresh Tokenizer.
## file:: optional file name handed to the Tokenizer; when it is given the
##        file is tokenized right away, otherwise input is expected to be
##        supplied later through #parse(str).
def initialize(file=nil)
  @input_file = file
  @tok = Tokenizer.new(@input_file)
  return unless file

  @tok.tokenize_file
end
|
34
|
+
|
35
|
+
## Return nil if it is not a sub-select
|
36
|
+
## Return relative index (w.r.t SELECT) of the token
|
37
|
+
## prior to the leftmost LEFT_PARAN otherwise
|
38
|
+
## Decides whether the SELECT token that was just consumed belongs to the
## statement currently being parsed (a sub-select, a UNION branch, or the
## SELECT half of INSERT ... SELECT) instead of starting a new statement.
##
## Returns nil if it is not a sub-select, or if the most recently consumed
## token is missing or is not SELECT. Otherwise returns the look-back counter
## reached while walking backwards from the SELECT to the token introducing it.
##
## NOTE(review): assumes @tok.look_back(k) yields the k-th most recently
## consumed token, or nil when there is none -- confirm against the Tokenizer.
def is_sub_select?

  ss = nil

  # Make sure that we are indeed looking at the sub-SELECT
  # The SELECT token is already consumed, so we have to look back to verify.
  # curr is nil when nothing has been consumed yet; treat that as "no".
  curr = @tok.look_back(1)
  return ss unless (curr and (curr.token_value == 'SELECT'))

  # If there is no token before SELECT then this is not a sub-SELECT
  prev = @tok.look_back(2)
  return ss unless prev

  n = 3

  # Take care of the arbitrary/redundant nesting of expressions in ( )
  while(prev && (prev.token_value == LEFT_PARAN))
    prev = @tok.look_back(n)
    n += 1
  end

  # Check the token before the left most LEFT_PARAN
  follow = ['SELECT','DISTINCT','PERCENT','JOIN','WHERE',
            'BY','IN','ANY','ALL','EXISTS','UNION','FROM',
            '<','>','=','>=','<=','<>',',']

  prev_prev = @tok.look_back(n)

  ss = n if prev && follow.include?(prev.token_value)

  # Consider the case: SELECT TOP 10 (select ...)
  if (prev && (prev.token_value =~ /^\d+$/))
    ss = n if (prev_prev && (prev_prev.token_value == 'TOP'))
  end

  # Consider the case UNION ALL
  if (prev && (prev.token_value == 'ALL'))
    ss = n if (prev_prev && (prev_prev.token_value == 'UNION'))
  end

  ## Finally, consider the case of SELECT being part of the INSERT statement
  #
  # skip the (optional) list of column names that may
  # follow the name of the table in an INSERT, for e.g:
  #  INSERT INTO Table1 (COL1, COL2, COL3)
  #  SELECT 'A1','B2','C3';
  if (prev && prev.token_value == RIGHT_PARAN)

    # n is deliberately not advanced here: the loop below re-reads the same
    # position on its first pass before it starts stepping backwards.
    prev = @tok.look_back(n)

    # Rewind to the matching LEFT_PARAN
    # We are more strict here and verify that all tokens
    # are either Id or Comma: not considering the possibility of a Dot
    while (prev and ((prev.token_type == :Id) or (prev.token_type == :Comma)))
      prev = @tok.look_back(n)
      n += 1
    end

    # This must be the LEFT_PARAN
    prev = @tok.look_back(n) if (prev and (prev.token_value == LEFT_PARAN))
  end

  # Step back over the (possibly dotted) table name.
  while (prev and ((prev.token_type == :Id) or (prev.token_type == :Dot)))
    prev = @tok.look_back(n)
    n += 1
  end

  # ??? prev = @tok.look_back(n) ???
  ss = n if (prev and prev.token_value == 'INSERT')

  prev_prev = @tok.look_back(n)
  ss = n+1 if (prev and (prev.token_value == 'INTO') and prev_prev and (prev_prev.token_value == 'INSERT'))

  ss

end
|
116
|
+
|
117
|
+
## Keep consuming tokens till you detect the end of the t-SQL statement.
|
118
|
+
## It is not very accurate about determining the end especially if the
|
119
|
+
## statements are redundantly nested inside parentheses or are immediately
|
120
|
+
## followed by user-defined functions or stored procs.
|
121
|
+
## It takes in as its argument a category which is one of the symbols in
|
122
|
+
## the @@start array. It returns an array (possibly empty) of Token objects.
|
123
|
+
## Consumes tokens until the end of one t-SQL statement is detected.
## category:: the keyword expected to open the statement (one of the strings
##            in @@start, per the class comments).
## Returns the array of Token objects that make up the statement. The array is
## empty when the tokenizer is exhausted or when the next token is not
## +category+ (in the latter case that token has already been consumed).
def parse_stmt(category="SELECT")

  stmt = []

  curr = @tok.get_next_token
  # Nothing left to read, or the next token does not open the requested statement.
  return stmt if (curr.nil? or (curr.token_value != category))

  stmt << curr

  # Only UPDATE needs to remember whether its own SET has been seen yet.
  found_set = false if category == 'UPDATE'
  # Running balance of CASE ... END pairs; a negative value means an END
  # that does not close a CASE of this statement.
  case_end = 0

  prev = curr.token_value

  # Loop to find the end of the stmt
  loop do

    curr = @tok.get_next_token
    break unless curr

    curr_tok = curr.token_value
    case_end += 1 if curr_tok == 'CASE'
    case_end -= 1 if curr_tok == 'END'

    if ((not is_sub_select?) and (@@first.include?(curr_tok) or (curr.token_type == :Label) or (curr_tok == ';') or ((case_end < 0) and (curr_tok == 'END'))))
      # Handle TRIGGER Syntax
      unless (['OF','FOR','AFTER', ',','IF','AND','OR'].include?(prev) and ['INSERT','UPDATE','DELETE'].include?(curr_tok))
        if ((category == 'UPDATE') and (not found_set) and (curr_tok == 'SET'))
          # The first SET belongs to the UPDATE itself, not to a new statement.
          found_set = true
        else
          # The token starts the next statement: hand it back and stop.
          @tok.unget_token
          break
        end
      end
    end

    stmt << curr
    prev = curr_tok
  end

  stmt

end
|
166
|
+
|
167
|
+
## Parse the string passed as the argument (str). If no string is passed,
|
168
|
+
## it is assumed that the parser constructor was invoked with a file-name
|
169
|
+
## which was used to open an existing file and was parsed successfully.
|
170
|
+
## If no file-name or string was given, an empty array is returned.
|
171
|
+
## Otherwise an array of TSQLStmt objects is returned, one object per t-SQL
|
172
|
+
## statement that starts with the symbol in @@start.
|
173
|
+
## Parses +str+ when one is given; otherwise works on whatever the tokenizer
## already holds (e.g. the file named in the constructor).
## Returns an array of TSQLStmt objects, one for every statement whose first
## keyword is listed in @@start; the array is empty when there is no input.
def parse(str=nil)

  @tok.tokenize_string(str) if str

  token_lists = []

  while (token = @tok.get_next_token)
    keyword = token.token_value
    next unless @@start.include?(keyword)

    # Give the keyword back so that parse_stmt sees it as its first token.
    @tok.unget_token
    tokens = parse_stmt(keyword)
    token_lists << tokens unless tokens == []
  end

  token_lists.collect { |tokens| TSQLStmt.new(tokens) }

end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
@@ -0,0 +1,673 @@
|
|
1
|
+
#
|
2
|
+
# tsql_stmt.rb: Shallow Parser for t-SQL
|
3
|
+
# Copyright (c) 2005-2006 Shashank Date (shanko_date@yahoo.com)
|
4
|
+
#
|
5
|
+
# License: Ruby's
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
10
|
+
#
|
11
|
+
#
|
12
|
+
|
13
|
+
LEFT_PARAN = "("
|
14
|
+
RIGHT_PARAN = ")"
|
15
|
+
|
16
|
+
## A facade class which adds helper methods to the Array class
|
17
|
+
## and converts it into a TokenStream
|
18
|
+
class TokenStream < Array
  VERSION = "0.0.1"

  ## arr:: the tokens to wrap; nil is treated as an empty list.
  def initialize(arr=[])
    super(arr || [])
  end

  # Keep the result of the usual Array operations a TokenStream.
  def +(arr); TokenStream.new(super); end
  def -(arr); TokenStream.new(super); end

  ## Like Array#slice for a range, but always answers a TokenStream: an
  ## out-of-bounds range gives an empty stream instead of nil.
  def slice(range); TokenStream.new(super || []); end

  ## Returns a plain array holding the token_value of every token.
  ## Works on the receiver itself, so it also behaves for instances that were
  ## created without going through #initialize.
  def to_arr
    collect { |t| t.token_value }
  end

  ## from:: index of a LEFT_PARAN token.
  ## Returns the offset (relative to +from+) of the RIGHT_PARAN that closes it,
  ## or 0 when the token at +from+ is not a LEFT_PARAN or no match exists.
  def find_matching_paran(from)

    list = self.slice(from..-1).to_arr
    return 0 unless (list[0] == LEFT_PARAN)

    level = 0
    list.each_index { |i|
      level += 1 if (list[i] == LEFT_PARAN)
      level -= 1 if (list[i] == RIGHT_PARAN)
      return i if (level == 0)
    }

    0
  end

  ## Looks for a word outside of any parentheses.
  ## words:: a single word or an array of words, tried in the given order;
  ##         the first word that occurs wins, even when a later word of the
  ##         list appears earlier in the stream.
  ## from::  index to start searching at.
  ## Returns the offset (relative to +from+) of the match, or 0 when nothing
  ## matches. A match at offset 0 is never reported.
  def find_matching_word(words,from=0)
    list = self.slice(from..-1).to_arr
    # Array() lets callers pass a bare String; String no longer responds to
    # #each on Ruby 1.9 and later.
    Array(words).each{|word|
      m = level = 0
      list.each_index { |i|
        level += 1 if list[i] == LEFT_PARAN
        level -= 1 if list[i] == RIGHT_PARAN
        (m = i; break) if ((list[i] == word) and (level == 0))
        p [i,level,m] if $DEBUG
      }
      return m if m > 0
    }
    0 # assume that the word will NEVER be the first word in the list
  end

end
|
65
|
+
|
66
|
+
## The main class which identifies the various clauses for the t-SQL Statement.
|
67
|
+
## It constructs the TSQLStmt object from the array of tokens parsed by the Parser.
|
68
|
+
## Generates a TokenStream object out of this array and provides methods to
|
69
|
+
## query the TokenStream.
|
70
|
+
class TSQLStmt
|
71
|
+
VERSION = "0.0.1"
|
72
|
+
## Hash used in determining which clauses follow the clause indicated by the key
|
73
|
+
@@follow = {
|
74
|
+
'SELECT' => ['INTO','FROM','JOIN','WHERE','GROUP','HAVING','UNION','ORDER'],
|
75
|
+
'FROM' => ['JOIN','WHERE','GROUP','HAVING','UNION','ORDER'],
|
76
|
+
'JOIN' => ['WHERE','GROUP','HAVING','UNION','ORDER'],
|
77
|
+
'WHERE' => ['GROUP','HAVING','UNION','ORDER'],
|
78
|
+
'GROUP' => ['HAVING','UNION','ORDER'],
|
79
|
+
'HAVING' => ['UNION','ORDER'],
|
80
|
+
'UNION' => ['ORDER'],
|
81
|
+
'SET' => ['FROM','JOIN','WHERE','UNION'], # Part of the UPDATE
|
82
|
+
}
|
83
|
+
|
84
|
+
attr_reader :typ
|
85
|
+
attr_reader :stmt
|
86
|
+
attr_reader :line
|
87
|
+
attr_reader :col
|
88
|
+
|
89
|
+
## Constructor: takes in array of tokens created by the ShParser.
|
90
|
+
## Each array has tokens of exactly one SQL statement. Sometimes the array
|
91
|
+
## may have some extra tokens towards the end, since the parser could not
|
92
|
+
## correctly identify the end of the statement.
|
93
|
+
## st:: the tokens of one statement, as collected by the parser.
## Records the statement type and the line/column of its first token. A
## SELECT that contains a top-level INTO is recorded with type 'INTO'.
## Nothing besides @stmt is set when +st+ is nil or has no first token.
def initialize(st)
  @stmt = st
  head = @stmt && @stmt[0]
  return unless head

  @typ = head.token_value
  if @typ == 'SELECT'
    into_at = TokenStream.new(@stmt).find_matching_word('INTO')
    @typ = 'INTO' if into_at > 0
  end
  @line = head.line
  @col = head.col
end
|
102
|
+
|
103
|
+
# ---------------------
|
104
|
+
## start:: the statement type the caller is interested in.
## Returns the run of keyword tokens the statement opens with, or an empty
## TokenStream when the statement is of a different type.
def hdr(start)
  leading = TokenStream.new
  return leading if self.typ != start

  @stmt.each do |tok|
    break if tok.token_type != :KeyWord
    leading += [tok]
  end

  leading
end
|
115
|
+
|
116
|
+
private :hdr
|
117
|
+
|
118
|
+
# ---------------------
|
119
|
+
|
120
|
+
## Collects the identifier tokens of +list+ that look like names.
## list::  TokenStream to scan.
## msg::   name of the TSQLStmt method used to harvest names from any SELECT
##         found in the list; with the default "" such SELECTs are skipped.
## block:: optional filter called as block(i, previous_token, next_token);
##         the identifier at index i is kept only when it answers true.
##         For i == 0 the "previous" token is list[-1], so filters are
##         expected to test i first.
## An identifier directly followed by "(" or "." is never picked.
## Returns a TokenStream.
def name_picker(list,msg="",&block)

  cols,i,n = [], 0, list.length
  while (i < n)
    tok = list[i]
    if tok.token_value == 'SELECT'

      # Length of the parenthesised group holding this SELECT, if any.
      j = 0
      j = list.find_matching_paran(i-1) if ((i > 0) and (list[i-1].token_value == LEFT_PARAN))

      if j > 0
        st = list.slice(i...(i+j-1))
        i += j
      else
        # Either a SELECT that is not parenthesised, or an opening paran
        # without a partner (find_matching_paran answered 0). In both cases
        # the rest of the list is the sub-statement; advancing by 0 here
        # would loop forever.
        st = list.slice(i..-1)
        i = n
      end

      cols += TSQLStmt.new(st).send(msg) if msg.length > 0

    else

      cols << tok if ((tok.token_type == :Id) and
                      ((list[i+1] == nil) or (not [LEFT_PARAN,"."].include?(list[i+1].token_value))) and
                      ((block_given?) ? block.call(i,list[i-1],list[i+1]) : true))
      i += 1
    end
  end
  TokenStream.new(cols)
end
|
148
|
+
|
149
|
+
private :name_picker
|
150
|
+
|
151
|
+
# ---------------------
|
152
|
+
|
153
|
+
## Extracts one clause of the statement.
## keyword:: the word the clause starts with.
## follow::  words that may end the clause (defaults to @@follow[keyword]).
## st::      TokenStream to search (defaults to the whole statement).
## Returns [clause_tokens, index_of_keyword]; when the keyword is absent the
## pair is [empty TokenStream, 0].
def clause(keyword,follow=nil,st=nil)
  nothing = TokenStream.new
  return [nothing,0] unless @stmt

  terminators = follow || @@follow[keyword]
  stream = st || self.stmt

  start = stream.find_matching_word(keyword)
  return [nothing,start] unless start > 0

  # stop is relative to start; 0 means the clause runs to the end.
  stop = stream.find_matching_word(terminators,start)
  last = (stop == 0) ? -1 : (start + stop - 1)

  [stream.slice(start..last),start]
end
|
169
|
+
|
170
|
+
private :clause
|
171
|
+
|
172
|
+
# ---------------------
|
173
|
+
|
174
|
+
## Convert the array of tokens to a TokenStream object
|
175
|
+
## Wraps the stored token array in a new TokenStream on every call.
def stmt
  TokenStream.new(@stmt)
end
|
176
|
+
|
177
|
+
## List all the expressions in the SELECT part of the statement
|
178
|
+
## Returns an array of TokenStream objects. Each object in the
|
179
|
+
## array is exactly one expression of the SELECT.
|
180
|
+
## List all the expressions in the SELECT part of the statement.
## Returns an array of TokenStream objects, one per expression; a separating
## comma stays at the end of the expression it follows. The array is empty
## for statements that are not SELECT / SELECT ... INTO.
## Errors raised while splitting (e.g. an unmatched paran) are reported on
## standard output and whatever was collected so far is returned.
def list_select_expressions
  columns,list = [],[]
  return columns unless (@stmt and ((self.typ == 'SELECT') or (self.typ == 'INTO')))

  begin
    # Skip the header part of the SELECT
    prev,n = nil,nil
    @stmt.each_with_index{|tok,i|

      (prev = tok; next) if ["SELECT","ALL", "DISTINCT", "TOP", "PERCENT", "WITH", "TIES"].include?(tok.token_value)
      next if ((tok.token_value =~ /^\d+$/) and prev and (prev.token_value == 'TOP'))
      n = i
      break
    }

    # Find the end of the expression list.
    if n
      m = self.stmt.find_matching_word(@@follow['SELECT'],n)
      list = self.stmt.slice(n..((m == 0) ? -1: n+m-1))
    end

    i = 0
    column = TokenStream.new

    while i < list.length
      tok = list[i]
      # A parenthesised group is copied as a whole so that commas inside it
      # do not split the expression.
      k = ((tok.token_value == LEFT_PARAN) ? list.find_matching_paran(i) : 1)
      raise "I cannot find matching paran in:\n #{list.slice(i..-1).to_arr.inspect}\n" if k == 0

      column += list.slice(i...(i+k))
      if (list[i+k-1] and (list[i+k-1].token_value == ","))
        columns << column
        column = TokenStream.new
      end
      i += k
    end
    columns << column
  rescue StandardError
    # Best effort only for ordinary errors. Anything else (Interrupt,
    # SystemExit, ...) is left to propagate; a `return` inside `ensure`
    # would have silently discarded it.
    puts $!.to_s
  end

  columns
end
|
221
|
+
|
222
|
+
## Returns the leading keywords of a SELECT / SELECT ... INTO statement,
## including the number that follows TOP. Empty for other statement types.
def hdr_select
  header = TokenStream.new
  return header unless ['SELECT','INTO'].include?(self.typ)

  last = nil
  @stmt.each do |tok|
    keyword = (tok.token_type == :KeyWord)
    top_count = !keyword && tok.token_type == :Number && last.token_value == 'TOP'
    break unless keyword || top_count

    header += [tok]
    last = tok
  end

  header
end
|
238
|
+
|
239
|
+
|
240
|
+
## Leading keywords of an INSERT statement (empty for other statements).
def hdr_insert
  hdr('INSERT')
end

## Leading keywords of an UPDATE statement (empty for other statements).
def hdr_update
  hdr('UPDATE')
end

## Leading keywords of a DELETE statement (empty for other statements).
def hdr_delete
  hdr('DELETE')
end
|
243
|
+
|
244
|
+
|
245
|
+
## Columns named by the DELETE part of a statement.
## Nothing is collected here: the result is always an empty TokenStream,
## whatever the statement type or the value of +all+. The previous body also
## computed an index it never used, which raised on a DELETE consisting of a
## single token.
def columns_delete(all=false)
  TokenStream.new
end
|
253
|
+
|
254
|
+
## Columns named in the column list of an INSERT statement.
## all:: when true, also add the columns found (via columns_all) in whatever
##       follows the column list.
## Returns an empty TokenStream for non-INSERT statements, for INSERTs that
## end after the table name, and for INSERT ... EXEC[UTE].
def columns_insert(all=false)
  cols = TokenStream.new
  return cols if self.typ != 'INSERT'

  # Index of the first token after the table name (INTO is optional).
  first = (@stmt[1].token_value == 'INTO') ? 3 : 2
  after = @stmt[first]
  return cols if after.nil? || ['EXECUTE','EXEC'].include?(after.token_value)

  # Length of the parenthesised column list; 0 when there is none.
  span = (after.token_value == LEFT_PARAN) ? self.stmt.find_matching_paran(first) : 0
  last = first + span

  cols += name_picker(self.stmt.slice(first..last)) if last > first
  cols += name_picker(self.stmt.slice((last+1)..-1),"columns_all") if all
  cols
end
|
272
|
+
|
273
|
+
|
274
|
+
## Column names used in the SELECT list.
## all:: passed on as the method used for nested SELECTs ("columns_all" when
##       true, "columns_select" otherwise).
## An identifier is skipped when it is followed by "=" or when it follows AS,
## a closing paran, END, or another identifier/string/number/host variable.
def columns_select(all=false)
  recurse_with = all ? "columns_all" : "columns_select"
  picked = TokenStream.new

  self.list_select_expressions.each { |expr|
    picked += name_picker(expr,recurse_with) { |i,prev,nxt|
      if nxt && (nxt.token_value == '=')
        false
      elsif i < 1
        true
      else
        !['AS',RIGHT_PARAN,'END'].include?(prev.token_value) &&
          ![:Id,:String,:Number,:HostVariable].include?(prev.token_type)
      end
    }
  }

  picked
end
|
285
|
+
|
286
|
+
## Every column name found anywhere in the statement, including the
## statements chained with UNION. DROP and TRUNCATE statements report none.
def columns_all
  all_cols = TokenStream.new
  return all_cols if ['DROP','TRUNCATE'].include?(self.typ)

  all_cols += columns_select(true)
  all_cols += columns_insert(true)
  all_cols += columns_update(true)
  all_cols += columns_from(true)
  all_cols += columns_join_on(true)
  all_cols += columns_where(true)
  all_cols += columns_group_by(true)
  all_cols += columns_having(true)
  all_cols += columns_order_by()

  list_union.each { |part|
    # Each part starts with UNION [ALL]; the statement proper begins at the
    # first SELECT (or at 0 when there is none).
    select_at = part.index { |tok| tok.token_value == 'SELECT' } || 0
    branch = TSQLStmt.new(part.slice(select_at..-1))

    all_cols += branch.columns_select(true)
    all_cols += branch.columns_from(true)
    all_cols += branch.columns_join_on(true)
    all_cols += branch.columns_where(true)
    all_cols += branch.columns_group_by(true)
    all_cols += branch.columns_having(true)
    all_cols += branch.columns_order_by()
  }

  all_cols
end
|
307
|
+
|
308
|
+
## Target table of SELECT ... INTO: the token right after INTO.
def table_into
  return TokenStream.new([]) unless self.typ == 'INTO'
  TokenStream.new([clause_into[1]])
end

## Table of an UPDATE statement: its second token.
def table_update
  return TokenStream.new([]) unless self.typ == 'UPDATE'
  TokenStream.new([@stmt[1]])
end

## Table of a TRUNCATE statement: its third token.
def table_truncate
  return TokenStream.new([]) unless self.typ == 'TRUNCATE'
  TokenStream.new([@stmt[2]])
end
|
311
|
+
## Table of a CREATE TABLE statement: its third token.
## Empty for any other statement, including other kinds of CREATE.
## (The test used to compare the statement type with both 'CREATE' and
## 'TABLE', which can never hold; the second token is what says TABLE.)
def table_create
  creates_table = ((self.typ == 'CREATE') and @stmt[1] and (@stmt[1].token_value == 'TABLE'))
  TokenStream.new(creates_table ? [@stmt[2]] : [])
end
|
313
|
+
## Table of an ALTER TABLE statement: its third token.
## Empty for any other statement. A statement made of the single token ALTER
## is tolerated, in the same way tables_drop tolerates a bare DROP.
def table_alter
  alters_table = ((self.typ == 'ALTER') and @stmt[1] and (@stmt[1].token_value == 'TABLE'))
  TokenStream.new(alters_table ? [@stmt[2]] : [])
end
|
315
|
+
## Tables of a DROP TABLE statement: every identifier token it contains.
## Empty for any other statement.
def tables_drop
  drops_table = ((self.typ == 'DROP') and @stmt[1] and (@stmt[1].token_value == 'TABLE'))
  return TokenStream.new unless drops_table

  TokenStream.new(@stmt.select { |tok| tok.token_type == :Id })
end
|
320
|
+
|
321
|
+
## Table names used in the FROM clause.
## all:: passed on as the method used for nested SELECTs ("tables_all" when
##       true, "tables_from" otherwise).
## An identifier that follows AS, a closing paran or another identifier is
## taken to be an alias and is left out.
def tables_from(all=false)
  recurse_with = all ? "tables_all" : "tables_from"
  name_picker(clause_from,recurse_with) { |i,prev|
    (i < 1) || (!['AS',RIGHT_PARAN].include?(prev.token_value) && (prev.token_type != :Id))
  }
end
|
328
|
+
|
329
|
+
## Tables referred to by the sub-selects of the WHERE clause.
## Each sub-select is stripped of its enclosing parans and analysed as a
## statement of its own. +all+ is accepted but not used.
def tables_where(all=false)
  list_sub_selects(clause_where).inject(TokenStream.new) { |tbls,sub|
    tbls + TSQLStmt.new(sub.slice(1...-1)).tables_all
  }
end
|
336
|
+
|
337
|
+
## Table of an INSERT statement: the token after INSERT, or after INTO when
## that keyword is present. Empty for other statements.
def table_insert
  return TokenStream.new([]) unless self.typ == 'INSERT'

  at = (@stmt[1].token_value == 'INTO') ? 2 : 1
  TokenStream.new([@stmt[at]])
end
|
344
|
+
|
345
|
+
## Table of a DELETE statement: the token after DELETE, or after FROM when
## that keyword is present. Empty for other statements.
def table_delete
  return TokenStream.new([]) unless self.typ == 'DELETE'

  at = (@stmt[1].token_value == 'FROM') ? 2 : 1
  TokenStream.new([@stmt[at]])
end
|
352
|
+
|
353
|
+
## Table names used in the JOIN clauses.
## all:: passed on as the method used for nested SELECTs ("tables_all" when
##       true, "tables_join" otherwise).
## Within each join only identifiers placed before ON are considered, and an
## identifier that follows AS, a closing paran or another identifier is taken
## to be an alias. A join without ON (e.g. CROSS JOIN) is searched as a whole.
def tables_join(all=false)
  tbls = TokenStream.new
  clause_join.each{|list|

    # Position of ON in this join; nil when the join has no ON at all.
    # Comparing an index with nil would raise, so that case is handled below.
    on_at = list.to_arr.index('ON')

    tbls += name_picker(list,(all ? "tables_all" : "tables_join")){|i,prev,nxt|
      ((i < 1) or (not ['AS',RIGHT_PARAN].include?(prev.token_value))) and
      ((i < 1) or (prev.token_type != :Id)) and
      (on_at.nil? or (i < on_at))
    }
  }
  tbls
end
|
366
|
+
|
367
|
+
## Tables named by the statement.
## kind:: nil for every table, 'mod' for the tables the statement modifies;
##        any other value selects the tables it only refers to.
## Tables of statements chained with UNION count as referred tables.
def tables_all(kind=nil)
  mod_tbls = table_insert + table_delete + table_update + table_into +
             table_create + tables_drop + table_truncate + table_alter

  ref_tbls = tables_where + tables_from(true) + tables_join(true)
  list_union.each { |part|
    # Each part starts with UNION [ALL]; the statement proper begins at the
    # first SELECT (or at 0 when there is none).
    select_at = part.index { |tok| tok.token_value == 'SELECT' } || 0
    branch = TSQLStmt.new(part.slice(select_at..-1))
    ref_tbls += branch.tables_from(true) + branch.tables_join(true)
  }

  return mod_tbls + ref_tbls unless kind

  (kind == 'mod') ? mod_tbls : ref_tbls
end
|
380
|
+
|
381
|
+
## Tables the statement reads from.
def tables_referred
  tables_all('ref')
end

## Tables the statement changes.
def tables_modified
  tables_all('mod')
end
|
383
|
+
|
384
|
+
## Returns the first INTO token of the statement together with the token that
## follows it, or an empty TokenStream when there is no INTO (or no statement).
def clause_into

  return TokenStream.new unless @stmt

  into_at = @stmt.index { |tok| tok.token_value == 'INTO' }
  return TokenStream.new unless into_at

  self.stmt.slice(into_at..(into_at+1))

end
|
399
|
+
|
400
|
+
## Returns the tokens from the top-level ORDER keyword to the end of the
## statement, or an empty TokenStream when there is no such keyword.
def clause_order_by
  return TokenStream.new unless @stmt

  order_at = self.stmt.find_matching_word('ORDER')
  (order_at > 0) ? self.stmt.slice(order_at..-1) : TokenStream.new
end
|
408
|
+
|
409
|
+
## Returns the FROM clause. A join qualifier (OUTER, then one of
## CROSS/INNER/LEFT/RIGHT/FULL) left dangling at its end belongs to the JOIN
## that follows and is dropped.
def clause_from
  from_tokens,found_at = clause('FROM')
  return from_tokens unless found_at > 0

  from_tokens.pop if from_tokens.last.token_value == 'OUTER'
  from_tokens.pop if ['CROSS','INNER','LEFT','RIGHT','FULL'].include?(from_tokens.last.token_value)
  from_tokens
end
|
417
|
+
|
418
|
+
## Columns found in the FROM clause. Identifiers of the clause itself are
## never columns (the filter rejects them all), so only SELECTs nested in the
## clause can contribute, and only when +all+ is true.
def columns_from(all=false)
  recurse_with = all ? "columns_all" : ""
  name_picker(clause_from,recurse_with) { |_i,_prev| false }
end
|
421
|
+
|
422
|
+
## Aliases given to the expressions of the SELECT list: every name picked
## from the list minus the names recognised as columns.
## +all+ is accepted but not used.
def alias_columns(all=false)
  every_name = self.list_select_expressions.inject(TokenStream.new) { |names,expr|
    names + name_picker(expr)
  }
  every_name - columns_select
end
|
429
|
+
|
430
|
+
|
431
|
+
## Aliases used in the FROM clause: every name picked from the clause minus
## the names recognised as tables.
def alias_from(all=false)
  name_picker(clause_from) - tables_from(all)
end
|
435
|
+
|
436
|
+
## Maps table names to their aliases.
## of:: "FROM", "JOIN" or "ALL" (any letter case) to choose which clauses are
##      inspected. Any other value gives an empty hash; it used to fail with
##      a NoMethodError on nil.
## A table's alias is the name that directly follows it among the picked
## names, unless that name is itself a table. Tables without an alias are
## left out of the hash.
def alias_hash(of="ALL")
  scope = of.upcase
  return {} unless ["FROM","JOIN","ALL"].include?(scope)

  name_alias = []   # every picked name, tables and aliases alike, in order
  names = []        # the names recognised as tables

  unless scope == "JOIN"
    name_alias += name_picker(clause_from).to_arr
    names += tables_from.to_arr
  end

  unless scope == "FROM"
    clause_join.each{|list|
      name_alias += name_picker(list).to_arr
    }
    names += tables_join.to_arr
  end

  a_hash={}

  names.each{|nm|
    i = name_alias.index(nm)
    a = nil
    a = name_alias[i+1] if i and (not names.include?(name_alias[i+1]))
    a_hash[nm] = a if a
  }

  a_hash
end
|
466
|
+
|
467
|
+
## Splits a run of JOIN clauses into one TokenStream per join.
## join_cl:: the tokens from the first JOIN onwards.
## prev::    the tokens just before the first JOIN, which may hold its
##           qualifier (INNER / LEFT / RIGHT / CROSS / FULL, optionally
##           followed by OUTER).
## Each join gets its qualifier put in front; the qualifier of the next join,
## found at the tail of a chunk, is cut off that chunk.
## Returns an array of TokenStreams, empty when +join_cl+ is empty.
def list_join(join_cl,prev)

  joins = []
  return joins if join_cl.length == 0

  qualifiers = ['INNER','LEFT','RIGHT','CROSS','FULL']

  # Puts "<qualifier> [OUTER]", taken from the tail of +before+, in front of
  # +chunk+ (the chunk is modified in place).
  attach_qualifier = lambda { |chunk,before|
    if ((before.length > 0) and (before[-1].token_value == 'OUTER'))
      chunk.unshift(before[-1])
      before = before.slice(0...-1)
    end
    if ((before.length > 0) and qualifiers.include?(before[-1].token_value))
      chunk.unshift(before[-1])
    end
  }

  # Offsets are relative to index 1, so the next JOIN sits at next_join + 1.
  next_join = join_cl.find_matching_word('JOIN',1)

  while next_join > 0
    chunk = join_cl.slice(0..next_join)
    attach_qualifier.call(chunk,prev)

    # The untrimmed chunk still ends with the qualifier of the next join.
    prev = chunk

    chunk = chunk.slice(0...-1) if (chunk[-1].token_value == 'OUTER')
    chunk = chunk.slice(0...-1) if (qualifiers.include?(chunk[-1].token_value))
    joins << TokenStream.new(chunk)

    join_cl = join_cl.slice((next_join+1)..-1)
    next_join = join_cl.find_matching_word('JOIN',1)
  end

  # Whatever is left is the last join.
  attach_qualifier.call(join_cl,prev)
  joins << join_cl
  joins

end
|
509
|
+
|
510
|
+
## Returns the JOIN clauses of the statement as an array of TokenStreams,
## one per join (see list_join); empty when there is no statement.
def clause_join

  return [] unless @stmt

  join_tokens,join_at = clause('JOIN')

  # The two tokens in front of the first JOIN may hold its qualifier.
  preceding = []
  preceding = @stmt.slice((join_at-2)..(join_at-1)) if join_at > 1

  list_join(join_tokens,preceding)

end
|
519
|
+
|
520
|
+
## Column names used in the ON conditions of the JOIN clauses.
## all:: passed on as the method used for nested SELECTs ("columns_all" when
##       true, "columns_join_on" otherwise).
## Only identifiers placed after ON are considered, and an identifier that
## follows JOIN, AS, a closing paran or another identifier is left out.
## A join without ON (e.g. CROSS JOIN) contributes no identifiers of its own.
def columns_join_on(all=false)
  cols = TokenStream.new
  clause_join.each{|list|

    # Position of ON in this join; nil when the join has no ON at all.
    # Comparing an index with nil would raise, so that case is handled below.
    on_at = list.to_arr.index('ON')

    cols += name_picker(list,(all ? "columns_all" : "columns_join_on")){|i,prev|
      ((i < 1) or (not ['JOIN','AS',RIGHT_PARAN].include?(prev.token_value))) and
      ((i < 1) or (prev.token_type != :Id)) and
      (on_at and (i > on_at))
    }
  }
  cols
end
|
531
|
+
|
532
|
+
|
533
|
+
## Aliases used in the JOIN clauses: every name picked from the joins minus
## the names recognised as tables or as columns of the ON conditions.
def alias_join(all=false)
  every_name = clause_join.inject(TokenStream.new) { |names,list|
    names + name_picker(list)
  }
  every_name - tables_join(all) - columns_join_on(all)
end
|
538
|
+
|
539
|
+
## The WHERE clause (tokens only; the position reported by clause is dropped).
def clause_where
  clause('WHERE').first
end

## Names picked from the WHERE clause; +all+ selects the method used for
## nested SELECTs.
def columns_where(all=false)
  name_picker(clause_where,(all ? "columns_all" : "columns_where"))
end

## The GROUP BY clause.
def clause_group_by
  clause('GROUP').first
end

## Names picked from the GROUP BY clause; nested SELECTs only when +all+.
def columns_group_by(all=false)
  name_picker(clause_group_by,(all ? "columns_all" : ""))
end

## The HAVING clause.
def clause_having
  clause('HAVING').first
end

## Names picked from the HAVING clause; nested SELECTs only when +all+.
def columns_having(all=false)
  name_picker(clause_having,(all ? "columns_all" : ""))
end

## The SET clause of an UPDATE.
def clause_set
  clause('SET').first
end
|
549
|
+
|
550
|
+
## Columns of the SET clause of an UPDATE statement.
## all:: when true every name of the clause is returned (nested SELECTs
##       included, via columns_all); otherwise only the assigned columns,
##       i.e. identifiers that follow SET or a comma and precede "=".
## Empty for statements that are not UPDATEs.
def columns_update(all=false)
  none = TokenStream.new
  return none unless self.typ == 'UPDATE'

  picked =
    if all
      name_picker(clause_set,"columns_all")
    else
      name_picker(clause_set,"columns_update") { |i,prev,nxt|
        ((i < 1) || ['SET',','].include?(prev.token_value)) &&
          (nxt.nil? || (nxt.token_value == '='))
      }
    end

  none + picked
end
|
564
|
+
|
565
|
+
## Splits off the statements chained with UNION.
## Returns an array of TokenStreams, one per UNION; each starts with the
## UNION keyword itself. A trailing ORDER BY is cut off the last one.
## Empty when the statement has no top-level UNION (or there is no statement).
def list_union

  return [] unless @stmt

  parts = []

  first_union = self.stmt.find_matching_word('UNION')
  return parts if first_union == 0

  rest = self.stmt.slice(first_union..-1)

  loop do
    # Offsets are relative to index 1, so the next UNION sits at nxt + 1.
    nxt = rest.find_matching_word('UNION',1)
    break unless nxt > 0

    parts << rest.slice(0..nxt)
    rest = rest.slice((nxt+1)..-1)
  end

  order_at = rest.find_matching_word('ORDER',1)
  parts << ((order_at > 0) ? rest.slice(0..order_at) : rest)

  parts

end
|
589
|
+
|
590
|
+
|
591
|
+
## Identifier tokens of the ORDER BY clause.
## Does not handle case ... end
def columns_order_by
  noise = ['ORDER','BY',',','ASC','DESC']
  clause_order_by.select { |tok|
    (tok.token_type == :Id) && !noise.include?(tok.token_value)
  }
end
|
598
|
+
|
599
|
+
## Finds the parenthesised SELECTs of a token stream.
## st:: TokenStream to search (defaults to the whole statement).
## Returns an array of TokenStreams, each running from the opening paran to
## its matching closing paran. SELECTs nested inside one that was already
## found are not listed separately. Empty when there is no statement.
def list_sub_selects(st=nil)
  return [] unless @stmt

  found = []
  remaining = st || self.stmt

  loop do
    # Find the first occurrence of SELECT that directly follows a LEFT_PARAN
    select_at = nil
    remaining.each_with_index { |tok,i|
      before = (i > 0) ? remaining[i-1] : nil
      if ((tok.token_value == 'SELECT') and before and (before.token_value == LEFT_PARAN))
        select_at = i
        break
      end
    }
    break unless select_at

    # span is the offset of the closing paran, counted from the opening one.
    span = remaining.find_matching_paran(select_at-1)
    found << remaining.slice((select_at-1)...(select_at+span))
    remaining = remaining.slice((select_at+span)..-1)
  end

  found
end
|
624
|
+
|
625
|
+
## Lets a method be called by the singular or plural form of its name:
## a trailing "s" is removed from, or added to, the unknown name and the call
## is forwarded when a method of that name exists.
## Names that match nothing are passed on to the default handler, so a typo
## raises NoMethodError instead of quietly answering nil.
def method_missing(methId,*args)
  str = methId.id2name
  str = ((str[-1] == ?s) ? str.chop : (str + 's'))
  return self.send(str,*args) if self.respond_to?(str)
  super
end

## Keeps respond_to? in step with method_missing: a name is answered when its
## singular/plural twin is a public or protected method of the class. Only
## methods that are really defined are consulted, so the lookup cannot bounce
## between the two spellings.
def respond_to_missing?(methId,include_private=false)
  str = methId.to_s
  str = ((str[-1] == ?s) ? str.chop : (str + 's'))
  self.class.method_defined?(str) or super
end
|
630
|
+
# Alternative, more natural-reading names for the query methods.

# aliases found in the statement
alias_method :column_aliases, :alias_columns
alias_method :from_aliases, :alias_from
alias_method :join_aliases, :alias_join

# clauses
alias_method :into, :clause_into
alias_method :from, :clause_from
alias_method :join_on, :clause_join
alias_method :where, :clause_where
alias_method :group_by, :clause_group_by
alias_method :having, :clause_having
alias_method :order_by, :clause_order_by
alias_method :set, :clause_set

# single tables
alias_method :into_table, :table_into
alias_method :delete_table, :table_delete
alias_method :insert_table, :table_insert
alias_method :update_table, :table_update
alias_method :truncate_table, :table_truncate
alias_method :create_table, :table_create
alias_method :alter_table, :table_alter

# lists of tables
alias_method :drop_tables, :tables_drop
alias_method :from_tables, :tables_from
alias_method :join_tables, :tables_join
alias_method :all_tables, :tables_all
alias_method :where_tables, :tables_where

alias_method :referred_tables, :tables_referred
alias_method :modified_tables, :tables_modified

# lists of columns
alias_method :all_columns, :columns_all
alias_method :select_columns, :columns_select
alias_method :where_columns, :columns_where
alias_method :group_by_columns, :columns_group_by
alias_method :having_columns, :columns_having
alias_method :order_by_columns, :columns_order_by
alias_method :join_columns, :columns_join_on
alias_method :insert_columns, :columns_insert
alias_method :update_columns, :columns_update
|
669
|
+
end
|
670
|
+
|
671
|
+
|
672
|
+
|
673
|
+
|