vorax 0.1.0pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.rspec +1 -0
- data/LICENSE.txt +22 -0
- data/README.md +45 -0
- data/Rakefile +30 -0
- data/lib/vorax/base_funnel.rb +30 -0
- data/lib/vorax/output/html_convertor.rb +120 -0
- data/lib/vorax/output/html_funnel.rb +79 -0
- data/lib/vorax/output/pagezip_convertor.rb +20 -0
- data/lib/vorax/output/tablezip_convertor.rb +22 -0
- data/lib/vorax/output/vertical_convertor.rb +53 -0
- data/lib/vorax/output/zip_convertor.rb +117 -0
- data/lib/vorax/parser/argument.rb~ +125 -0
- data/lib/vorax/parser/body_split.rb +168 -0
- data/lib/vorax/parser/conn_string.rb +104 -0
- data/lib/vorax/parser/grammars/alias.rb +912 -0
- data/lib/vorax/parser/grammars/alias.rl +146 -0
- data/lib/vorax/parser/grammars/column.rb +454 -0
- data/lib/vorax/parser/grammars/column.rl +64 -0
- data/lib/vorax/parser/grammars/common.rl +98 -0
- data/lib/vorax/parser/grammars/package_spec.rb +1186 -0
- data/lib/vorax/parser/grammars/package_spec.rl +78 -0
- data/lib/vorax/parser/grammars/plsql_def.rb +469 -0
- data/lib/vorax/parser/grammars/plsql_def.rl +59 -0
- data/lib/vorax/parser/grammars/statement.rb +925 -0
- data/lib/vorax/parser/grammars/statement.rl +83 -0
- data/lib/vorax/parser/parser.rb +320 -0
- data/lib/vorax/parser/plsql_structure.rb +158 -0
- data/lib/vorax/parser/plsql_walker.rb +143 -0
- data/lib/vorax/parser/statement_inspector.rb~ +52 -0
- data/lib/vorax/parser/stmt_inspector.rb +78 -0
- data/lib/vorax/parser/target_ref.rb +110 -0
- data/lib/vorax/sqlplus.rb +281 -0
- data/lib/vorax/version.rb +7 -0
- data/lib/vorax/vorax_io.rb +70 -0
- data/lib/vorax.rb +60 -0
- data/spec/column_spec.rb +40 -0
- data/spec/conn_string_spec.rb +53 -0
- data/spec/package_spec_spec.rb +48 -0
- data/spec/pagezip_spec.rb +153 -0
- data/spec/parser_spec.rb +299 -0
- data/spec/plsql_structure_spec.rb +44 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/sql/create_objects.sql +69 -0
- data/spec/sql/dbms_crypto.spc +339 -0
- data/spec/sql/dbms_crypto.~spc +339 -0
- data/spec/sql/dbms_stats.spc +4097 -0
- data/spec/sql/drop_user.sql +10 -0
- data/spec/sql/muci.spc +24 -0
- data/spec/sql/setup_user.sql +22 -0
- data/spec/sql/test.pkg +67 -0
- data/spec/sqlplus_spec.rb +52 -0
- data/spec/stmt_inspector_spec.rb +84 -0
- data/spec/tablezip_spec.rb +111 -0
- data/spec/vertical_spec.rb +150 -0
- data/vorax.gemspec +21 -0
- metadata +139 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
%%{

  machine statement;

  # Entering action of the main rule: makes the end-of-file marker coincide
  # with the end of the buffer.
  action parse_start {
    eof = pe
  }

  # Error action: intentionally empty, so text that matches none of the
  # rules simply leaves stmt_type as it was.
  action parse_error {
  }

  # --- classification actions (they all write the host variable stmt_type) ---

  action mark_as_anonymous {
    stmt_type = 'ANONYMOUS'
  }

  action mark_as_sqlplus_command {
    stmt_type = 'SQLPLUS'
  }

  action mark_as_sql {
    stmt_type = nil
  }

  # Takes the word that ends right before the current position and stores
  # it, upper-cased, as the statement type.
  action mark_type {
    tail = data[(0...p)]
    type = tail[/\w+\Z/]
    stmt_type = type.upcase if type
  }

  # Turns an already marked type into its "<TYPE> BODY" variant.
  action mark_body {
    stmt_type << ' BODY'
  }

  # The K_* keyword rules and `ws` are pulled in from common.rl.
  include common "common.rl";

  # --- rules ---

  # BEGIN/DECLARE followed by whitespace.
  anonymous_block = ((K_BEGIN | K_DECLARE) ws+) @mark_as_anonymous;

  # Stored modules that have no BODY variant.
  simple_module = (K_TRIGGER | K_FUNCTION | K_PROCEDURE) %mark_type;

  # PACKAGE / TYPE, each optionally followed by BODY.
  package_module = (K_PACKAGE %mark_type) (ws+ K_BODY %mark_body)?;
  type_module = (K_TYPE %mark_type) (ws+ K_BODY %mark_body)?;

  # JAVA with its optional AND RESOLVE|COMPILE / NOFORCE prefix.
  java_module = ((K_AND ws+ (K_RESOLVE | K_COMPILE) ws+ K_NOFORCE ws+) |
                 (K_AND ws+ (K_RESOLVE | K_COMPILE) ws+) |
                 (K_NOFORCE ws+))? (K_JAVA %mark_type);

  # CREATE [OR REPLACE] <module>
  plsql_module = K_CREATE ws+ (K_OR ws+ K_REPLACE ws+)?
                 (simple_module |
                  package_module |
                  type_module |
                  java_module) ws+;

  # SET TRANSACTION is plain SQL even though SET is also listed below as a
  # SQL*Plus command.
  set_transaction = (K_SET ws+ K_TRANSACTION ws+) @ mark_as_sql;

  # SQL*Plus commands: a keyword followed by whitespace, or one of the
  # @@ @ / ! shortcuts.
  sqlplus_command = (((K_ACCEPT | K_ARCHIVE | K_ATTRIBUTE | K_BREAK | K_BTITLE | K_CLEAR | K_COLUMN |
                       K_COMPUTE | K_CONNECT | K_COPY | K_DEFINE | K_DESCRIBE | K_DISCONNECT | K_EXECUTE |
                       K_EXIT | K_HELP | K_HOST | K_PASSWORD | K_PAUSE | K_PRINT | K_PROMPT |
                       K_RECOVER | K_REMARK | K_REPFOOTER | K_REPHEADER | K_RUN | K_SAVE | K_SET | K_SHOW | K_SHUTDOWN |
                       K_SPOOL | K_START | K_STARTUP | K_STORE | K_TIMING | K_TITLE | K_UNDEFINE | K_VARIABLE | K_WHENEVER |
                       K_XQUERY) ws+) | ('@@' | '@' | '/' | '!')) @ mark_as_sqlplus_command;

  main := ws* (anonymous_block | set_transaction | sqlplus_command | plsql_module) >parse_start $err(parse_error);

}%%
|
60
|
+
|
61
|
+
module Vorax
|
62
|
+
|
63
|
+
module Parser
|
64
|
+
|
65
|
+
# Gets the type of the provided statement.
#
# The scan runs on a private copy of +data+, so the caller's string is
# never modified (not even transiently) and may be frozen.
#
# @param data [String] the statement
# @return [String, nil] 'SQLPLUS' for an sqlplus statement,
#   'FUNCTION|PROCEDURE|PACKAGE|TYPE...' (optionally suffixed with ' BODY')
#   for a PL/SQL module, 'ANONYMOUS' for an anonymous plsql block, or nil
#   when none of the machine rules classified the text (plain SQL)
def self.statement_type(data)
  stmt_type = nil
  # The rules of the machine expect `ws+` after most keywords, so a newline
  # is appended to let a keyword that ends the input still be recognised
  # (presumably `ws` from common.rl covers "\n"). Appending to a copy,
  # instead of `data << "\n"` followed by `data.chop!`, avoids two problems:
  # chop! removes "\r\n" as a unit, which silently stripped a trailing "\r"
  # from the caller's string, and an exception raised while scanning left
  # the extra newline behind.
  data = data + "\n"
  %% write data;
  %% write init;
  %% write exec;
  stmt_type
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
|
@@ -0,0 +1,320 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Vorax
|
4
|
+
|
5
|
+
# Provides parsing utilities.
|
6
|
+
module Parser
|
7
|
+
|
8
|
+
# Line ending: "\r\n", a lone "\r" or a lone "\n".
END_LINE = /\r\n?|\n/ unless defined?(END_LINE)
# A SQL*Plus command ends at the end of its line.
SQLPLUS_TERMINATOR = END_LINE unless defined?(SQLPLUS_TERMINATOR)
# A plain SQL statement ends with a semicolon.
SEMI_COLON_TERMINATOR = /;/ unless defined?(SEMI_COLON_TERMINATOR)
# A "/" alone on its line (surrounding blanks allowed).
SLASH_TERMINATOR = Regexp.new('(?:' + END_LINE.to_s + '\s*\/[ \t]*' + END_LINE.to_s + ')') unless defined?(SLASH_TERMINATOR)
# The four keyword patterns below are guarded like the constants above, so
# loading this file twice does not emit "already initialized constant"
# warnings.
PLSQL_SPEC = /(?:\bpackage\b|\btype\b)/i unless defined?(PLSQL_SPEC)
SUBPROG = /(?:\bfunction\b|\bprocedure\b)/i unless defined?(SUBPROG)
BEGIN_MODULE = /(?:\bbegin\b)/i unless defined?(BEGIN_MODULE)
END_MODULE = /(?:\bend\b)/i unless defined?(END_MODULE)
|
16
|
+
|
17
|
+
# Given an expression with parenthesis, walks it keeping track of the
# open/close parens in a balanced way and extracts the first balanced
# parenthesised expression.
#
# @param text [String] the string to be walked
# @return [String] the text from the first top-level "(" up to and
#   including its matching ")"; an empty string when no closing paren
#   brings the nesting level back to zero
def self.walk_balanced_paren(text)
  walker = PLSQLWalker.new(text)
  depth = 0
  start_pos = 0
  end_pos = 0
  walker.register_spot(/[(]/) do |scanner|
    # the scanner is already past the "(", hence the -1
    start_pos = scanner.pos - 1 if depth == 0
    depth += 1
  end
  walker.register_spot(/[)]/) do |scanner|
    depth -= 1
    if depth <= 0
      end_pos = scanner.pos
      scanner.terminate
    end
  end
  walker.walk
  # start_pos/end_pos are both offsets. The previous text[start_pos, end_pos]
  # used end_pos as a length and returned text beyond the closing paren
  # whenever the expression did not start at offset 0. StringScanner#pos is
  # a byte offset, so the slice is taken by bytes as well.
  span = end_pos - start_pos
  span > 0 ? text.byteslice(start_pos, span) : ""
end
|
41
|
+
|
42
|
+
# Remove all comments from the provided statement. Pay attention that every
# comment is replaced by a blank in order to cover the case where a comment is
# used as a whitespace (e.g. select * from/*comment*/dual). For a single
# line comment the blank also replaces the line ending that closes it.
#
# The statement is modified in place and the very same object is returned.
#
# @param statement [String] the statement to be cleaned up of comments
# @return [String] the statement without any comment
def self.remove_all_comments(statement)
  comment_areas = [] # [first_byte, byte_after_last] pairs, in text order
  walker = PLSQLWalker.new(statement, false)

  mark_comment = lambda do |scanner, end_pattern|
    first = scanner.pos - scanner.matched.bytesize
    if scanner.scan_until(end_pattern)
      comment_areas << [first, scanner.pos]
    else
      # unterminated multiline comment: nothing else can be trusted
      scanner.terminate
    end
  end

  walker.register_spot(PLSQLWalker::BEGIN_ML_COMMENT) do |scanner|
    mark_comment.call(scanner, PLSQLWalker::END_ML_COMMENT)
  end

  # A "--" comment on the last line has no line ending to close it, so the
  # end of the input closes it too; previously such a comment was left in
  # the result.
  sl_comment_end = Regexp.union(PLSQLWalker::END_SL_COMMENT, /\z/)
  walker.register_spot(PLSQLWalker::BEGIN_SL_COMMENT) do |scanner|
    mark_comment.call(scanner, sl_comment_end)
  end

  # quoted text is walked over so that comment markers inside it are ignored
  walker.register_default_plsql_quoting_spot()
  walker.register_default_double_quoting_spot()
  walker.register_default_single_quoting_spot()
  walker.walk
  return statement if comment_areas.empty?

  # StringScanner positions are byte offsets, so the text is reassembled
  # with byteslice; indexing by character misplaced the cuts as soon as the
  # statement contained a multibyte character.
  pieces = []
  cursor = 0
  comment_areas.each do |first, past_last|
    pieces << statement.byteslice(cursor, first - cursor) << " "
    cursor = past_last
  end
  pieces << statement.byteslice(cursor, statement.bytesize - cursor)
  statement.replace(pieces.join)
end
|
83
|
+
|
84
|
+
# Remove the trailing comments from the provided statement.
#
# Single line ("-- ...") and multiline ("/* ... */") comments that end the
# text are stripped, one per pass, together with the whitespace that follows
# them, until the text no longer ends with a comment. The statement is
# modified in place and the very same object is returned.
#
# @param statement [String] the statement to be cleaned up
# @return [String] the statement without the trailing comments.
def self.remove_trailing_comments(statement)
  # The multiline branch may not run across a "*/": with a plain `.*?` the
  # match could start at an earlier comment and swallow everything up to the
  # last one, e.g. "/* a */ select 1 /* b */" was reduced to an empty string.
  trailing_comment = /(?:--[^\n]*\s*\z)|(?:\/\*(?:(?!\*\/).)*\*\/\s*\z)/m
  # sub! answers nil once nothing was removed
  loop do
    break unless statement.sub!(trailing_comment, '')
  end
  statement
end
|
95
|
+
|
96
|
+
# Get the function/procedure to which the argument on the
# provided position belongs.
#
# The text before +position+ is stripped of comments, reversed and walked
# backwards: the first "(" which is not balanced by a ")" is the argument
# list being typed, and the (possibly qualified/quoted) name that precedes
# it is the owner.
#
# @param statement [String] the statement to be parsed
# @param position [int] the position index where the
#   argument should be given; defaults to the end of the statement
# @return [String] the owner name, or an empty string when none was found
def self.argument_belongs_to(statement, position = nil)
  position = statement.length unless position
  stmt = Parser.remove_all_comments(statement[(0...position)])
  stmt.reverse!
  level = 0
  walker = PLSQLWalker.new(stmt, false)
  arg_owner = ""

  # Used when the opener of a q-quoted literal is not found: walk a plain
  # single quoted literal instead.
  squote_fallback = lambda do |scanner|
    scanner.skip_until(PLSQLWalker::BEGIN_SINGLE_QUOTING)
    if scanner.matched == "'"
      loop do
        # stop when no quote is left; the position does not advance on a
        # failed skip, so looping on used to spin forever
        break unless scanner.skip_until(/\'+/)
        break if scanner.matched == "'" || scanner.eos?
      end
    end
  end

  # Reads the (reversed) name sitting right after the open paren and
  # answers it in natural order. Terminates the scanner.
  extract_module = lambda do |scanner|
    module_name = ""
    while !scanner.eos?
      # consume leading whitspaces
      scanner.scan(/\s*/)
      if scanner.check(/"/) == '"'
        # we have a quoted identifier
        module_name << scanner.scan(/"/)
        # to_s: nil when the closing quote is missing
        module_name << scanner.scan_until(/"/).to_s
      else
        # unquoted identifier; to_s: nil when only blanks were left
        module_name << scanner.scan(/\S+/).to_s
      end
      # consume trailing whitespaces
      scanner.scan(/\s*/)

      # might be a dblink
      if scanner.check(/@/) == '@'
        module_name << scanner.scan(/@/)
        next
      end

      # might be package or a schema
      if scanner.check(/\./) == '.'
        module_name << scanner.scan(/\./)
        next
      end
      scanner.terminate
    end
    module_name.reverse!
  end

  # closing delimiter of a q-quoted literal => its opener, as it reads in
  # the reversed text
  reversed_openers = {
    ']' => /\[\'q/,
    '}' => /[{]\'q/,
    ')' => /[(]\'q/,
    '>' => /[<]\'q/
  }

  walker.register_spot(/'[\]})>]/) do |scanner|
    # pay attention, it's reversed
    opener = reversed_openers[scanner.matched[-1, 1]]
    if opener && !scanner.skip_until(opener)
      squote_fallback.call(scanner)
    end
    # Clear the last match with an empty scan: the walker offers it to the
    # spots registered below as well, and a "('q" left here was taken for
    # an open paren.
    scanner.scan(//)
  end

  walker.register_spot(/[)]/) do |scanner|
    level += 1
  end

  walker.register_spot(/[(]/) do |scanner|
    if level == 0
      arg_owner = extract_module.call(scanner)
    else
      # closes a nested call; level was > 0 so it cannot go negative
      level -= 1
    end
  end

  walker.walk
  arg_owner
end
|
180
|
+
|
181
|
+
# Given the html output of a script, it extracts all tables into a nice
# ruby array. This method returns a hash with the following meaning:
#
# :resultset => one entry per <table> found in the html, each entry being
#               the list of its rows and each row the list of its <td>
#               texts. Rows without any <td> are skipped. For the output
#               of two queries:
#
#   [                      # an array with all result sets
#     [                    # the resultset of the first query
#       [val11, val12],
#       ...
#       [valn1, valn2]
#     ],
#     [                    # the result set of the second query
#       [v11, v12, v13],
#       ...
#       [vn1, vn2, vn3]
#     ]
#   ]
#
# :errors    => the text nodes placed directly under <body> which contain
#               a line starting with an ORA-nnnn code.
#
# @param html [String] the html to be parsed
# @return [Hash] a hash with the parsed content
def self.query_result(html)
  # The non-breaking space is written as an explicit escape instead of
  # being obtained by parsing an entity on every call.
  nbsp = "\u00A0"
  doc = Nokogiri::HTML(html)
  hash = {:resultset => [], :errors => []}
  hash[:errors] = doc.xpath('/html/body/text()').map { |n| n.text }.grep(/\nORA-[0-9]+/)
  doc.xpath('//table').each do |table|
    resultset = []
    table.xpath('tr').each do |tr|
      # replace nbsp with a plain blank in order to not confuse
      # the ragel parser, in case it's used
      row = tr.xpath('td').map { |td| td.text.strip.gsub(nbsp, " ") }
      resultset << row unless row.empty?
    end
    hash[:resultset] << resultset
  end
  hash
end
|
225
|
+
|
226
|
+
# Gets a statement ready to be sent to sqlplus: trailing comments are
# dropped and the end separator required by the statement type is appended
# when it is missing.
#
# @param statement [String] the statement to be prepared
# @return [String] the statement with the proper end separator appended
def self.prepare_exec(statement)
  prepared = Parser.remove_trailing_comments(statement)
  kind = Parser.statement_type(prepared)

  # sqlplus commands go as they are
  return prepared if kind == 'SQLPLUS'

  if kind.nil?
    # normal statement. It should have a trailing ;
    prepared = "#{prepared.strip};" unless prepared =~ /;\s*\z/
  else
    # a plsql block. We need a trailing /
    prepared = "#{prepared.strip}\n/\n" unless prepared =~ /\n\s*\/\s*\z/
  end
  prepared
end
|
245
|
+
|
246
|
+
# Finds the statement which sits under the provided position.
#
# The script is walked terminator by terminator (";", a "/" alone on its
# line and, optionally, the end of line for sqlplus commands) until the
# span between two terminators contains the position.
#
# @param script_content [String] the script within which the current statement must
#   be detected
# @param position [int] the absolute position within the script content for which
#   the current statement must be found out
# @param params [Hash] additional options. The following parameters may be
#   provided:
#
#     :plsql_blocks => whenever or not to consider PL/SQL blocks when the current
#                      statement is detected. By default is true.
#     :sqlplus_commands => whenever or not to consider SQLPLUS commands when
#                          trying to detect the current statement. By default
#                          is true.
#
# @return [Hash] a hash with the following keys: :statement => the current statement
#   which corresponds to the provided position, :range => the statement boundaries
#   within the whole script
def self.current_statement(script_content, position=0, params = {})
  opts = { :plsql_blocks => true, :sqlplus_commands => true }.merge(params)
  start_pos = 0
  end_pos = 0

  # Either the span walked so far holds the position (then it is the answer,
  # terminator included, and the walk stops) or the next statement starts here.
  claim_or_advance = lambda do |scanner|
    if (start_pos..scanner.pos).include?(position)
      end_pos = scanner.pos
      scanner.terminate
    else
      start_pos = scanner.pos
    end
  end

  walker = PLSQLWalker.new(script_content)

  walker.register_spot(Parser::SEMI_COLON_TERMINATOR) do |scanner|
    type = Parser.statement_type(scanner.string[(start_pos..scanner.pos)])
    if type && opts[:plsql_blocks] && type != 'SQLPLUS'
      # this is a plsql block, eat till the slash terminator;
      # without one it's an invalid statement
      scanner.terminate unless scanner.scan_until(Parser::SLASH_TERMINATOR)
    end
    claim_or_advance.call(scanner)
  end

  walker.register_spot(Parser::SLASH_TERMINATOR) do |scanner|
    claim_or_advance.call(scanner)
  end

  if opts[:sqlplus_commands]
    walker.register_spot(Parser::SQLPLUS_TERMINATOR) do |scanner|
      type = Parser.statement_type(scanner.string[(start_pos..scanner.pos)])
      if type == 'SQLPLUS' && (start_pos..scanner.pos).include?(position)
        # the line ending is not part of a sqlplus command
        end_pos = scanner.pos - scanner.matched.length
        scanner.terminate
      end
    end
  end

  walker.walk
  boundaries = (start_pos...end_pos)
  {:statement => script_content[boundaries], :range => boundaries}
end
|
317
|
+
|
318
|
+
end
|
319
|
+
|
320
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'tree'
|
4
|
+
|
5
|
+
module Vorax
|
6
|
+
|
7
|
+
module Parser
|
8
|
+
|
9
|
+
# A named, typed span of the parsed text. The name and the type are fixed
# at creation, the boundaries may be adjusted afterwards.
class Region

  attr_reader :name, :type
  attr_accessor :start_pos, :end_pos

  # @param name the region name
  # @param type the region type
  # @param start_pos where the region starts (0 by default)
  # @param end_pos where the region ends (0 by default)
  def initialize(name, type, start_pos = 0, end_pos = 0)
    @name, @type = name, type
    @start_pos, @end_pos = start_pos, end_pos
  end

  # @return [String] "name[type]: start_pos"
  def to_s
    format('%s[%s]: %s', @name, @type, @start_pos)
  end

end
|
26
|
+
|
27
|
+
# Builds a tree of regions (specs/bodies, subprograms, nested blocks) out
# of a PL/SQL source text. The text is walked as soon as the object is
# created; the outcome is available through #tree.
class PlsqlStructure

  PLSQL_SPEC = /(?:\bpackage\b|\btype\b)/i unless defined?(PLSQL_SPEC)
  SUBPROG = /(?:\bfunction\b|\bprocedure\b)/i unless defined?(SUBPROG)
  BEGIN_MODULE = /(?:\bbegin\b)/i unless defined?(BEGIN_MODULE)
  END_MODULE = /(?:\bend\b)/i unless defined?(END_MODULE)
  FOR_STMT = /(?:\bfor\b)/i unless defined?(FOR_STMT)
  # LOOP_STMT used to be a copy of the "for" pattern, and IF_STMT was
  # guarded by defined?(LOOP_STMT) and therefore never got defined.
  LOOP_STMT = /(?:\bloop\b)/i unless defined?(LOOP_STMT)
  IF_STMT = /(?:\bif\b)/i unless defined?(IF_STMT)

  attr_reader :text

  # Parses the provided text. Parse errors are logged through Vorax.debug
  # and otherwise ignored, leaving the tree with whatever was recognised
  # up to that point.
  #
  # @param text [String] the PL/SQL source to be analyzed
  def initialize(text)
    @text = text
    @root = Tree::TreeNode.new("root", nil)
    @walker = PLSQLWalker.new(text)
    @level = 0
    @current_parent = @root
    @begin_level = 0
    register_spots()
    @walker.walk
  rescue StandardError => e
    # be prepare for any nasting parse error.
    # Failing here is kind of usual, having in mind
    # that we often parse incomplete code.
    # (StandardError rather than Exception, so that interrupts and exit
    # requests are not swallowed.)
    Vorax.debug(e.to_s)
  end

  # @return [Tree::TreeNode] the root node; every other node carries a
  #   Region as content
  def tree
    @root
  end

  private

  def assign_parent(node)
    @current_parent = node
  end

  def register_spots
    register_plsql_spec_spot()
    register_slash_terminator_spot()
    register_subprog_spot()
    register_begin_spot()
    register_end_spot()
  end

  # "package"/"type" at top level: opens a SPEC or BODY region.
  def register_plsql_spec_spot
    @walker.register_spot(PLSQL_SPEC) do |scanner|
      if @level == 0
        meta_data = Parser.plsql_def("#{scanner.matched}#{scanner.rest}")
        if meta_data[:type] == 'SPEC' || meta_data[:type] == 'BODY'
          # is it a spec or a body?
          region = Region.new(meta_data[:name], meta_data[:type], scanner.pos)
          assign_parent(@current_parent << Tree::TreeNode.new(region.to_s, region))
          @level += 1
        end
      end
    end
  end

  # "/" on its own line: closes the last top level region and starts over
  # from the root.
  def register_slash_terminator_spot
    @walker.register_spot(Parser::SLASH_TERMINATOR) do |scanner|
      # this should apply to the last top level node
      if @root.has_children?
        if @root.children.last.content
          @root.children.last.content.end_pos = scanner.pos
        end
        assign_parent(@root)
        @level = 0
      end
    end
  end

  # "function"/"procedure": adds a subprogram node under the current parent.
  def register_subprog_spot
    @walker.register_spot(SUBPROG) do |scanner|
      # the name is looked up within the 32 bytes following the keyword
      subprog_name = scanner.peek(32)[/(?:"[^"]+")|(?:[A-Z0-9$_#]+)/i]
      if scanner.matched =~ /function/i
        subprog_type = 'FUNCTION'
      elsif scanner.matched =~ /procedure/i
        subprog_type = 'PROCEDURE'
      end
      region = Region.new(subprog_name, subprog_type, scanner.pos)
      node = Tree::TreeNode.new(region.to_s, region)
      @current_parent << node
      if @current_parent.content
        if @current_parent.content.type != 'SPEC'
          # a definition: whatever follows belongs to it
          @level += 1
          assign_parent(node)
        else
          # a declaration within a spec: it has no body to wait for
          node.content.end_pos = node.content.start_pos
        end
      end
    end
  end

  # "begin": the first one belongs to the enclosing module, the nested ones
  # open anonymous BLOCK regions.
  def register_begin_spot
    @walker.register_spot(BEGIN_MODULE) do |scanner|
      @begin_level += 1
      if @begin_level > 1
        # start a new region
        region = Region.new('anonymous', 'BLOCK', scanner.pos)
        @level += 1
        assign_parent(@current_parent << Tree::TreeNode.new(region.to_s, region))
      end
    end
  end

  # "end": closes the current region and climbs one level up.
  def register_end_spot
    @walker.register_spot(END_MODULE) do |scanner|
      # we have an "end" match. first of all check if it's not part
      # of an conditional compiling "$end" definition. The scanner position
      # is a byte offset; an "end" at the very beginning has nothing behind
      # it (index -1 would wrap around to the last character).
      behind = scanner.pos - scanner.matched.bytesize - 1
      char_behind = behind >= 0 ? scanner.string.byteslice(behind, 1) : nil
      if char_behind != '$'
        metadata = Parser.plsql_def("#{scanner.matched}#{scanner.rest}")
        if metadata[:type] == 'END' && metadata[:end_def] > 0
          @begin_level -= 1 if @begin_level > 0
          @level -= 1 if @level > 0
          if @current_parent.content
            @current_parent.content.end_pos = (scanner.pos - 1) + (metadata[:end_def] - 1)
          end
          # the root has no parent: stay on it instead of leaving a nil
          # parent behind, which made the next spot raise and abort the walk
          assign_parent(@current_parent.parent || @root)
        end
      end
    end
  end

end
|
155
|
+
|
156
|
+
end
|
157
|
+
|
158
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'strscan'
|
4
|
+
|
5
|
+
module Vorax
|
6
|
+
|
7
|
+
module Parser
|
8
|
+
|
9
|
+
# A PLSQL string scanner which is looking for interesting points within
# the provided source code. This is used instead of a fully fledged parser
# for speed considerations.
class PLSQLWalker

  BEGIN_ML_COMMENT = /\/\*/ unless defined?(BEGIN_ML_COMMENT)
  END_ML_COMMENT = /\*\// unless defined?(END_ML_COMMENT)
  BEGIN_SL_COMMENT = /--/ unless defined?(BEGIN_SL_COMMENT)
  unless defined?(END_SL_COMMENT)
    # same pattern as Parser::END_LINE, which is used when already loaded
    END_SL_COMMENT = defined?(Parser::END_LINE) ? Parser::END_LINE : /\r\n?|\n/
  end
  # case insensitive: both q'[ and Q'[ start a literal
  BEGIN_PLSQL_SPECIAL_QUOTING = /q'[!\[{(<]/i unless defined?(BEGIN_PLSQL_SPECIAL_QUOTING)
  BEGIN_DOUBLE_QUOTING = /[\"]/ unless defined?(BEGIN_DOUBLE_QUOTING)
  BEGIN_SINGLE_QUOTING = /[']/ unless defined?(BEGIN_SINGLE_QUOTING)
  # q-quote opening delimiter => closing delimiter; a delimiter which is not
  # listed (e.g. "!") closes with itself
  PLSQL_QUOTING_CLOSERS = { '[' => ']', '{' => '}', '(' => ')', '<' => '>' }.freeze unless defined?(PLSQL_QUOTING_CLOSERS)

  # Create a new parse walker.
  #
  # @param text [String] the text to be walked/parsed
  # @param create_default_spots [boolean] whenever or not to create default
  #   detection spots: multiline comments, singleline comments and quoted literals
  def initialize(text, create_default_spots=true)
    @text = text
    @matchers = []
    @ss = StringScanner.new(text)
    create_default_spots() if create_default_spots
  end

  # Returns the string scanner used for walking the string.
  #
  # @return [StringScanner] the string scanner
  def scanner
    @ss
  end

  # Register a new detection spot. The order of specifying these spots is important.
  #
  # @param pattern [Regexp] the spot regular expression
  # @param callback [Procedure] what to do when this spot is detected. The registered
  #   block is always called with the string scanner object. Please do not use "return"
  #   to exit from the defined block.
  def register_spot(pattern, &callback)
    @matchers << {:pattern => pattern, :callback => callback}
  end

  # Walk the text and trigger the registered callbacks.
  #
  # After every hit the spots are tried in registration order against the
  # scanner's *current* last match. A callback which scans further therefore
  # changes what the spots registered after it get to see (e.g. the line
  # ending consumed by a single line comment is still offered to them).
  #
  # @return [nil]
  def walk
    if @matchers.empty?
      # an empty alternation matches everywhere without consuming anything,
      # which would loop forever
      @ss.terminate
      return
    end
    global_matcher = Regexp.new(@matchers.map { |e| e[:pattern].to_s }.join('|'),
                                Regexp::IGNORECASE)
    until @ss.eos?
      if @ss.scan_until(global_matcher)
        @matchers.each do |matcher|
          matcher[:callback].call(@ss) if @ss.matched =~ matcher[:pattern]
        end
      else
        @ss.terminate
      end
    end
  end

  # Register a spot to walk a multiline comment.
  def register_default_ml_comment_spot
    register_spot(BEGIN_ML_COMMENT) do |scanner|
      scanner.scan_until(END_ML_COMMENT)
    end
  end

  # Register a spot to walk a single line comment.
  def register_default_sl_comment_spot
    register_spot(BEGIN_SL_COMMENT) do |scanner|
      scanner.scan_until(END_SL_COMMENT)
    end
  end

  # Register a spot to walk a plsql special quoting liternal, that is
  # q'[...]', q'{...}', q'(...)', q'<...>' or q'!...!'.
  def register_default_plsql_quoting_spot
    register_spot(BEGIN_PLSQL_SPECIAL_QUOTING) do |scanner|
      # The delimiter has to be read before scanning any further: scanning
      # to the end of the line first (as this spot used to do) overwrote the
      # match, so the closing delimiter was never looked for.
      opener = scanner.matched[-1, 1]
      closer = PLSQL_QUOTING_CLOSERS.fetch(opener, opener)
      scanner.scan_until(/#{Regexp.escape(closer)}'/)
      # Clear the last match with an empty scan, otherwise the closing "]'"
      # is offered to the single quote spot as the start of a new literal.
      scanner.scan(//)
    end
  end

  # Register a spot to walk a double quoted literal.
  def register_default_double_quoting_spot
    register_spot(BEGIN_DOUBLE_QUOTING) do |scanner|
      scanner.scan_until(/"/)
    end
  end

  # Register a spot to walk a single quoted literal. Doubled quotes within
  # the literal are taken as escaped quotes.
  def register_default_single_quoting_spot
    register_spot(BEGIN_SINGLE_QUOTING) do |scanner|
      collector = String.new
      # The loop ends when no quote is left (an unterminated literal used to
      # spin forever, because a failed scan does not move the position) or on
      # a run with an odd number of quotes: its pairs are escaped quotes and
      # the remaining one closes the literal.
      while (chunk = scanner.scan_until(/\'+/))
        collector << chunk
        break if scanner.matched.length.odd?
      end
      collector
    end
  end

  private

  def create_default_spots
    # define a multiline comment spot
    register_default_ml_comment_spot()

    # define a single line comment spot
    register_default_sl_comment_spot()

    # define special PLSQL quotes spot
    register_default_plsql_quoting_spot()

    # register a double quoted string spot
    register_default_double_quoting_spot()

    # register a single quoted string spot
    register_default_single_quoting_spot()
  end

end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
end
|