vorax 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/.gitignore +7 -0
  2. data/.rspec +1 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +45 -0
  5. data/Rakefile +30 -0
  6. data/lib/vorax/base_funnel.rb +30 -0
  7. data/lib/vorax/output/html_convertor.rb +120 -0
  8. data/lib/vorax/output/html_funnel.rb +79 -0
  9. data/lib/vorax/output/pagezip_convertor.rb +20 -0
  10. data/lib/vorax/output/tablezip_convertor.rb +22 -0
  11. data/lib/vorax/output/vertical_convertor.rb +53 -0
  12. data/lib/vorax/output/zip_convertor.rb +117 -0
  13. data/lib/vorax/parser/argument.rb~ +125 -0
  14. data/lib/vorax/parser/body_split.rb +168 -0
  15. data/lib/vorax/parser/conn_string.rb +104 -0
  16. data/lib/vorax/parser/grammars/alias.rb +912 -0
  17. data/lib/vorax/parser/grammars/alias.rl +146 -0
  18. data/lib/vorax/parser/grammars/column.rb +454 -0
  19. data/lib/vorax/parser/grammars/column.rl +64 -0
  20. data/lib/vorax/parser/grammars/common.rl +98 -0
  21. data/lib/vorax/parser/grammars/package_spec.rb +1186 -0
  22. data/lib/vorax/parser/grammars/package_spec.rl +78 -0
  23. data/lib/vorax/parser/grammars/plsql_def.rb +469 -0
  24. data/lib/vorax/parser/grammars/plsql_def.rl +59 -0
  25. data/lib/vorax/parser/grammars/statement.rb +925 -0
  26. data/lib/vorax/parser/grammars/statement.rl +83 -0
  27. data/lib/vorax/parser/parser.rb +320 -0
  28. data/lib/vorax/parser/plsql_structure.rb +158 -0
  29. data/lib/vorax/parser/plsql_walker.rb +143 -0
  30. data/lib/vorax/parser/statement_inspector.rb~ +52 -0
  31. data/lib/vorax/parser/stmt_inspector.rb +78 -0
  32. data/lib/vorax/parser/target_ref.rb +110 -0
  33. data/lib/vorax/sqlplus.rb +281 -0
  34. data/lib/vorax/version.rb +7 -0
  35. data/lib/vorax/vorax_io.rb +70 -0
  36. data/lib/vorax.rb +60 -0
  37. data/spec/column_spec.rb +40 -0
  38. data/spec/conn_string_spec.rb +53 -0
  39. data/spec/package_spec_spec.rb +48 -0
  40. data/spec/pagezip_spec.rb +153 -0
  41. data/spec/parser_spec.rb +299 -0
  42. data/spec/plsql_structure_spec.rb +44 -0
  43. data/spec/spec_helper.rb +13 -0
  44. data/spec/sql/create_objects.sql +69 -0
  45. data/spec/sql/dbms_crypto.spc +339 -0
  46. data/spec/sql/dbms_crypto.~spc +339 -0
  47. data/spec/sql/dbms_stats.spc +4097 -0
  48. data/spec/sql/drop_user.sql +10 -0
  49. data/spec/sql/muci.spc +24 -0
  50. data/spec/sql/setup_user.sql +22 -0
  51. data/spec/sql/test.pkg +67 -0
  52. data/spec/sqlplus_spec.rb +52 -0
  53. data/spec/stmt_inspector_spec.rb +84 -0
  54. data/spec/tablezip_spec.rb +111 -0
  55. data/spec/vertical_spec.rb +150 -0
  56. data/vorax.gemspec +21 -0
  57. metadata +139 -0
@@ -0,0 +1,83 @@
1
+ %%{
# Ragel machine that classifies the beginning of a statement. The embedded
# actions below only assign the host-language local `stmt_type`.
2
+
3
+ machine statement;
4
+
5
# Hooked as the entering action of `main` (`>parse_start`): sets `eof` to `pe`.
+ action parse_start {
6
+ eof = pe
7
+ }
8
+
9
# Hooked through `$err(parse_error)` on `main`; the action body is empty, so
# it leaves `stmt_type` untouched.
+ action parse_error {
10
+ }
11
+
12
# Fired by `anonymous_block` (BEGIN/DECLARE followed by whitespace).
+ action mark_as_anonymous {
13
+ stmt_type = 'ANONYMOUS'
14
+ }
15
+
16
# Fired by `sqlplus_command`.
+ action mark_as_sqlplus_command {
17
+ stmt_type = 'SQLPLUS'
18
+ }
19
+
20
# Fired by `set_transaction`; resets the type to nil. NOTE(review): presumably
# this keeps SET TRANSACTION from being reported as the SQL*Plus SET command,
# since K_SET is also part of `sqlplus_command` - confirm.
+ action mark_as_sql {
21
+ stmt_type = nil
22
+ }
23
+
24
# Leaving action (`%mark_type`): takes the word that ends right before the
# current position `p` and stores it upper-cased as the statement type.
+ action mark_type {
25
+ tail = data[(0...p)]
26
+ type = tail[/\w+\Z/]
27
+ stmt_type = type.upcase if type
28
+ }
29
+
30
# Leaving action of the optional BODY keyword: appends ' BODY' to the type
# previously set by mark_type (e.g. 'PACKAGE' becomes 'PACKAGE BODY').
+ action mark_body {
31
+ stmt_type << ' BODY'
32
+ }
33
+
34
# The K_* keyword machines and `ws` are not defined in this file; they are
# expected to come from the included common.rl.
+ include common "common.rl";
35
+
36
+ # parsing rules baby
37
+ anonymous_block = ((K_BEGIN | K_DECLARE) ws+) @mark_as_anonymous;
38
+ simple_module = (K_TRIGGER | K_FUNCTION | K_PROCEDURE) %mark_type;
39
+ package_module = (K_PACKAGE %mark_type) (ws+ K_BODY %mark_body)?;
40
+ type_module = (K_TYPE %mark_type) (ws+ K_BODY %mark_body)?;
41
+ java_module = ((K_AND ws+ (K_RESOLVE | K_COMPILE) ws+ K_NOFORCE ws+) |
42
+ (K_AND ws+ (K_RESOLVE | K_COMPILE) ws+) |
43
+ (K_NOFORCE ws+))? (K_JAVA %mark_type);
44
# CREATE [OR REPLACE] followed by one of the module kinds above.
+ plsql_module = K_CREATE ws+ (K_OR ws+ K_REPLACE ws+)?
45
+ (simple_module |
46
+ package_module |
47
+ type_module |
48
+ java_module) ws+;
49
+ set_transaction = (K_SET ws+ K_TRANSACTION ws+) @ mark_as_sql;
50
# A SQL*Plus keyword followed by whitespace, or one of the '@@', '@', '/', '!'
# shortcuts (which need no trailing whitespace).
+ sqlplus_command = (((K_ACCEPT | K_ARCHIVE | K_ATTRIBUTE | K_BREAK | K_BTITLE | K_CLEAR | K_COLUMN |
51
+ K_COMPUTE | K_CONNECT | K_COPY | K_DEFINE | K_DESCRIBE | K_DISCONNECT | K_EXECUTE |
52
+ K_EXIT | K_HELP | K_HOST | K_PASSWORD | K_PAUSE | K_PRINT | K_PROMPT |
53
+ K_RECOVER | K_REMARK | K_REPFOOTER | K_REPHEADER | K_RUN | K_SAVE | K_SET | K_SHOW | K_SHUTDOWN |
54
+ K_SPOOL | K_START | K_STARTUP | K_STORE | K_TIMING | K_TITLE | K_UNDEFINE | K_VARIABLE | K_WHENEVER |
55
+ K_XQUERY) ws+) | ('@@' | '@' | '/' | '!')) @ mark_as_sqlplus_command;
56
+
57
# Entry point: optional leading whitespace, then exactly one of the
# alternatives above.
+ main := ws* (anonymous_block | set_transaction | sqlplus_command | plsql_module) >parse_start $err(parse_error);
58
+
59
+ }%%
60
+
61
+ module Vorax
62
+
63
+ module Parser
64
+
65
+ # Gets the type of the provided statement.
66
+ #
67
+ # @param data [String] the statement
68
+ # @return [String] 'SQLPLUS' for an sqlplus statement, 'FUNCTION|PROCEDURE|PACKAGE|TYPE...' for
69
+ # a PL/SQL block, 'ANONYMOUS' for an anonymous plsql block
70
+ def self.statement_type(data)
71
+ stmt_type = nil
72
+ data << "\n"
73
+ %% write data;
74
+ %% write init;
75
+ %% write exec;
76
+ data.chop!
77
+ return stmt_type
78
+ end
79
+
80
+ end
81
+
82
+ end
83
+
@@ -0,0 +1,320 @@
1
+ # encoding: utf-8
2
+
3
+ module Vorax
4
+
5
+ # Provides parsing utilities.
6
+ module Parser
7
+
8
# All constants are guarded so that loading this file more than once does
# not emit "already initialized constant" warnings.

# A line break: CRLF, a lone CR or a lone LF.
END_LINE = /\r\n?|\n/ unless defined?(END_LINE)
# A SQL*Plus command ends at the end of its line.
SQLPLUS_TERMINATOR = END_LINE unless defined?(SQLPLUS_TERMINATOR)
# A plain SQL statement ends with a semicolon.
SEMI_COLON_TERMINATOR = /;/ unless defined?(SEMI_COLON_TERMINATOR)
# A line break, optional whitespace, a slash, optional blanks/tabs and
# another line break, i.e. a "/" standing alone on its line.
SLASH_TERMINATOR = Regexp.new('(?:' + END_LINE.to_s + '\s*\/[ \t]*' + END_LINE.to_s + ')') unless defined?(SLASH_TERMINATOR)
# Case insensitive whole-word matchers for PL/SQL keywords.
PLSQL_SPEC = /(?:\bpackage\b|\btype\b)/i unless defined?(PLSQL_SPEC)
SUBPROG = /(?:\bfunction\b|\bprocedure\b)/i unless defined?(SUBPROG)
BEGIN_MODULE = /(?:\bbegin\b)/i unless defined?(BEGIN_MODULE)
END_MODULE = /(?:\bend\b)/i unless defined?(END_MODULE)
16
+
17
# Given an expression with parentheses, walks it keeping track of the
# open/close parens in a balanced way and extracts the first balanced
# parenthesized expression.
#
# @param text [String] the string to be walked
# @return [String] the text from the first top level "(" up to and
#   including the ")" that balances it; an empty string when no closing
#   paren is found
def self.walk_balanced_paren(text)
  walker = PLSQLWalker.new(text)
  level = 0
  start_pos = 0
  end_pos = 0
  walker.register_spot(/[(]/) do |scanner|
    # the scanner sits right after the paren, hence the -1
    start_pos = scanner.pos - 1 if level == 0
    level += 1
  end
  walker.register_spot(/[)]/) do |scanner|
    level -= 1
    if level <= 0
      end_pos = scanner.pos
      scanner.terminate
    end
  end
  walker.walk
  # Both values are absolute offsets, so slice with a range. The former
  # text[start_pos, end_pos] treated end_pos as a length and returned too
  # much whenever the expression did not start at index 0.
  text[start_pos...end_pos]
end
41
+
42
# Remove all comments from the provided statement. Pay attention that every
# comment is replaced by a blank in order to cover the case where a comment is
# used as a whitespace (e.g. select * from/*comment*/dual). A single line
# comment is replaced together with the line break that ends it.
#
# Comment markers inside quoted literals are left alone. An unterminated
# multiline comment stops the processing and is kept as is.
#
# @param statement [String] the statement to be cleaned up of comments;
#   it is not modified
# @return [String] a new string holding the statement without any comment
def self.remove_all_comments(statement)
  comment_areas = []
  walker = PLSQLWalker.new(statement, false)

  # Records the span of the comment whose opening marker was just matched.
  collect = lambda do |scanner, end_pattern|
    start_pos = scanner.pos - scanner.matched.length
    if scanner.scan_until(end_pattern)
      comment_areas << (start_pos...scanner.pos)
    else
      scanner.terminate
    end
  end

  walker.register_spot(PLSQLWalker::BEGIN_ML_COMMENT) do |scanner|
    collect.call(scanner, PLSQLWalker::END_ML_COMMENT)
  end

  # A single line comment also ends at the end of the text; without the \z
  # alternative a final "-- comment" with no trailing newline was never
  # removed.
  sl_comment_end = Regexp.union(PLSQLWalker::END_SL_COMMENT, /\z/)
  walker.register_spot(PLSQLWalker::BEGIN_SL_COMMENT) do |scanner|
    collect.call(scanner, sl_comment_end)
  end

  walker.register_default_plsql_quoting_spot
  walker.register_default_double_quoting_spot
  walker.register_default_single_quoting_spot
  walker.walk

  # Work on a copy so the caller's string is never mutated (and frozen
  # strings are accepted). Replacing from the last area to the first keeps
  # the recorded offsets valid without any bookkeeping.
  result = statement.dup
  comment_areas.reverse_each { |area| result[area] = " " }
  result
end
83
+
84
# Remove the trailing comments from the provided statement, i.e. every
# single line (--) or multiline (/* */) comment which is followed only by
# whitespace up to the end of the text. The whitespace after a removed
# comment is removed as well.
#
# NOTE: the detection is purely textual and not aware of quoted literals,
# so a "--" inside a string on the last line is taken as a comment.
#
# @param statement [String] the statement to be cleaned up; it is not
#   modified
# @return [String] a new string holding the statement without the trailing
#   comments
def self.remove_trailing_comments(statement)
  # The multiline branch must not run across a closing "*/": the former lazy
  # ".*?" happily extended over it to reach \z, wiping everything from the
  # FIRST "/*" of the statement (e.g. a leading hint) up to the end.
  trailing_comment = %r{(?:--[^\n]*\s*\z)|(?:/\*(?:(?!\*/).)*\*/\s*\z)}m
  stmt = statement.dup
  # peel the comments off one by one; sub! answers nil when nothing is left
  loop { break unless stmt.sub!(trailing_comment, '') }
  stmt
end
95
+
96
# Get the function/procedure to which the argument on the
# provided position belongs.
#
# The text before the position is stripped of comments, reversed and then
# walked, so the scan effectively goes backwards from the position: closed
# paren pairs are skipped and the first unbalanced "(" marks the call whose
# (possibly schema/package qualified, quoted or dblink-ed) name is returned.
#
# @param statement [String] the statement to be parsed
# @param position [int] the position index where the
#   argument should be given; defaults to the end of the statement
# @return [String] the name of the owning module or an empty string when
#   none could be found
def self.argument_belongs_to(statement, position = nil)
  position ||= statement.length
  stmt = Parser.remove_all_comments(statement[(0...position)])
  stmt.reverse!
  level = 0
  walker = PLSQLWalker.new(stmt, false)
  arg_owner = ""

  # Fallback used when a quote followed by a closing bracket is not the end
  # of a q'[...]' literal: skip over quotes as for a plain literal.
  squote_fallback = lambda do |scanner|
    scanner.skip_until(PLSQLWalker::BEGIN_SINGLE_QUOTING)
    if scanner.matched == "'"
      loop do
        # no further quote: stop here, the former loop never ended
        break unless scanner.skip_until(/\'+/)
        break if scanner.matched == "'" || scanner.eos?
      end
    end
  end

  # Reads the (reversed) name sitting right after the open paren and
  # answers it in the natural reading order.
  extract_module = lambda do |scanner|
    module_name = ""
    until scanner.eos?
      # consume leading whitespaces
      scanner.scan(/\s*/)
      if scanner.check(/"/)
        # we have a quoted identifier
        module_name << scanner.scan(/"/)
        quoted = scanner.scan_until(/"/)
        unless quoted
          # unterminated quoted identifier: give up instead of failing on
          # "<< nil"
          scanner.terminate
          break
        end
        module_name << quoted
      else
        # unquoted identifier; nothing but blanks left means no name
        part = scanner.scan(/\S+/)
        break unless part
        module_name << part
      end
      # consume trailing whitespaces
      scanner.scan(/\s*/)

      # might be a dblink
      if scanner.check(/@/)
        module_name << scanner.scan(/@/)
        next
      end

      # might be package or a schema
      if scanner.check(/\./)
        module_name << scanner.scan(/\./)
        next
      end
      scanner.terminate
    end
    module_name.reverse!
  end

  # pay attention, the text is reversed: these are the (reversed) openers
  # of the q-quote literals, keyed by the closing bracket
  qquote_openers = {
    ']' => /\[\'q/,
    '}' => /[{]\'q/,
    ')' => /[(]\'q/,
    '>' => /[<]\'q/
  }

  walker.register_spot(/'[\]})>]/) do |scanner|
    opener = qquote_openers[scanner.matched[-1, 1]]
    if opener
      squote_fallback.call(scanner) unless scanner.skip_until(opener)
    end
  end

  # The walker tests every spot against whatever the scanner matched last,
  # callbacks included. The q-quote spot above may leave "('q" behind, so
  # the paren spots only react to a paren they were really triggered by.
  walker.register_spot(/[)]/) do |scanner|
    level += 1 if scanner.matched == ')'
  end

  walker.register_spot(/[(]/) do |scanner|
    if scanner.matched == '('
      if level == 0
        arg_owner = extract_module.call(scanner)
      else
        # closes a nested pair; level is at least 1 here so it cannot
        # become negative
        level -= 1
      end
    end
  end

  walker.walk
  arg_owner
end
180
+
181
# Extracts every table found in the html output of a script into plain
# ruby arrays. The answer is a hash with two keys:
#
#   :resultset => one entry per <table> element, in document order. Every
#                 entry is an array of rows and every row is an array with
#                 the stripped text of its <td> cells. For the output of
#                 two queries it looks like:
#
#                   [
#                     [ [val11, val12], [val21, val22], ... ],
#                     [ [v11, v12, v13], [v21, v22, v23], ... ]
#                   ]
#
#                 Rows without any <td> cell are left out.
#   :errors    => the text nodes sitting directly under <body> which
#                 contain a line starting with ORA-<digits>.
#
# @param html [String] the html to be parsed
# @return a hash with the parsed content
def self.query_result(html)
  nbsp = Nokogiri::HTML("&nbsp;").text
  doc = Nokogiri::HTML(html)
  body_texts = doc.xpath('/html/body/text()').map { |node| node.text }
  errors = body_texts.grep(/\nORA-[0-9]+/)
  resultsets = doc.xpath('//table').map do |table|
    rows = table.xpath('tr').map do |tr|
      # a non breaking space is turned into a plain blank in order to not
      # confuse the ragel parser, in case it's used
      tr.xpath('td').map { |td| td.text.strip.gsub(nbsp, " ") }
    end
    rows.reject { |cells| cells.empty? }
  end
  { :resultset => resultsets, :errors => errors }
end
225
+
226
# Makes the provided statement ready for sqlplus execution: the trailing
# comments are dropped and the end separator matching the statement type is
# appended, unless it's already there.
#
# @param statement [String] the statement to be prepared
# @return [String] the statement with the proper end separator appended
def self.prepare_exec(statement)
  stmt = Parser.remove_trailing_comments(statement)
  kind = Parser.statement_type(stmt)

  # sqlplus commands go as they are
  return stmt if kind == 'SQLPLUS'

  if kind.nil?
    # normal statement. It should have a trailing ;
    stmt = "#{stmt.strip};" unless stmt =~ /;\s*\z/
  else
    # a plsql block. We need a trailing /
    stmt = "#{stmt.strip}\n/\n" unless stmt =~ /\n\s*\/\s*\z/
  end
  stmt
end
245
+
246
# Finds out the statement under the provided position.
#
# @param script_content [String] the script within which the current statement must
#   be detected
# @param position [int] the absolute position within the script content for which
#   the current statement must be found out
# @param params [Hash] additional options. The following parameters may be
#   provided:
#
#     :plsql_blocks => whenever or not to consider PL/SQL blocks when the current
#                      statement is detected. By default is true.
#     :sqlplus_commands => whenever or not to consider SQLPLUS commands when
#                          trying to detect the current statement. By default
#                          is true.
#
# @return [Hash] a hash with the following keys: :statement => the current statement
#   which corresponds to the provided position, :range => the statement boundaries
#   within the whole script
def self.current_statement(script_content, position=0, params = {})
  defaults = { :plsql_blocks => true, :sqlplus_commands => true }
  opts = defaults.merge(params)
  start_pos = 0
  end_pos = 0

  # Common ending of the terminator spots: when the statement which has just
  # been closed covers the position we're done (the terminator is included),
  # otherwise the next statement starts right here.
  settle = lambda do |scanner|
    if (start_pos..scanner.pos).include?(position)
      end_pos = scanner.pos
      scanner.terminate
    else
      start_pos = scanner.pos
    end
  end

  walker = PLSQLWalker.new(script_content)

  walker.register_spot(Parser::SEMI_COLON_TERMINATOR) do |scanner|
    kind = Parser.statement_type(scanner.string[(start_pos..scanner.pos)])
    if kind && opts[:plsql_blocks] && kind != 'SQLPLUS'
      # this is a plsql block, eat till the slash terminator; without one
      # it's an invalid statement
      scanner.terminate unless scanner.scan_until(Parser::SLASH_TERMINATOR)
    end
    settle.call(scanner)
  end

  walker.register_spot(Parser::SLASH_TERMINATOR, &settle)

  if opts[:sqlplus_commands]
    walker.register_spot(Parser::SQLPLUS_TERMINATOR) do |scanner|
      kind = Parser.statement_type(scanner.string[(start_pos..scanner.pos)])
      if kind == 'SQLPLUS' && (start_pos..scanner.pos).include?(position)
        # the line break is not part of the command
        end_pos = scanner.pos - scanner.matched.length
        scanner.terminate
      end
    end
  end

  walker.walk
  boundaries = (start_pos...end_pos)
  {:statement => script_content[boundaries], :range => boundaries}
end
317
+
318
+ end
319
+
320
+ end
@@ -0,0 +1,158 @@
1
+ # encoding: utf-8
2
+
3
+ require 'tree'
4
+
5
+ module Vorax
6
+
7
+ module Parser
8
+
9
# A named, typed span of source text, delimited by a start and an end
# position. Only the positions may be changed after creation.
class Region

  attr_reader :name, :type
  attr_accessor :start_pos, :end_pos

  # @param name the name of the region
  # @param type the kind of the region
  # @param start_pos where the region starts (0 by default)
  # @param end_pos where the region ends (0 by default)
  def initialize(name, type, start_pos = 0, end_pos = 0)
    @name, @type = name, type
    @start_pos, @end_pos = start_pos, end_pos
  end

  # @return [String] the region as "name[type]: start_pos"
  def to_s
    format('%s[%s]: %s', @name, @type, @start_pos)
  end

end
26
+
27
# Builds a tree with the regions (package/type specs and bodies,
# functions, procedures, nested blocks) found in a PL/SQL source text. The
# text is walked once, on creation; the outcome is available through #tree.
class PlsqlStructure

  PLSQL_SPEC = /(?:\bpackage\b|\btype\b)/i unless defined?(PLSQL_SPEC)
  SUBPROG = /(?:\bfunction\b|\bprocedure\b)/i unless defined?(SUBPROG)
  BEGIN_MODULE = /(?:\bbegin\b)/i unless defined?(BEGIN_MODULE)
  END_MODULE = /(?:\bend\b)/i unless defined?(END_MODULE)
  FOR_STMT = /(?:\bfor\b)/i unless defined?(FOR_STMT)
  # These two used to be copies of the "for" pattern and IF_STMT was guarded
  # by the wrong name, so it never got defined. Neither is used in this class.
  LOOP_STMT = /(?:\bloop\b)/i unless defined?(LOOP_STMT)
  IF_STMT = /(?:\bif\b)/i unless defined?(IF_STMT)

  attr_reader :text

  # Walks the provided text and builds the regions tree. Parse failures
  # are logged through Vorax.debug and otherwise ignored, leaving the tree
  # with whatever was found up to that point.
  #
  # @param text [String] the PL/SQL source to be analyzed
  def initialize(text)
    @text = text
    @root = Tree::TreeNode.new("root", nil)
    @walker = PLSQLWalker.new(text)
    @level = 0
    @current_parent = @root
    @begin_level = 0
    register_spots()
    @walker.walk
  rescue StandardError => e
    # be prepared for any nasty parse error.
    # Failing here is kind of usual, having in mind
    # that we often parse incomplete code. Only StandardError is
    # swallowed, so that interrupts and exit requests still get through.
    Vorax.debug(e.to_s)
  end

  # @return [Tree::TreeNode] the root node; the content of every other node
  #   is a Region
  def tree
    @root
  end

  private

  # Makes the provided node the one new regions are attached to.
  def assign_parent(node)
    @current_parent = node
  end

  # Registers all detection spots. The registration order is the order in
  # which the walker evaluates them.
  def register_spots
    register_plsql_spec_spot()
    register_slash_terminator_spot()
    register_subprog_spot()
    register_begin_spot()
    register_end_spot()
  end

  # Top level "package"/"type" keywords open a SPEC or BODY region.
  def register_plsql_spec_spot
    @walker.register_spot(PLSQL_SPEC) do |scanner|
      if @level == 0
        # Parser.plsql_def is defined elsewhere; it's expected to answer a
        # hash with the :type and :name of the definition
        meta_data = Parser.plsql_def("#{scanner.matched}#{scanner.rest}")
        if meta_data[:type] == 'SPEC' || meta_data[:type] == 'BODY'
          # is it a spec or a body?
          region = Region.new(meta_data[:name], meta_data[:type], scanner.pos)
          # TreeNode#<< is expected to answer the added child, which
          # becomes the new parent
          assign_parent(@current_parent << Tree::TreeNode.new(region.to_s, region))
          @level += 1
        end
      end
    end
  end

  # A slash terminator closes the last top level region and resets the
  # walking state.
  def register_slash_terminator_spot
    @walker.register_spot(Parser::SLASH_TERMINATOR) do |scanner|
      # this should apply to the last top level node
      if @root.has_children?
        if @root.children.last.content
          @root.children.last.content.end_pos = scanner.pos
        end
        assign_parent(@root)
        @level = 0
      end
    end
  end

  # "function"/"procedure" keywords add a subprogram region under the
  # current parent.
  def register_subprog_spot
    @walker.register_spot(SUBPROG) do |scanner|
      # the name is searched within the 32 chars following the keyword
      subprog_name = scanner.peek(32)[/(?:"[^"]+")|(?:[A-Z0-9$_#]+)/i]
      subprog_type = case scanner.matched
                     when /function/i then 'FUNCTION'
                     when /procedure/i then 'PROCEDURE'
                     end
      region = Region.new(subprog_name, subprog_type, scanner.pos)
      node = Tree::TreeNode.new(region.to_s, region)
      @current_parent << node
      if @current_parent.content
        if @current_parent.content.type != 'SPEC'
          # the subprogram has a body: regions to come belong to it
          @level += 1
          assign_parent(node)
        else
          # within a spec there's just the declaration
          node.content.end_pos = node.content.start_pos
        end
      end
    end
  end

  # Every "begin" but the first seen at the current nesting opens an
  # anonymous BLOCK region.
  def register_begin_spot
    @walker.register_spot(BEGIN_MODULE) do |scanner|
      @begin_level += 1
      if @begin_level > 1
        # start a new region
        region = Region.new('anonymous', 'BLOCK', scanner.pos)
        @level += 1
        assign_parent(@current_parent << Tree::TreeNode.new(region.to_s, region))
      end
    end
  end

  # An "end" which closes a definition ends the current region and hands
  # the parent role back to the enclosing node.
  def register_end_spot
    @walker.register_spot(END_MODULE) do |scanner|
      # we have an "end" match. first of all check if it's not part
      # of an conditional compiling "$end" definition
      char_behind = scanner.string[scanner.pos - scanner.matched.length - 1, 1]
      if char_behind != '$'
        metadata = Parser.plsql_def("#{scanner.matched}#{scanner.rest}")
        if metadata[:type] == 'END' && metadata[:end_def] > 0
          @begin_level -= 1 if @begin_level > 0
          @level -= 1 if @level > 0
          if @current_parent.content
            @current_parent.content.end_pos = (scanner.pos - 1) + (metadata[:end_def] - 1)
          end
          # The root has no parent. Stay on the root instead of ending up
          # with a nil parent, which made the next spot fail and silently
          # abort the whole walk (e.g. a top level anonymous block followed
          # by a package).
          assign_parent(@current_parent.parent || @root)
        end
      end
    end
  end

end
155
+
156
+ end
157
+
158
+ end
@@ -0,0 +1,143 @@
1
+ # encoding: utf-8
2
+
3
+ require 'strscan'
4
+
5
+ module Vorax
6
+
7
+ module Parser
8
+
9
# A PLSQL string scanner which is looking for interesting points within
# the provided source code. This is used instead of a fully fledged parser
# for speed considerations.
class PLSQLWalker

  BEGIN_ML_COMMENT = /\/\*/ unless defined?(BEGIN_ML_COMMENT)
  END_ML_COMMENT = /\*\// unless defined?(END_ML_COMMENT)
  BEGIN_SL_COMMENT = /--/ unless defined?(BEGIN_SL_COMMENT)
  unless defined?(END_SL_COMMENT)
    # fall back to the same line break pattern when Parser::END_LINE is not
    # loaded yet, so this class does not depend on the load order
    END_SL_COMMENT = defined?(Parser::END_LINE) ? Parser::END_LINE : /\r\n?|\n/
  end
  # the q of an alternative quoting literal may be given in either case
  BEGIN_PLSQL_SPECIAL_QUOTING = /q'[!\[{(<]/i unless defined?(BEGIN_PLSQL_SPECIAL_QUOTING)
  BEGIN_DOUBLE_QUOTING = /[\"]/ unless defined?(BEGIN_DOUBLE_QUOTING)
  BEGIN_SINGLE_QUOTING = /[']/ unless defined?(BEGIN_SINGLE_QUOTING)

  # What ends a q-quote literal, by its opening delimiter.
  unless defined?(PLSQL_QUOTE_CLOSERS)
    PLSQL_QUOTE_CLOSERS = {
      '!' => /!'/,
      '[' => /\]'/,
      '{' => /[}]'/,
      '(' => /[)]'/,
      '<' => /[>]'/
    }.freeze
  end

  # A pattern which never matches; checking it wipes the scanner's last match.
  NEVER_MATCH = /(?!)/ unless defined?(NEVER_MATCH)

  # Create a new parse walker.
  #
  # @param text [String] the text to be walked/parsed
  # @param create_default_spots [boolean] whenever or not to create default
  #   detection spots: multiline comments, singleline comments and quoted literals
  def initialize(text, create_default_spots=true)
    @text = text
    @matchers = []
    @ss = StringScanner.new(text)
    create_default_spots() if create_default_spots
  end

  # Returns the string scanner used for walking the string.
  #
  # @return [StringScanner] the string scanner
  def scanner
    @ss
  end

  # Register a new detection spot. The order of specifying these spots is important.
  #
  # @param pattern [Regexp] the spot regular expression
  # @param callback [Procedure] what to do when this spot is detected. The registered
  #   block is always called with the string scanner object. Please do not use "return"
  #   to exit from the defined block.
  def register_spot(pattern, &callback)
    @matchers << {:pattern => pattern, :callback => callback}
  end

  # Walk the text and trigger the registered callbacks.
  #
  # The scanner jumps to the next match of any registered pattern; then the
  # spots are evaluated in registration order and the callback of every spot
  # whose pattern matches the scanner's LAST match is called. Pay attention
  # that a callback which scans further changes that last match, so a spot
  # registered later may be triggered by what an earlier callback consumed
  # (e.g. the line break ending a single line comment).
  #
  # @return [nil]
  def walk
    if @matchers.empty?
      # nothing to look for; an empty alternation matches the empty string
      # everywhere and would keep the loop below spinning forever
      @ss.terminate
      return nil
    end
    global_matcher = Regexp.new(@matchers.map { |e| e[:pattern].to_s }.join('|'),
                                Regexp::IGNORECASE)
    until @ss.eos?
      if @ss.scan_until(global_matcher)
        @matchers.each do |matcher|
          matcher[:callback].call(@ss) if @ss.matched =~ matcher[:pattern]
        end
      else
        @ss.terminate
      end
    end
  end

  # Register a spot to walk a multiline comment.
  def register_default_ml_comment_spot
    register_spot(BEGIN_ML_COMMENT) do |scanner|
      scanner.scan_until(END_ML_COMMENT)
    end
  end

  # Register a spot to walk a single line comment.
  def register_default_sl_comment_spot
    register_spot(BEGIN_SL_COMMENT) do |scanner|
      scanner.scan_until(END_SL_COMMENT)
    end
  end

  # Register a spot to walk a plsql special quoting literal, e.g. q'[...]'.
  # The scanner is moved past the closing delimiter; an unterminated literal
  # leaves it right after the opening one.
  def register_default_plsql_quoting_spot
    register_spot(BEGIN_PLSQL_SPECIAL_QUOTING) do |scanner|
      # Decide on the closing delimiter BEFORE scanning any further. This
      # spot used to skip to the end of the line first, which replaced the
      # last match and so the literal itself was never skipped.
      delimiter = scanner.matched[/q'([!\[{(<])/i, 1]
      closer = PLSQL_QUOTE_CLOSERS[delimiter]
      scanner.scan_until(closer) if closer
      # Forget the last match. It ends with a quote and would otherwise
      # trigger the single quoting spot (see #walk).
      scanner.check(NEVER_MATCH)
    end
  end

  # Register a spot to walk a double quoted literal.
  def register_default_double_quoting_spot
    register_spot(BEGIN_DOUBLE_QUOTING) do |scanner|
      scanner.scan_until(/"/)
    end
  end

  # Register a spot to walk a single quoted literal. The callback answers
  # the text walked after the opening quote. An unterminated literal leaves
  # the scanner right after the opening quote.
  def register_default_single_quoting_spot
    register_spot(BEGIN_SINGLE_QUOTING) do |scanner|
      collector = ''
      loop do
        chunk = scanner.scan_until(/\'+/)
        # no closing quote at all; the former loop never ended here
        break unless chunk
        collector << chunk
        # Within the literal quotes are escaped by doubling them, so a run
        # with an odd number of quotes is the one which closes the literal
        # (e.g. the ''' of 'a''').
        break if scanner.matched.length.odd?
      end
      collector
    end
  end

  private

  # Registers the default spots. Comments come first, then the quoted
  # literals.
  def create_default_spots
    # define a multiline comment spot
    register_default_ml_comment_spot()

    # define a single line comment spot
    register_default_sl_comment_spot()

    # define special PLSQL quotes spot
    register_default_plsql_quoting_spot()

    # register a double quoted string spot
    register_default_double_quoting_spot()

    # register a single quoted string spot
    register_default_single_quoting_spot()
  end

end
140
+
141
+ end
142
+
143
+ end