y2r 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,709 @@
1
+ # encoding: utf-8
2
+
3
+ require "cheetah"
4
+ require "nokogiri"
5
+ require "tempfile"
6
+
7
+ module Y2R
8
+ class Parser
9
+
10
+ # The lists of elements skipped during parsing and comment processing need
11
+ # to differ. Currently there are two reasons:
12
+ #
13
+ # * When parsing, we want to skip <yconst>, because it's just a useless
14
+ # wrapper. But when processing comments we don't want to skip it,
15
+ # because this is the element to which comments are attached for
16
+ # various literals.
17
+ #
18
+ # * When parsing, we don't want to skip <element>, because we need to
19
+ # handle it specially in case it's inside <map>. But when processing
20
+ # comments it's just a useless wrapper.
21
+
22
# Names of wrapper elements that are looked through while converting XML
# into AST nodes: the parser descends directly into their first child.
# The list is frozen so that it cannot be modified by accident at runtime.
SKIPPED_ELEMENTS_PARSING = %w[
  arg
  cond
  else
  expr
  false
  key
  lhs
  rhs
  stmt
  then
  true
  until
  value
  yconst
  ycp
].freeze
39
+
40
# Names of wrapper elements that are looked through while fixing comment
# placement: no comments are attached to them. The list is frozen so that
# it cannot be modified by accident at runtime.
SKIPPED_ELEMENTS_COMMENTS = %w[
  arg
  cond
  element
  else
  expr
  false
  key
  lhs
  rhs
  stmt
  then
  true
  until
  value
  ycp
].freeze
57
+
58
# Raised when the external ycpc run fails; the message carries the
# standard error output captured from the failed command.
class SyntaxError < StandardError; end
60
+
61
# Creates a parser.
#
# options - Hash of settings. Keys read elsewhere in this class are :xml,
#           :module_paths, :include_paths, :reported_file, :filename,
#           :extracted_file and :as_include_file. An explicit nil is
#           treated as "no options" so that later lookups never hit nil.
def initialize(options = {})
  @options = options || {}
end
64
+
65
# Parses YCP source code.
#
# input - the YCP source passed on to ycp_to_xml.
#
# Returns the intermediate XML when the :xml option is set, otherwise the
# AST built from that XML.
def parse(input)
  xml = ycp_to_xml(input)
  return xml if @options[:xml]

  xml_to_ast(xml)
end
74
+
75
+ private
76
+
77
# Converts YCP source into XML by running the external ycpc command.
#
# The source is written into one temporary file and ycpc is told (via -o)
# to write its output into a second one. Both files are removed before
# returning, regardless of whether the run succeeded.
#
# ycp - the YCP source to convert.
#
# Returns the content of the XML file written by ycpc.
# Raises SyntaxError (with the command's stderr as message) when Cheetah
# reports that the command failed.
def ycp_to_xml(ycp)
  source_file = Tempfile.new("y2r")
  begin
    begin
      source_file.write(ycp)
    ensure
      source_file.close
    end

    # Only the path of this file is needed; ycpc writes the content.
    output_file = Tempfile.new("y2r")
    output_file.close
    begin
      # Note that this setting stays in the process environment afterwards.
      ENV["Y2PARSECOMMENTS"] = "1"

      command = %w[ycpc --no-std-modules --no-std-includes -c -x -o]
      command << output_file.path
      (@options[:module_paths] || []).each do |path|
        command.push("--module-path", path)
      end
      (@options[:include_paths] || []).each do |path|
        command.push("--include-path", path)
      end
      command << source_file.path

      begin
        Cheetah.run(command)
      rescue Cheetah::ExecutionFailed => e
        raise SyntaxError.new(e.stderr)
      end

      File.read(output_file.path)
    ensure
      output_file.unlink
    end
  ensure
    source_file.unlink
  end
end
127
+
128
# Builds the AST from the XML produced by ycpc.
#
# xml - XML text to parse with Nokogiri.
#
# Returns the root AST node. Its filename is set to the :reported_file
# option, falling back to the :filename option and then to "default.ycp".
def xml_to_ast(xml)
  root = Nokogiri::XML(xml).root

  # ycpc often attaches comments to the wrong nodes; repairing them here
  # is easier than doing the right thing in ycpc itself.
  fix_comments(root, nil)

  element_to_node(root, nil).tap do |ast|
    ast.filename = @options[:reported_file] || @options[:filename] || "default.ycp"
  end
end
144
+
145
# Moves comment text to the element it belongs to, walking the tree
# recursively.
#
# ycpc collects comments and stores them as |comment_before| on the next
# element it creates. The part up to the first newline may really belong
# to the previous element, so it is moved into that element's
# |comment_after| attribute.
#
# element      - the element to process.
# last_element - the element processed just before it (nil at the root).
def fix_comments(element, last_element)
  # No comments get attached to wrapper elements; look straight through
  # to their first child.
  if SKIPPED_ELEMENTS_COMMENTS.include?(element.name)
    fix_comments(element.elements[0], last_element)
    return
  end

  pending = element["comment_before"]
  if last_element && pending
    # Text before the first newline trails the previous element, the rest
    # leads this one. Without a newline everything is trailing text.
    trailing, _separator, leading = pending.partition("\n")

    unless trailing.empty?
      existing = last_element["comment_after"]
      last_element["comment_after"] = existing ? trailing + existing : trailing
    end

    if leading.empty?
      element.attributes["comment_before"].remove
    else
      element["comment_before"] = leading
    end
  end

  # Recurse into children; for the first child the "previous" element is
  # this element itself, afterwards it is the preceding sibling.
  previous = element
  element.elements.each do |child|
    fix_comments(child, previous)
    previous = child
  end
end
185
+
186
# Converts an XML element produced by ycpc (and, recursively, its
# descendants) into the corresponding AST::YCP node.
#
# element - the element to convert; the branch is chosen by its name.
# context - passed down to child conversions. The only value this method
#           itself reacts to is :map, which turns an <element> into a
#           MapElement; list, map, builtin and term children are converted
#           with :list, :map, :builtin and :yeterm respectively.
#
# Returns the created node, with comments of |element| transferred to it.
# Raises RuntimeError for an element name that is not handled here.
def element_to_node(element, context)
  node = case element.name
    when *SKIPPED_ELEMENTS_PARSING
      element_to_node(element.elements[0], context)

    when "assign"
      AST::YCP::Assign.new(
        :ns    => element["ns"],
        :name  => element["name"],
        :child => element_to_node(element.elements[0], context)
      )

    when "block"
      all_statements = extract_collection(element, "statements", context)

      extracted_statements = if toplevel_block?(element) && @options[:extracted_file]
        extract_file_statements(all_statements, @options[:extracted_file])
      else
        all_statements
      end

      statements = if toplevel_block?(element)
        skip_include_statements(extracted_statements)
      else
        extracted_statements
      end

      file_block_class = if @options[:as_include_file]
        AST::YCP::IncludeBlock
      else
        AST::YCP::FileBlock
      end

      module_block_class = if @options[:as_include_file]
        AST::YCP::IncludeBlock
      else
        AST::YCP::ModuleBlock
      end

      {
        :def    => AST::YCP::DefBlock,
        :file   => file_block_class,
        :module => module_block_class,
        :stmt   => AST::YCP::StmtBlock,
        :unspec => AST::YCP::UnspecBlock
      }[element["kind"].to_sym].new(
        :name       => element["name"],
        :symbols    => extract_symbols(element, context),
        :statements => statements
      )

    when "bracket"
      lhs = element.at_xpath("./lhs")

      AST::YCP::Bracket.new(
        :entry => element_to_node(lhs.at_xpath("./entry"), context),
        :arg   => element_to_node(lhs.at_xpath("./arg"), context),
        :rhs   => element_to_node(element.at_xpath("./rhs"), context)
      )

    when "break"
      AST::YCP::Break.new

    when "builtin"
      # Attributes named sym0, sym1, ... describe the parameters of the
      # block passed as the last child.
      symbol_attrs  = element.attributes.select { |n, _| n =~ /^sym\d+$/ }
      symbol_values = symbol_attrs.values.map(&:value)
      children      = extract_children(element, :builtin)

      if symbol_values.empty?
        args  = children
        block = nil
      else
        args  = children[0..-2]
        block = children.last

        block.args = symbol_values.map do |value|
          # Everything before the last word is the type, the last word is
          # the name.
          value =~ /^((\S+\s+)*)(\S+)/

          AST::YCP::Symbol.new(
            :global   => false,
            :category => :variable,
            :type     => AST::YCP::Type.new($1),
            :name     => $3
          )
        end
        block.symbols = block.args + block.symbols
      end

      AST::YCP::Builtin.new(
        :ns    => element["ns"],
        :name  => element["name"],
        :args  => args,
        :block => block
      )

    when "call"
      AST::YCP::Call.new(
        :ns       => element["ns"],
        :name     => element["name"],
        :category => element["category"].to_sym,
        :result   => element["result"] == "unused" ? :unused : :used,
        :args     => extract_collection(element, "args", context),
        :type     => AST::YCP::Type.new(element["type"])
      )

    when "case"
      value_elements = element.elements.select { |e| e.name == "value" }

      AST::YCP::Case.new(
        :values => value_elements.map { |e| element_to_node(e, context) },
        :body   => build_body(extract_collection(element, "body", context))
      )

    when "compare"
      AST::YCP::Compare.new(
        :op  => element["op"],
        :lhs => element_to_node(element.at_xpath("./lhs"), context),
        :rhs => element_to_node(element.at_xpath("./rhs"), context)
      )

    when "const"
      # For some weird reason, some terms (e.g. those placed in lists) are
      # represented as <const type="term" ...>, while others are represented
      # as <yeterm ...>. We unify this mess here so that it doesn't
      # propagate into the AST.
      if element["type"] != "term"
        AST::YCP::Const.new(
          :type  => element["type"].to_sym,
          :value => element["value"]
        )
      else
        AST::YCP::YETerm.new(
          :name     => element["name"],
          :children => extract_collection(element, "list", :yeterm)
        )
      end

    when "continue"
      AST::YCP::Continue.new

    when "default"
      AST::YCP::Default.new(
        :body => build_body(extract_children(element, context))
      )

    when "do"
      # For some reason, blocks in |do| statements are of kind "unspec" but
      # they really should be "stmt". Thus we need to construct the
      # |StmtBlock| instance ourselves.

      block_element = element.at_xpath("./block")

      AST::YCP::Do.new(
        :do    => if block_element
          AST::YCP::StmtBlock.new(
            :name       => nil,
            :symbols    => extract_symbols(block_element, context),
            :statements => extract_collection(block_element, "statements", context)
          )
        else
          nil
        end,
        :while => element_to_node(element.at_xpath("./while/*"), context)
      )

    when "element"
      # Outside of maps <element> is just a wrapper; inside a map it holds
      # one key/value pair.
      if context != :map
        element_to_node(element.elements[0], context)
      else
        AST::YCP::MapElement.new(
          :key   => element_to_node(element.at_xpath("./key"), context),
          :value => element_to_node(element.at_xpath("./value"), context)
        )
      end

    when "entry"
      AST::YCP::Entry.new(
        :ns   => element["ns"],
        :name => element["name"]
      )

    when "filename"
      AST::YCP::Filename.new

    when "fun_def"
      args = if element.at_xpath("./declaration")
        extract_collection(
          element.at_xpath("./declaration/block"),
          "symbols",
          context
        )
      else
        []
      end
      block = element_to_node(element.at_xpath("./block"), context)

      # This will make the code consider arguments as local variables.
      # Which is exactly what we want e.g. for alias detection.
      #
      # Note we make sure not to add arguments that would create duplicate
      # entries in the symbol table. These can arise e.g. if a variable with
      # the same name as an argument is defined inside the function (yes,
      # that's possible to do in YCP).
      unique_args = args.reject do |arg|
        block.symbols.find { |s| s.name == arg.name }
      end
      block.symbols = unique_args + block.symbols

      AST::YCP::FunDef.new(
        :name  => element["name"],
        :args  => args,
        :block => block
      )

    when "if"
      AST::YCP::If.new(
        :cond => element_to_node(element.elements[0], context),
        :then => if element.at_xpath("./then")
          element_to_node(element.at_xpath("./then"), context)
        else
          nil
        end,
        :else => if element.at_xpath("./else")
          element_to_node(element.at_xpath("./else"), context)
        else
          nil
        end
      )

    when "import"
      AST::YCP::Import.new(:name => element["name"])

    when "include"
      AST::YCP::Include.new(
        :name    => element["name"],
        :skipped => element["skipped"] == "1"
      )

    when "list"
      AST::YCP::List.new(
        :children => extract_children(element, :list)
      )

    when "locale"
      AST::YCP::Locale.new(:text => element["text"])

    when "map"
      AST::YCP::Map.new(
        :children => extract_children(element, :map)
      )

    when "repeat"
      # For some reason, blocks in |repeat| statements are of kind "unspec"
      # but they really should be "stmt". Thus we need to construct the
      # |StmtBlock| instance ourselves.

      block_element = element.at_xpath("./do/block")

      AST::YCP::Repeat.new(
        :do    => if block_element
          AST::YCP::StmtBlock.new(
            :name       => nil,
            :symbols    => extract_symbols(block_element, context),
            :statements => extract_collection(block_element, "statements", context)
          )
        else
          nil
        end,
        :until => element_to_node(element.at_xpath("./until"), context)
      )

    when "return"
      AST::YCP::Return.new(
        :child => if element.elements[0]
          element_to_node(element.elements[0], context)
        else
          nil
        end
      )

    when "switch"
      case_elements = element.elements.select { |e| e.name == "case" }

      AST::YCP::Switch.new(
        :cond    => element_to_node(element.at_xpath("./cond"), context),
        :cases   => case_elements.map { |e| element_to_node(e, context) },
        :default => if element.at_xpath("./default")
          element_to_node(element.at_xpath("./default"), context)
        else
          nil
        end
      )

    when "symbol"
      category = element["category"].to_sym

      AST::YCP::Symbol.new(
        :global   => element["global"] == "1",
        :category => category,
        :type     => AST::YCP::Type.new(element["type"]),
        # We don't save names for files mainly because of the specs. They
        # use temporary files with unpredictable names and node equality
        # tests would fail because of that.
        :name     => if category != :filename
          element["name"]
        else
          nil
        end
      )

    when "textdomain"
      AST::YCP::Textdomain.new(:name => element["name"])

    when "typedef"
      AST::YCP::Typedef.new

    when "variable"
      AST::YCP::Variable.new(
        :ns       => element["ns"],
        :name     => element["name"],
        :category => element["category"].to_sym,
        :type     => AST::YCP::Type.new(element["type"])
      )

    when "while"
      AST::YCP::While.new(
        :cond => element_to_node(element.at_xpath("./cond"), context),
        :do   => if element.at_xpath("./do/*")
          element_to_node(element.at_xpath("./do/*"), context)
        else
          nil
        end
      )

    when "ycpcode"
      AST::YCP::YCPCode.new(
        :args    => [],
        :symbols => [],
        :child   => element_to_node(element.elements[0], context)
      )

    when "yebinary"
      AST::YCP::YEBinary.new(
        :name => element["name"],
        :lhs  => element_to_node(element.elements[0], context),
        :rhs  => element_to_node(element.elements[1], context)
      )

    when "yebracket"
      AST::YCP::YEBracket.new(
        :value   => element_to_node(element.elements[0], context),
        :index   => element_to_node(element.elements[1], context),
        :default => element_to_node(element.elements[2], context)
      )

    when "yeis"
      AST::YCP::YEIs.new(
        :type  => AST::YCP::Type.new(element["type"]),
        :child => element_to_node(element.elements[0], context)
      )

    when "yepropagate"
      AST::YCP::YEPropagate.new(
        :from  => AST::YCP::Type.new(element["from"]),
        :to    => AST::YCP::Type.new(element["to"]),
        :child => element_to_node(element.elements[0], context)
      )

    when "yereference"
      AST::YCP::YEReference.new(
        :child => element_to_node(element.elements[0], context)
      )

    when "yereturn"
      child = element_to_node(element.elements[0], context)

      if child.is_a?(AST::YCP::UnspecBlock) # ``{ ... }
        child
      else                                  # ``( ... )
        # Reuse the child converted above. Converting the subtree a second
        # time would double the work at every level of nesting.
        AST::YCP::YEReturn.new(
          :args    => [],
          :symbols => [],
          :child   => child
        )
      end

    when "yeterm"
      AST::YCP::YETerm.new(
        :name     => element["name"],
        :children => extract_children(element, :yeterm)
      )

    when "yetriple"
      AST::YCP::YETriple.new(
        :cond  => element_to_node(element.at_xpath("./cond"), context),
        :true  => element_to_node(element.at_xpath("./true"), context),
        :false => element_to_node(element.at_xpath("./false"), context)
      )

    when "yeunary"
      AST::YCP::YEUnary.new(
        :name  => element["name"],
        :child => element_to_node(element.elements[0], context)
      )

    else
      raise "Invalid element: <#{element.name}>."
  end

  transfer_comments(node, element)

  node
end
599
+
600
# Converts every child element of |element| into an AST node, passing
# |context| on to each conversion. Returns the nodes in document order.
def extract_children(element, context)
  element.elements.map do |child|
    element_to_node(child, context)
  end
end
603
+
604
# Converts the children of the direct child of |element| called |name|.
# Returns an empty array when |element| has no such child.
def extract_collection(element, name, context)
  container = element.at_xpath("./#{name}")
  return [] unless container

  extract_children(container, context)
end
608
+
609
# Extracts the "symbols" collection of |element|, keeping only variables,
# references and functions. Filtering here means the rest of the code does
# not have to skip the irrelevant categories itself.
def extract_symbols(element, context)
  relevant = [:variable, :reference, :function]

  extract_collection(element, "symbols", context).select do |symbol|
    relevant.include?(symbol.category)
  end
end
617
+
618
# Returns a StmtBlock holding |statements|. When the list consists of
# exactly one StmtBlock, that block is returned as it is; otherwise the
# statements are wrapped into a new unnamed block without symbols.
def build_body(statements)
  only = statements.first
  return only if statements.size == 1 && only.is_a?(AST::YCP::StmtBlock)

  AST::YCP::StmtBlock.new(
    :name       => nil,
    :symbols    => [],
    :statements => statements
  )
end
629
+
630
# Tells whether |element| is a block of kind "file" or "module".
def toplevel_block?(element)
  ["file", "module"].include?(element["kind"])
end
633
+
634
# Picks the statements that belong to the included file named |file| out
# of a flat statement list.
#
# In the list, a non-skipped Include statement opens a nesting level and a
# Filename statement closes one. Extraction starts right after the Include
# whose name equals |file| and stops at the Filename statement that drops
# the nesting below the level at which it started.
#
# Returns the extracted statements (empty when |file| is never included).
def extract_file_statements(statements, file)
  extracting = false
  depth      = 0
  threshold  = nil

  statements.each_with_object([]) do |statement, extracted|
    case statement
    when AST::YCP::Include
      extracted << statement if extracting
      # A skipped include opens no nesting level.
      next if statement.skipped

      depth += 1
      if statement.name == file
        extracting = true
        threshold  = depth
      end
    when AST::YCP::Filename
      depth -= 1
      extracting = false if extracting && depth < threshold

      extracted << statement if extracting
    else
      extracted << statement if extracting
    end
  end
end
664
+
665
# Removes the statements that come from included files from a flat
# statement list.
#
# A non-skipped Include statement opens a nesting level and a Filename
# statement closes one. Include statements seen at nesting level zero are
# kept, Filename statements are always dropped, and any other statement is
# kept only while no include is open.
def skip_include_statements(statements)
  depth    = 0
  skipping = false

  statements.each_with_object([]) do |statement, kept|
    case statement
    when AST::YCP::Include
      kept << statement if depth == 0
      # A skipped include opens no nesting level.
      next if statement.skipped

      depth += 1
      skipping = true
    when AST::YCP::Filename
      depth -= 1
      skipping = false if depth == 0
    else
      kept << statement unless skipping
    end
  end
end
687
+
688
# Copies the |comment_before| and |comment_after| attributes of |element|
# to |node|.
#
# Comments made only of spaces and tabs are left out: they stand for
# indentation, in-expression spacing and similar things that would be
# ignored later anyway, so dropping them here saves work in later stages.
def transfer_comments(node, element)
  before = element["comment_before"]
  node.comment_before = before if before && !is_line_whitespace?(before)

  after = element["comment_after"]
  node.comment_after = after if after && !is_line_whitespace?(after)
end
704
+
705
# Tells whether |s| consists solely of spaces and tabs (this includes the
# empty string). Returns 0 when it does and nil when it does not.
def is_line_whitespace?(s)
  # \z alone anchors the end of the string; an extra "$" in front of it
  # changes nothing.
  s =~ /\A[ \t]*\z/
end
708
+ end
709
+ end