y2r 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,709 @@
1
+ # encoding: utf-8
2
+
3
+ require "cheetah"
4
+ require "nokogiri"
5
+ require "tempfile"
6
+
7
+ module Y2R
8
+ class Parser
9
+
10
+ # The lists of elements skipped during parsing and comment processing need
11
+ # to differ. Currently there are two reasons:
12
+ #
13
+ # * When parsing, we want to skip <yconst>, because it's just a useless
14
+ # wrapper. But when processing comments we don't want to skip it,
15
+ # because this is the element to which comments are attached for
16
+ # various literals.
17
+ #
18
+ # * When parsing, we don't want to skip <element>, because we need to
19
+ # handle it specially in case it's inside <map>. But when processing
20
+ # comments it's just a useless wrapper.
21
+
22
# Names of wrapper elements that element_to_node does not turn into a node
# of their own: it converts their first child instead. Frozen so that the
# shared list cannot be modified by accident at runtime.
SKIPPED_ELEMENTS_PARSING = %w[
  arg
  cond
  else
  expr
  false
  key
  lhs
  rhs
  stmt
  then
  true
  until
  value
  yconst
  ycp
].freeze
39
+
40
# Names of wrapper elements that fix_comments never attaches comments to:
# it descends into their first child instead. Frozen so that the shared
# list cannot be modified by accident at runtime.
SKIPPED_ELEMENTS_COMMENTS = %w[
  arg
  cond
  element
  else
  expr
  false
  key
  lhs
  rhs
  stmt
  then
  true
  until
  value
  ycp
].freeze
57
+
58
# Raised by the parser when running ycpc on the input fails; the message is
# the standard error output of the failed ycpc run.
class SyntaxError < StandardError; end
60
+
61
# Creates a parser.
#
# options - Hash of settings kept for later use by the other methods of
#           this class. Keys read elsewhere in this class: :xml,
#           :module_paths, :include_paths, :reported_file, :filename,
#           :extracted_file and :as_include_file. Defaults to an empty Hash.
def initialize(options = {})
  @options = options
end
64
+
65
# Parses YCP source code.
#
# input - String with the YCP code.
#
# Returns the raw XML produced by ycp_to_xml when the :xml option is set,
# otherwise the AST built from that XML by xml_to_ast. Errors raised by
# those two helpers propagate to the caller.
def parse(input)
  xml = ycp_to_xml(input)
  return xml if @options[:xml]

  xml_to_ast(xml)
end
74
+
75
+ private
76
+
77
# Runs the external ycpc tool on the given YCP code and returns the XML it
# writes.
#
# The code is written to a temporary file, ycpc is asked to write its output
# to a second temporary file, and both files are removed afterwards no matter
# how the method exits. Entries of the :module_paths and :include_paths
# options are passed to ycpc as --module-path / --include-path arguments.
#
# ycp - String with the YCP code.
#
# Returns the content of the ycpc output file as a String.
# Raises SyntaxError (carrying ycpc's stderr) when the ycpc run fails.
def ycp_to_xml(ycp)
  module_paths  = @options[:module_paths] || []
  include_paths = @options[:include_paths] || []

  source = Tempfile.new("y2r")
  begin
    begin
      source.write(ycp)
    ensure
      source.close
    end

    # Only the path of this file is needed; ycpc writes the content.
    target = Tempfile.new("y2r")
    target.close
    begin
      # NOTE(review): this stays set in the process environment after the
      # method returns. Presumably it makes ycpc emit comments -- confirm.
      ENV["Y2PARSECOMMENTS"] = "1"

      cmd = [
        "ycpc",
        "--no-std-modules",
        "--no-std-includes",
        "-c",
        "-x",
        "-o", target.path
      ]
      module_paths.each  { |path| cmd.push('--module-path', path) }
      include_paths.each { |path| cmd.push('--include-path', path) }
      cmd.push(source.path)

      begin
        Cheetah.run(cmd)
      rescue Cheetah::ExecutionFailed => e
        raise SyntaxError.new(e.stderr)
      end

      File.read(target.path)
    ensure
      target.unlink
    end
  ensure
    source.unlink
  end
end
127
+
128
# Builds the AST from the XML produced by ycpc.
#
# xml - String with the XML document.
#
# Returns the root AST node. Its filename is set to the :reported_file
# option, falling back to the :filename option and finally to "default.ycp".
def xml_to_ast(xml)
  root = Nokogiri::XML(xml).root

  # Comment processing in ycpc is rough and comments often get attached to
  # wrong nodes. It is easier to repair that here, before the conversion,
  # than to do the right thing in ycpc.
  fix_comments(root, nil)

  ast = element_to_node(root, nil)
  ast.filename = @options[:reported_file] || @options[:filename] || "default.ycp"
  ast
end
144
+
145
# Moves comment text that ycpc attached to the wrong XML element.
#
# ycpc stores collected comments in the "comment_before" attribute of the
# next element it creates. The part up to the first newline is treated as
# belonging to the previously visited element and is moved (prepended) to
# that element's "comment_after" attribute; only the rest stays as
# "comment_before". The XML tree is modified in place.
#
# element      - element to process (its subtree is processed recursively).
# last_element - element visited just before this one, or nil.
def fix_comments(element, last_element)
  # Wrapper elements never get comments; continue with their first child.
  if SKIPPED_ELEMENTS_COMMENTS.include?(element.name)
    fix_comments(element.elements[0], last_element)
    return
  end

  comment = element["comment_before"]
  if last_element && comment
    # Without a newline the whole comment is the trailing part.
    trailing, _newline, leading = comment.partition("\n")

    unless trailing.empty?
      existing = last_element["comment_after"]
      last_element["comment_after"] = existing ? trailing + existing : trailing
    end

    if leading.empty?
      element.attributes["comment_before"].remove
    else
      element["comment_before"] = leading
    end
  end

  # Each child sees its preceding sibling as the previous element; the first
  # child sees this element.
  previous = element
  element.elements.each do |child|
    fix_comments(child, previous)
    previous = child
  end
end
185
+
186
# Converts an XML element produced by ycpc into the corresponding AST::YCP
# node, converting child elements recursively.
#
# element - XML element to convert.
# context - nil or a Symbol naming the enclosing construct (:map, :list,
#           :yeterm, :builtin). In this method it only affects <element>,
#           which becomes a MapElement when the context is :map.
#
# Returns the created node with the element's comments transferred to it.
# For wrapper elements listed in SKIPPED_ELEMENTS_PARSING the node of the
# first child is returned and the wrapper's comments are transferred to it
# as well.
# Raises RuntimeError when the element name is not known.
def element_to_node(element, context)
  node = case element.name
    when *SKIPPED_ELEMENTS_PARSING
      element_to_node(element.elements[0], context)

    when "assign"
      AST::YCP::Assign.new(
        :ns    => element["ns"],
        :name  => element["name"],
        :child => element_to_node(element.elements[0], context)
      )

    when "block"
      all_statements = extract_collection(element, "statements", context)

      extracted_statements = if toplevel_block?(element) && @options[:extracted_file]
        extract_file_statements(all_statements, @options[:extracted_file])
      else
        all_statements
      end

      statements = if toplevel_block?(element)
        skip_include_statements(extracted_statements)
      else
        extracted_statements
      end

      # With the :as_include_file option both "file" and "module" blocks are
      # represented as include blocks.
      file_block_class = if @options[:as_include_file]
        AST::YCP::IncludeBlock
      else
        AST::YCP::FileBlock
      end

      module_block_class = if @options[:as_include_file]
        AST::YCP::IncludeBlock
      else
        AST::YCP::ModuleBlock
      end

      {
        :def    => AST::YCP::DefBlock,
        :file   => file_block_class,
        :module => module_block_class,
        :stmt   => AST::YCP::StmtBlock,
        :unspec => AST::YCP::UnspecBlock
      }[element["kind"].to_sym].new(
        :name       => element["name"],
        :symbols    => extract_symbols(element, context),
        :statements => statements
      )

    when "bracket"
      lhs = element.at_xpath("./lhs")

      AST::YCP::Bracket.new(
        :entry => element_to_node(lhs.at_xpath("./entry"), context),
        :arg   => element_to_node(lhs.at_xpath("./arg"), context),
        :rhs   => element_to_node(element.at_xpath("./rhs"), context)
      )

    when "break"
      AST::YCP::Break.new

    when "builtin"
      # Attributes named sym<N> describe the arguments of the block passed to
      # the builtin; when present, the last child is that block.
      symbol_attrs  = element.attributes.select { |n, v| n =~ /^sym\d+$/ }
      symbol_values = symbol_attrs.values.map(&:value)
      children      = extract_children(element, :builtin)

      if symbol_values.empty?
        args  = children
        block = nil
      else
        args  = children[0..-2]
        block = children.last

        block.args = symbol_values.map do |value|
          # The last whitespace-separated word is the name, everything
          # before it is the type.
          value =~ /^((\S+\s+)*)(\S+)/

          AST::YCP::Symbol.new(
            :global   => false,
            :category => :variable,
            :type     => AST::YCP::Type.new($1),
            :name     => $3
          )
        end
        block.symbols = block.args + block.symbols
      end

      AST::YCP::Builtin.new(
        :ns    => element["ns"],
        :name  => element["name"],
        :args  => args,
        :block => block
      )

    when "call"
      AST::YCP::Call.new(
        :ns       => element["ns"],
        :name     => element["name"],
        :category => element["category"].to_sym,
        :result   => element["result"] == "unused" ? :unused : :used,
        :args     => extract_collection(element, "args", context),
        :type     => AST::YCP::Type.new(element["type"])
      )

    when "case"
      value_elements = element.elements.select { |e| e.name == "value" }

      AST::YCP::Case.new(
        :values => value_elements.map { |e| element_to_node(e, context) },
        :body   => build_body(extract_collection(element, "body", context))
      )

    when "compare"
      AST::YCP::Compare.new(
        :op  => element["op"],
        :lhs => element_to_node(element.at_xpath("./lhs"), context),
        :rhs => element_to_node(element.at_xpath("./rhs"), context)
      )

    when "const"
      # For some weird reason, some terms (e.g. those placed in lists) are
      # represented as <const type="term" ...>, while others are represented
      # as <yeterm ...>. We unify this mess here so that it doesn't
      # propagate into the AST.
      if element["type"] != "term"
        AST::YCP::Const.new(
          :type  => element["type"].to_sym,
          :value => element["value"]
        )
      else
        AST::YCP::YETerm.new(
          :name     => element["name"],
          :children => extract_collection(element, "list", :yeterm)
        )
      end

    when "continue"
      AST::YCP::Continue.new

    when "default"
      AST::YCP::Default.new(
        :body => build_body(extract_children(element, context))
      )

    when "do"
      # For some reason, blocks in |do| statements are of kind "unspec" but
      # they really should be "stmt". Thus we need to construct the
      # |StmtBlock| instance ourself.

      block_element = element.at_xpath("./block")

      AST::YCP::Do.new(
        :do    => if block_element
          AST::YCP::StmtBlock.new(
            :name       => nil,
            :symbols    => extract_symbols(block_element, context),
            :statements => extract_collection(block_element, "statements", context)
          )
        else
          nil
        end,
        :while => element_to_node(element.at_xpath("./while/*"), context)
      )

    when "element"
      # Outside of maps <element> is just a wrapper around its first child.
      if context != :map
        element_to_node(element.elements[0], context)
      else
        AST::YCP::MapElement.new(
          :key   => element_to_node(element.at_xpath("./key"), context),
          :value => element_to_node(element.at_xpath("./value"), context)
        )
      end

    when "entry"
      AST::YCP::Entry.new(
        :ns   => element["ns"],
        :name => element["name"]
      )

    when "filename"
      AST::YCP::Filename.new

    when "fun_def"
      args = if element.at_xpath("./declaration")
        extract_collection(
          element.at_xpath("./declaration/block"),
          "symbols",
          context
        )
      else
        []
      end
      block = element_to_node(element.at_xpath("./block"), context)

      # This will make the code consider arguments as local variables.
      # Which is exactly what we want e.g. for alias detection.
      #
      # Note we make sure not to add arguments that would create duplicate
      # entries in the symbol table. These can arise e.g. if a variable with
      # the same name as an argument is defined inside the function (yes,
      # that's possible to do in YCP).
      unique_args = args.reject do |arg|
        block.symbols.find { |s| s.name == arg.name }
      end
      block.symbols = unique_args + block.symbols

      AST::YCP::FunDef.new(
        :name  => element["name"],
        :args  => args,
        :block => block
      )

    when "if"
      AST::YCP::If.new(
        :cond => element_to_node(element.elements[0], context),
        :then => if element.at_xpath("./then")
          element_to_node(element.at_xpath("./then"), context)
        else
          nil
        end,
        :else => if element.at_xpath("./else")
          element_to_node(element.at_xpath("./else"), context)
        else
          nil
        end
      )

    when "import"
      AST::YCP::Import.new(:name => element["name"])

    when "include"
      AST::YCP::Include.new(
        :name    => element["name"],
        :skipped => element["skipped"] == "1"
      )

    when "list"
      AST::YCP::List.new(
        :children => extract_children(element, :list)
      )

    when "locale"
      AST::YCP::Locale.new(:text => element["text"])

    when "map"
      AST::YCP::Map.new(
        :children => extract_children(element, :map)
      )

    when "repeat"
      # For some reason, blocks in |repeat| statements are of kind "unspec"
      # but they really should be "stmt". Thus we need to construct the
      # |StmtBlock| instance ourself.

      block_element = element.at_xpath("./do/block")

      AST::YCP::Repeat.new(
        :do    => if block_element
          AST::YCP::StmtBlock.new(
            :name       => nil,
            :symbols    => extract_symbols(block_element, context),
            :statements => extract_collection(block_element, "statements", context)
          )
        else
          nil
        end,
        :until => element_to_node(element.at_xpath("./until"), context)
      )

    when "return"
      AST::YCP::Return.new(
        :child => if element.elements[0]
          element_to_node(element.elements[0], context)
        else
          nil
        end
      )

    when "switch"
      case_elements = element.elements.select { |e| e.name == "case" }

      AST::YCP::Switch.new(
        :cond    => element_to_node(element.at_xpath("./cond"), context),
        :cases   => case_elements.map { |e| element_to_node(e, context) },
        :default => if element.at_xpath("./default")
          element_to_node(element.at_xpath("./default"), context)
        else
          nil
        end
      )

    when "symbol"
      category = element["category"].to_sym

      AST::YCP::Symbol.new(
        :global   => element["global"] == "1",
        :category => category,
        :type     => AST::YCP::Type.new(element["type"]),
        # We don't save names for files mainly because of the specs. They
        # use temporary files with unpredictable names and node equality
        # tests would fail because of that.
        :name     => if category != :filename
          element["name"]
        else
          nil
        end
      )

    when "textdomain"
      AST::YCP::Textdomain.new(:name => element["name"])

    when "typedef"
      AST::YCP::Typedef.new

    when "variable"
      AST::YCP::Variable.new(
        :ns       => element["ns"],
        :name     => element["name"],
        :category => element["category"].to_sym,
        :type     => AST::YCP::Type.new(element["type"])
      )

    when "while"
      AST::YCP::While.new(
        :cond => element_to_node(element.at_xpath("./cond"), context),
        :do   => if element.at_xpath("./do/*")
          element_to_node(element.at_xpath("./do/*"), context)
        else
          nil
        end
      )

    when "ycpcode"
      AST::YCP::YCPCode.new(
        :args    => [],
        :symbols => [],
        :child   => element_to_node(element.elements[0], context)
      )

    when "yebinary"
      AST::YCP::YEBinary.new(
        :name => element["name"],
        :lhs  => element_to_node(element.elements[0], context),
        :rhs  => element_to_node(element.elements[1], context)
      )

    when "yebracket"
      AST::YCP::YEBracket.new(
        :value   => element_to_node(element.elements[0], context),
        :index   => element_to_node(element.elements[1], context),
        :default => element_to_node(element.elements[2], context)
      )

    when "yeis"
      AST::YCP::YEIs.new(
        :type  => AST::YCP::Type.new(element["type"]),
        :child => element_to_node(element.elements[0], context)
      )

    when "yepropagate"
      AST::YCP::YEPropagate.new(
        :from  => AST::YCP::Type.new(element["from"]),
        :to    => AST::YCP::Type.new(element["to"]),
        :child => element_to_node(element.elements[0], context)
      )

    when "yereference"
      AST::YCP::YEReference.new(
        :child => element_to_node(element.elements[0], context)
      )

    when "yereturn"
      child = element_to_node(element.elements[0], context)

      if child.is_a?(AST::YCP::UnspecBlock) # ``{ ... }
        child
      else                                  # ``( ... )
        # Reuse the node built above instead of converting the same subtree
        # a second time.
        AST::YCP::YEReturn.new(
          :args    => [],
          :symbols => [],
          :child   => child
        )
      end

    when "yeterm"
      AST::YCP::YETerm.new(
        :name     => element["name"],
        :children => extract_children(element, :yeterm)
      )

    when "yetriple"
      AST::YCP::YETriple.new(
        :cond  => element_to_node(element.at_xpath("./cond"), context),
        :true  => element_to_node(element.at_xpath("./true"), context),
        :false => element_to_node(element.at_xpath("./false"), context)
      )

    when "yeunary"
      AST::YCP::YEUnary.new(
        :name  => element["name"],
        :child => element_to_node(element.elements[0], context)
      )

    else
      raise "Invalid element: <#{element.name}>."
  end

  transfer_comments(node, element)

  node
end
599
+
600
# Converts every child element of the given element into an AST node.
#
# element - XML element whose children are converted.
# context - context passed on to element_to_node for each child.
#
# Returns an Array of nodes in document order.
def extract_children(element, context)
  element.elements.map do |child|
    element_to_node(child, context)
  end
end
603
+
604
# Converts the children of a named direct child element into AST nodes.
#
# element - XML element that may contain the collection element.
# name    - name of the direct child holding the collection (e.g. "args").
# context - context passed on when converting the collection's children.
#
# Returns an Array of nodes, empty when there is no child with that name.
def extract_collection(element, name, context)
  collection = element.at_xpath("./#{name}")
  return [] unless collection

  extract_children(collection, context)
end
608
+
609
# Extracts the symbols declared under the element's <symbols> child.
#
# Only symbols whose category is :variable, :reference or :function are
# returned, so that later code does not have to filter out the rest.
#
# element - XML element holding the "symbols" collection.
# context - context passed on when converting the symbols.
#
# Returns an Array of symbol nodes.
def extract_symbols(element, context)
  relevant = [:variable, :reference, :function]

  extract_collection(element, "symbols", context).select do |symbol|
    relevant.include?(symbol.category)
  end
end
617
+
618
# Wraps statements into a single statement block.
#
# statements - Array of statement nodes.
#
# Returns the only statement itself when it already is a StmtBlock,
# otherwise a new unnamed StmtBlock without symbols holding the statements.
def build_body(statements)
  only = statements.first
  return only if statements.size == 1 && only.is_a?(AST::YCP::StmtBlock)

  AST::YCP::StmtBlock.new(
    :name       => nil,
    :symbols    => [],
    :statements => statements
  )
end
629
+
630
# Tells whether a <block> element is of kind "file" or "module".
#
# element - XML element whose "kind" attribute is examined.
#
# Returns true or false.
def toplevel_block?(element)
  ["file", "module"].include?(element["kind"])
end
633
+
634
# Picks the statements that belong to one included file out of a flat
# statement list.
#
# In the list, a non-skipped Include statement opens a nesting level and a
# Filename statement closes one. Extraction starts after the Include whose
# name equals the requested file and stops at the Filename statement that
# brings the nesting below the level of that Include.
#
# statements - Array of statement nodes.
# file       - name of the included file to extract.
#
# Returns an Array with the extracted statements (including nested Include
# and Filename statements encountered while extracting).
def extract_file_statements(statements, file)
  depth        = 0
  target_depth = nil
  extracting   = false

  statements.each_with_object([]) do |statement, kept|
    case statement
    when AST::YCP::Include
      kept << statement if extracting

      # Skipped includes open no nesting level.
      unless statement.skipped
        depth += 1
        if statement.name == file
          extracting   = true
          target_depth = depth
        end
      end

    when AST::YCP::Filename
      depth -= 1
      extracting = false if extracting && depth < target_depth

      kept << statement if extracting

    else
      kept << statement if extracting
    end
  end
end
664
+
665
# Removes statements that come from included files from a flat statement
# list.
#
# A non-skipped Include statement opens a nesting level and a Filename
# statement closes one. Include statements seen at nesting level zero are
# kept, Filename statements are always dropped, and other statements are
# dropped while inside an include.
#
# statements - Array of statement nodes.
#
# Returns an Array with the remaining statements.
def skip_include_statements(statements)
  depth    = 0
  skipping = false

  statements.each_with_object([]) do |statement, kept|
    case statement
    when AST::YCP::Include
      kept << statement if depth == 0

      # Skipped includes open no nesting level.
      unless statement.skipped
        depth   += 1
        skipping = true
      end

    when AST::YCP::Filename
      depth -= 1
      skipping = false if depth == 0

    else
      kept << statement unless skipping
    end
  end
end
687
+
688
# Copies the "comment_before" and "comment_after" attributes of an XML
# element to the given AST node.
#
# Comments consisting only of line whitespace are not copied. They stand for
# indentation, in-expression spacing and similar things that would be
# ignored later anyway, so dropping them here saves work in later stages.
#
# node    - AST node receiving the comments.
# element - XML element the node was created from.
def transfer_comments(node, element)
  before = element["comment_before"]
  node.comment_before = before if before && !is_line_whitespace?(before)

  after = element["comment_after"]
  node.comment_after = after if after && !is_line_whitespace?(after)
end
704
+
705
# Tells whether a string consists solely of spaces and tabs (an empty string
# counts too; a string containing a newline does not).
#
# s - String to test.
#
# Returns 0 (truthy) when it does, nil otherwise.
def is_line_whitespace?(s)
  only_blanks = /\A[ \t]*$\z/
  s =~ only_blanks
end
708
+ end
709
+ end