treetop 1.3.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -127,6 +127,18 @@ module Treetop
127
127
  end
128
128
  }
129
129
  /
130
+ prefix space? predicate_block {
131
+ def compile(address, builder, parent_expression=nil)
132
+ prefix.compile(address, builder, self)
133
+ end
134
+ def prefixed_expression
135
+ predicate_block
136
+ end
137
+ def inline_modules
138
+ []
139
+ end
140
+ }
141
+ /
130
142
  atomic suffix node_class_declarations {
131
143
  def compile(address, builder, parent_expression=nil)
132
144
  suffix.compile(address, builder, self)
@@ -223,6 +235,18 @@ module Treetop
223
235
  end
224
236
  }
225
237
  /
238
+ prefix space? predicate_block {
239
+ def compile(address, builder, parent_expression=nil)
240
+ prefix.compile(address, builder, self)
241
+ end
242
+ def prefixed_expression
243
+ predicate_block
244
+ end
245
+ def inline_modules
246
+ []
247
+ end
248
+ }
249
+ /
226
250
  atomic suffix {
227
251
  def compile(lexical_address, builder)
228
252
  suffix.compile(lexical_address, builder, self)
@@ -321,7 +345,7 @@ module Treetop
321
345
  end
322
346
 
323
347
  rule character_class
324
- '[' characters:(!']' ('\\' . /!'\\' .))+ ']' <CharacterClass> {
348
+ '[' characters:(!']' ('\\' . /!'\\' .))+ ']' <CharacterClass> {
325
349
  def characters
326
350
  super.text_value
327
351
  end
@@ -372,6 +396,10 @@ module Treetop
372
396
  }
373
397
  end
374
398
 
399
+ rule predicate_block
400
+ '' inline_module <PredicateBlock>
401
+ end
402
+
375
403
  rule inline_module
376
404
  '{' (inline_module / ![{}] .)* '}' <InlineModule>
377
405
  end
@@ -4,9 +4,10 @@ module Treetop
4
4
  def compile(address, builder, parent_expression = nil)
5
5
  super
6
6
  builder.if__ "index < input_length" do
7
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
7
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
8
+ assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
8
9
  extend_result_with_inline_module
9
- builder << "@index += 1"
10
+ builder << "@index = next_character"
10
11
  end
11
12
  builder.else_ do
12
13
  builder << 'terminal_parse_failure("any character")'
@@ -5,12 +5,17 @@ module Treetop
5
5
  super
6
6
 
7
7
  builder.if__ "has_terminal?(#{grounded_regexp(text_value)}, true, index)" do
8
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
9
- extend_result_with_inline_module
10
- builder << "@index += 1"
8
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
9
+ if address == 0 || decorated?
10
+ assign_result "instantiate_node(#{node_class_name}, input, index...next_character)"
11
+ extend_result_with_inline_module
12
+ else
13
+ assign_lazily_instantiated_node
14
+ end
15
+ builder << "@index = next_character"
11
16
  end
12
17
  builder.else_ do
13
- "terminal_parse_failure(#{single_quote(characters)})"
18
+ # "terminal_parse_failure(#{single_quote(characters)})"
14
19
  assign_result 'nil'
15
20
  end
16
21
  end
@@ -21,6 +21,10 @@ module Treetop
21
21
  parent_expression && parent_expression.inline_module_name
22
22
  end
23
23
 
24
# Reports whether the parent expression decorates this expression with a
# custom node class or an inline module.
#
# Returns a falsy value (nil/false) when there is no parent expression or the
# parent supplies neither; otherwise returns the parent's node_class_name, or
# its inline_module_name when no node class is set.
def decorated?
  # The original OR-ed node_class_name with itself; one lookup is enough.
  parent_expression && (parent_expression.node_class_name || parent_expression.inline_module_name)
end
27
+
24
28
  def optional_arg(arg)
25
29
  if arg
26
30
  ", #{arg}"
@@ -89,6 +93,10 @@ module Treetop
89
93
  def assign_failure(start_index_var)
90
94
  assign_result("nil")
91
95
  end
96
+
97
# Assigns the literal `true` as the parse result instead of building a node.
# The `true` acts as a placeholder for a node that is instantiated later
# (SyntaxNode#elements replaces `true` entries with real nodes).
def assign_lazily_instantiated_node
  placeholder = "true"
  assign_result(placeholder)
end
92
100
 
93
101
  def var_initialization
94
102
  left, right = [], []
@@ -0,0 +1,16 @@
1
module Treetop
  module Compiler
    # Compiles a predicate block: the block's source text is wrapped in a
    # lambda that is called with the accumulator variable of the nearest
    # ancestor that exposes one, and the call's value becomes the result.
    class PredicateBlock < ParsingExpression
      def compile(index, builder, parent_expression = nil)
        super
        # REVISIT: This is distinctly dodgy, but since we can only be called from
        # two contexts, and it works in both those, I'm going with it for now, as
        # opposed to doing the major refactor of providing a proper way of accessing
        # the parent's accumulator variable.
        ancestor = parent
        ancestor = ancestor.parent until !ancestor || ancestor.respond_to?(:accumulator_var)
        # With no such ancestor the lambda is called without arguments.
        accumulator = ancestor ? ancestor.accumulator_var : ""
        assign_result "lambda #{text_value}.call(#{accumulator})"
      end
    end
  end
end
@@ -53,9 +53,12 @@ module Treetop
53
53
  def compile(index, builder, rule)
54
54
  super
55
55
  builder.module_declaration(module_name) do
56
+ elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name] ||= []) << e; h}
56
57
  sequence_elements.each_with_index do |element, index|
57
58
  if element.label_name
58
- builder.method_declaration(element.label_name) do
59
+ repetitions = elements_by_name[element.label_name]
60
+ label_name = element.label_name + (repetitions.size > 1 ? (repetitions.index(element)+1).to_s : "")
61
+ builder.method_declaration(label_name) do
59
62
  builder << "elements[#{index}]"
60
63
  end
61
64
  builder.newline unless index == sequence_elements.size - 1
@@ -2,6 +2,7 @@ dir = File.dirname(__FILE__)
2
2
  require File.join(dir, *%w[node_classes parsing_expression])
3
3
  require File.join(dir, *%w[node_classes atomic_expression])
4
4
  require File.join(dir, *%w[node_classes inline_module])
5
+ require File.join(dir, *%w[node_classes predicate_block])
5
6
  require File.join(dir, *%w[node_classes treetop_file])
6
7
  require File.join(dir, *%w[node_classes grammar])
7
8
  require File.join(dir, *%w[node_classes declaration_sequence])
@@ -2,5 +2,5 @@ dir = File.dirname(__FILE__)
2
2
  require File.join(dir, *%w[compiler lexical_address_space])
3
3
  require File.join(dir, *%w[compiler ruby_builder])
4
4
  require File.join(dir, *%w[compiler node_classes])
5
- require File.join(dir, *%w[compiler metagrammar]) unless $exclude_metagrammar
5
+ require File.join(dir, *%w[compiler metagrammar]) unless defined?($exclude_metagrammar)
6
6
  require File.join(dir, *%w[compiler grammar_compiler])
@@ -0,0 +1,22 @@
1
class Array
  # Sends a method to every element and joins the results with +pattern+.
  #
  # method  - nil (falls back to plain #join), a method name, or a hash-like
  #           object with :name (method name) and optional :args (arguments
  #           passed to every call).
  # pattern - separator placed between the results (default "").
  #
  # Returns the joined String; "" for an empty array.
  def join_with(method, pattern = "")
    return join(pattern) unless method

    name = method
    args = []
    if method.respond_to?(:to_hash)
      name = method[:name]
      args = method[:args] || []
    end

    # map/join replaces the index loop that rebuilt the string on every step.
    map { |element| element.send(name, *args) }.join(pattern)
  end

  # Renders the array as a Treetop sequence: each element's inline
  # seq_to_tt form, separated by single spaces.
  def to_tt
    join_with({ :name => :seq_to_tt, :args => [true] }, " ")
  end
end
@@ -0,0 +1,5 @@
1
class NilClass
  # nil renders as empty Treetop source.
  def to_tt
    ''
  end
end
@@ -0,0 +1,57 @@
1
class Object
  # Alternatives attached to this object through #/ (ordered choice).
  # The list is created on first access by assigning an instance variable.
  def sequence
    @sequence ||= []
  end

  # Appends +other+ as a further alternative and returns self so that
  # choices can be chained: a / b / c.
  def /(other)
    sequence.push(other)
    self
  end

  # Renders this object, its alternatives and its decorations as Treetop
  # source.
  #
  # inline - when true, alternatives are joined with " / " and the choice is
  #          wrapped in parentheses; otherwise each alternative goes on its
  #          own line separated by a line holding "/".
  #
  # Returns the Treetop source String.
  def seq_to_tt(inline = false)
    separator = inline ? " / " : "\n/\n"
    tt = if sequence.empty?
      to_tt
    else
      # Alternatives are always rendered inline, whatever this level uses.
      choice = ([to_tt] + sequence.map { |alternative| alternative.seq_to_tt(true) }).join(separator)
      inline ? "( #{choice} )" : choice
    end

    # Operators
    tt = "&#{tt}" if @amper
    tt = "!#{tt}" if @bang
    tt = "#{tt}*" if @kleene
    tt = "#{tt}+" if @plus
    tt = "#{tt}?" if @mark

    tt = "#{tt} <#{@node}>" if @node
    tt = "#{tt} {\n#{@block.gsub("\t", " ").justify.indent_paragraph(2)}\n}" if @block
    tt = "#{@label}:#{tt}" if @label
    tt
  end

  # Sets the node class rendered as <name>; returns self.
  def node(name)
    @node = name
    self
  end

  # Sets the inline block rendered inside { ... }; returns self.
  def block(content)
    @block = content
    self
  end

  # Sets the label rendered as "name:" in front of the expression; returns self.
  def label(name)
    @label = name
    self
  end

  # Flag setters for the ?, *, +, & and ! operators. Each sets the instance
  # variable of the same name to true and returns self.
  # define_method replaces evaluating generated source strings.
  [:mark, :kleene, :plus, :amper, :bang].each do |flag|
    define_method(flag) do
      instance_variable_set("@#{flag}", true)
      self
    end
  end
end
@@ -0,0 +1,5 @@
1
class Regexp
  # A regexp renders as its own literal form, delimiters and flags included.
  def to_tt
    inspect
  end
end
@@ -8,7 +8,7 @@ class String
8
8
  index + 1
9
9
  end
10
10
  end
11
-
11
+
12
12
  def line_of(index)
13
13
  self[0...index].count("\n") + 1
14
14
  end
@@ -36,7 +36,33 @@ class String
36
36
  end
37
37
  end
38
38
 
39
# Applies String#indent(n) to each line of the receiver and returns the
# concatenated result ("" for an empty string).
def indent_paragraph(n)
  # map/join avoids reallocating the accumulated string for every line.
  each_line.map { |line| line.indent(n) }.join
end
44
+
45
# Removes indentation uniformly.
#
# The common indent is the smallest number of leading spaces found on any
# non-blank line; a non-blank line that does not start with "spaces followed
# by a non-whitespace character" (e.g. one starting with a tab) forces it
# to 0. That many characters are cut from the front of every line, and the
# result is stripped of leading and trailing whitespace.
#
# Lines no longer than the common indent (blank lines) become "\n". The
# previous implementation dropped such a line entirely when its length,
# newline included, was exactly the common indent.
def justify
  indents = each_line.reject { |line| line.strip == "" }.map { |line|
    line =~ /^( *)\S/ ? $1.length : 0
  }
  min = ([length] + indents).min
  each_line.map { |line| line.length > min ? line[min..-1] : "\n" }.join.strip
end
60
+
39
61
  def treetop_camelize
40
62
  to_s.gsub(/\/(.?)/){ "::" + $1.upcase }.gsub(/(^|_)(.)/){ $2.upcase }
41
63
  end
42
- end
64
+
65
# Renders the string as a single-quoted Treetop terminal. The content is
# inserted as is; quotes inside the string are not escaped.
def to_tt
  "'" + to_s + "'"
end
68
+ end
@@ -0,0 +1,5 @@
1
class Symbol
  # A symbol renders as its bare name (used for nonterminal references).
  def to_tt
    id2name
  end
end
@@ -1,2 +1,4 @@
1
1
  dir = File.dirname(__FILE__)
2
- require "#{dir}/ruby_extensions/string"
2
+ Dir.glob("#{dir}/ruby_extensions/*.rb").each do |file|
3
+ require file
4
+ end
@@ -3,7 +3,7 @@ module Treetop
3
3
  class CompiledParser
4
4
  include Treetop::Runtime
5
5
 
6
- attr_reader :input, :index, :terminal_failures, :max_terminal_failure_index
6
+ attr_reader :input, :index, :max_terminal_failure_index
7
7
  attr_writer :root
8
8
  attr_accessor :consume_all_input
9
9
  alias :consume_all_input? :consume_all_input
@@ -25,22 +25,26 @@ module Treetop
25
25
  end
26
26
 
27
27
  def failure_line
28
- terminal_failures && input.line_of(failure_index)
28
+ @terminal_failures && input.line_of(failure_index)
29
29
  end
30
30
 
31
31
  def failure_column
32
- terminal_failures && input.column_of(failure_index)
32
+ @terminal_failures && input.column_of(failure_index)
33
33
  end
34
34
 
35
35
  def failure_reason
36
36
  return nil unless (tf = terminal_failures) && tf.size > 0
37
- "Expected " +
38
- (tf.size == 1 ?
39
- tf[0].expected_string :
40
- "one of #{tf.map{|f| f.expected_string}.uniq*', '}"
41
- ) +
42
- " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
43
- " after #{input[index...failure_index]}"
37
+ "Expected " +
38
+ (tf.size == 1 ?
39
+ tf[0].expected_string :
40
+ "one of #{tf.map{|f| f.expected_string}.uniq*', '}"
41
+ ) +
42
+ " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
43
+ " after #{input[index...failure_index]}"
44
+ end
45
+
46
# Returns the recorded terminal failures as TerminalParseFailure objects.
#
# Failures are stored as raw [index, expected_string] arrays and converted
# here on demand. Conversion happens in place, so only entries that are
# still arrays are wrapped; this keeps repeated calls, and calls made after
# further raw entries were appended, from re-wrapping converted objects.
#
# Returns nil when no failure list has been set.
def terminal_failures
  return nil unless @terminal_failures

  @terminal_failures.map! do |failure|
    failure.is_a?(Array) ? TerminalParseFailure.new(*failure) : failure
  end
end
45
49
 
46
50
 
@@ -84,7 +88,7 @@ module Treetop
84
88
 
85
89
  def has_terminal?(terminal, regex, index)
86
90
  if regex
87
- rx = @regexps[terminal] ||= Regexp.new(terminal)
91
+ rx = @regexps[terminal] ||= Regexp.new(terminal, nil, 'u')
88
92
  input.index(rx, index) == index
89
93
  else
90
94
  input[index, terminal.size] == terminal
@@ -97,7 +101,7 @@ module Treetop
97
101
  @max_terminal_failure_index = index
98
102
  @terminal_failures = []
99
103
  end
100
- terminal_failures << TerminalParseFailure.new(index, expected_string)
104
+ @terminal_failures << [index, expected_string]
101
105
  return nil
102
106
  end
103
107
  end
@@ -1,17 +1,35 @@
1
1
  module Treetop
2
2
  module Runtime
3
3
  class SyntaxNode
4
- attr_reader :input, :interval, :elements
4
+ attr_reader :input, :interval
5
5
  attr_accessor :parent
6
+ attr_reader :dot_id
7
+
8
+ @@dot_id_counter = 0
6
9
 
7
10
  def initialize(input, interval, elements = nil)
8
11
  @input = input
9
12
  @interval = interval
10
- if @elements = elements
11
- elements.each do |element|
12
- element.parent = self
13
+ @elements = elements
14
+ end
15
+
16
# Returns the child nodes, instantiating lazily created children on demand.
#
# For a terminal node the raw @elements value is returned unchanged.
# Otherwise every `true` placeholder in @elements is replaced by a
# one-character SyntaxNode that starts where the previous child ended (or at
# this node's own start for the first child), each child's parent is set to
# this node, and the resulting list is memoized.
#
# Also assigns this node's dot_id from the class-wide counter. The id is now
# assigned only on the first call; previously every call overwrote it and
# advanced the counter, so a node's id changed between calls.
def elements
  return @elements if terminal?

  unless @comprehensive_elements
    # replace the character class placeholders in the sequence (lazy instantiation)
    last_element = nil
    @comprehensive_elements = @elements.map do |element|
      if element == true
        index = last_element ? last_element.interval.last : interval.first
        element = SyntaxNode.new(input, index...(index + 1))
      end
      element.parent = self
      last_element = element
    end
  end

  unless @dot_id
    @dot_id = @@dot_id_counter
    @@dot_id_counter += 1
  end

  @comprehensive_elements
end
16
34
 
17
35
  def terminal?
@@ -29,6 +47,10 @@ module Treetop
29
47
  def empty?
30
48
  interval.first == interval.last && interval.exclude_end?
31
49
  end
50
+
51
# Orders nodes by the start position of their intervals.
def <=>(other)
  own_start = interval.first
  own_start <=> other.interval.first
end
32
54
 
33
55
  def extension_modules
34
56
  local_extensions =
@@ -57,7 +79,7 @@ module Treetop
57
79
  im +
58
80
  (elements && elements.size > 0 ?
59
81
  ":" +
60
- (@elements||[]).map{|e|
82
+ (elements||[]).map{|e|
61
83
  begin
62
84
  "\n"+e.inspect(indent+" ")
63
85
  rescue # Defend against inspect not taking a parameter
@@ -67,6 +89,26 @@ module Treetop
67
89
  ""
68
90
  )
69
91
  end
92
+
93
# Writes this node and, recursively, its children to +io+ as Graphviz DOT
# statements: one labelled node statement per syntax node and one edge per
# parent/child pair.
#
# Nodes are named after their object_id. dot_id is not used because it is
# only assigned inside #elements, so it is still nil for terminals and for a
# node whose elements have not been read yet, which made node names collide.
# Double quotes and backslashes in the label text are escaped so the label
# stays a valid DOT string.
def write_dot(io)
  label = text_value.gsub(/["\\]/) { |char| "\\#{char}" }
  io.puts "node#{object_id} [label=\"#{label}\"];"
  return unless nonterminal?

  elements.each do |child|
    io.puts "node#{object_id} -> node#{child.object_id};"
    child.write_dot(io)
  end
end
103
+
104
# Writes the tree rooted at this node to the file "<fname>.dot" as a
# Graphviz digraph named G, using #write_dot for the contents.
def write_dot_file(fname)
  File.open(fname + ".dot", "w") do |dot|
    dot.puts "digraph G {"
    write_dot(dot)
    dot.puts "}"
  end
end
70
112
  end
71
113
  end
72
114
  end
@@ -0,0 +1,39 @@
1
+ =begin rdoc
2
+ Definition of TreeTop syntax in pure Ruby.
3
+ =end
4
+
5
module Treetop
  # Lets a Treetop grammar be written as Ruby code. Symbols act as
  # nonterminals, strings as terminals and arrays as sequences. Ordered
  # choices are written much like in the original Treetop syntax.
  #
  # (Note: it is better not to use numbers; use Strings instead)
  #
  module Syntax
    # Collects the Treetop source text for the body of one grammar.
    class Grammar
      attr_reader :source

      def initialize
        @source = ""
      end

      # Appends a rule called +name+; the block's value is rendered with
      # seq_to_tt and indented by two columns to form the rule body.
      def rule(name)
        rule_name = name.to_s
        body = yield.seq_to_tt.indent_paragraph(2)
        @source += "rule #{rule_name}\n#{body}\nend\n"
      end

      # Appends an include statement for +name+.
      def include(name)
        @source += "include #{name}\n"
      end
    end

    # Instance-level shortcut that forwards to Syntax.grammar.
    def grammar(name, &block)
      Syntax.grammar(name, &block)
    end

    # Evaluates +block+ against a fresh Grammar, wraps the collected rules
    # in a grammar declaration called +name+ and hands the source text to
    # Treetop.load_from_string.
    def self.grammar(name, &block)
      builder = Grammar.new
      builder.instance_eval(&block)
      Treetop.load_from_string("grammar #{name}\n#{builder.source.indent_paragraph(2)}end\n")
    end
  end
end
@@ -1,8 +1,8 @@
1
1
  module Treetop #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
- MINOR = 3
5
- TINY = 0
4
+ MINOR = 4
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/treetop.rb CHANGED
@@ -1,11 +1,17 @@
1
1
  require 'rubygems'
2
2
 
3
+ module Treetop
4
+ VALID_GRAMMAR_EXT = ['treetop', 'tt']
5
+ VALID_GRAMMAR_EXT_REGEXP = /\.(#{VALID_GRAMMAR_EXT.join('|')})\Z/o
6
+ end
7
+
3
8
  dir = File.dirname(__FILE__)
4
9
 
5
10
  TREETOP_ROOT = File.join(dir, 'treetop')
6
11
  require File.join(TREETOP_ROOT, "ruby_extensions")
7
12
  require File.join(TREETOP_ROOT, "runtime")
8
13
  require File.join(TREETOP_ROOT, "compiler")
14
+ require File.join(TREETOP_ROOT, "syntax")
9
15
 
10
16
  require 'polyglot'
11
- Polyglot.register(["treetop", "tt"], Treetop)
17
+ Polyglot.register(Treetop::VALID_GRAMMAR_EXT, Treetop)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treetop
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Sobo
@@ -9,7 +9,7 @@ autorequire: treetop
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-22 00:00:00 +10:00
12
+ date: 2009-09-04 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ extra_rdoc_files: []
32
32
 
33
33
  files:
34
34
  - LICENSE
35
- - README
35
+ - README.md
36
36
  - Rakefile
37
37
  - lib/treetop/bootstrap_gen_1_metagrammar.rb
38
38
  - lib/treetop/compiler/grammar_compiler.rb
@@ -52,6 +52,7 @@ files:
52
52
  - lib/treetop/compiler/node_classes/parsing_expression.rb
53
53
  - lib/treetop/compiler/node_classes/parsing_rule.rb
54
54
  - lib/treetop/compiler/node_classes/predicate.rb
55
+ - lib/treetop/compiler/node_classes/predicate_block.rb
55
56
  - lib/treetop/compiler/node_classes/repetition.rb
56
57
  - lib/treetop/compiler/node_classes/sequence.rb
57
58
  - lib/treetop/compiler/node_classes/terminal.rb
@@ -60,7 +61,12 @@ files:
60
61
  - lib/treetop/compiler/node_classes.rb
61
62
  - lib/treetop/compiler/ruby_builder.rb
62
63
  - lib/treetop/compiler.rb
64
+ - lib/treetop/ruby_extensions/array.rb
65
+ - lib/treetop/ruby_extensions/nil.rb
66
+ - lib/treetop/ruby_extensions/object.rb
67
+ - lib/treetop/ruby_extensions/regexp.rb
63
68
  - lib/treetop/ruby_extensions/string.rb
69
+ - lib/treetop/ruby_extensions/symbol.rb
64
70
  - lib/treetop/ruby_extensions.rb
65
71
  - lib/treetop/runtime/compiled_parser.rb
66
72
  - lib/treetop/runtime/interval_skip_list/head_node.rb
@@ -72,6 +78,7 @@ files:
72
78
  - lib/treetop/runtime/terminal_parse_failure_debug.rb
73
79
  - lib/treetop/runtime/terminal_syntax_node.rb
74
80
  - lib/treetop/runtime.rb
81
+ - lib/treetop/syntax.rb
75
82
  - lib/treetop/version.rb
76
83
  - lib/treetop.rb
77
84
  - bin/tt
@@ -94,6 +101,8 @@ files:
94
101
  - examples/lambda_calculus/lambda_calculus_node_classes.rb
95
102
  - examples/lambda_calculus/lambda_calculus_test.rb
96
103
  - examples/lambda_calculus/test_helper.rb
104
+ - examples/ruby_syntax/syntax_test.rb
105
+ - examples/ruby_syntax/test_helper.rb
97
106
  has_rdoc: true
98
107
  homepage: http://functionalform.blogspot.com
99
108
  licenses: []
@@ -118,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
127
  requirements: []
119
128
 
120
129
  rubyforge_project:
121
- rubygems_version: 1.3.4
130
+ rubygems_version: 1.3.5
122
131
  signing_key:
123
132
  specification_version: 3
124
133
  summary: A Ruby-based text parsing and interpretation DSL