treetop 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,6 +127,18 @@ module Treetop
127
127
  end
128
128
  }
129
129
  /
130
+ prefix space? predicate_block {
131
+ def compile(address, builder, parent_expression=nil)
132
+ prefix.compile(address, builder, self)
133
+ end
134
+ def prefixed_expression
135
+ predicate_block
136
+ end
137
+ def inline_modules
138
+ []
139
+ end
140
+ }
141
+ /
130
142
  atomic suffix node_class_declarations {
131
143
  def compile(address, builder, parent_expression=nil)
132
144
  suffix.compile(address, builder, self)
@@ -223,6 +235,18 @@ module Treetop
223
235
  end
224
236
  }
225
237
  /
238
+ prefix space? predicate_block {
239
+ def compile(address, builder, parent_expression=nil)
240
+ prefix.compile(address, builder, self)
241
+ end
242
+ def prefixed_expression
243
+ predicate_block
244
+ end
245
+ def inline_modules
246
+ []
247
+ end
248
+ }
249
+ /
226
250
  atomic suffix {
227
251
  def compile(lexical_address, builder)
228
252
  suffix.compile(lexical_address, builder, self)
@@ -321,7 +345,7 @@ module Treetop
321
345
  end
322
346
 
323
347
  rule character_class
324
- '[' characters:(!']' ('\\' . /!'\\' .))+ ']' <CharacterClass> {
348
+ '[' characters:(!']' ('\\' . /!'\\' .))+ ']' <CharacterClass> {
325
349
  def characters
326
350
  super.text_value
327
351
  end
@@ -372,6 +396,10 @@ module Treetop
372
396
  }
373
397
  end
374
398
 
399
+ rule predicate_block
400
+ '' inline_module <PredicateBlock>
401
+ end
402
+
375
403
  rule inline_module
376
404
  '{' (inline_module / ![{}] .)* '}' <InlineModule>
377
405
  end
@@ -4,9 +4,10 @@ module Treetop
4
4
  def compile(address, builder, parent_expression = nil)
5
5
  super
6
6
  builder.if__ "index < input_length" do
7
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
7
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
8
+ assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
8
9
  extend_result_with_inline_module
9
- builder << "@index += 1"
10
+ builder << "@index = next_character"
10
11
  end
11
12
  builder.else_ do
12
13
  builder << 'terminal_parse_failure("any character")'
@@ -5,12 +5,17 @@ module Treetop
5
5
  super
6
6
 
7
7
  builder.if__ "has_terminal?(#{grounded_regexp(text_value)}, true, index)" do
8
- assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
9
- extend_result_with_inline_module
10
- builder << "@index += 1"
8
+ builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
9
+ if address == 0 || decorated?
10
+ assign_result "instantiate_node(#{node_class_name}, input, index...next_character)"
11
+ extend_result_with_inline_module
12
+ else
13
+ assign_lazily_instantiated_node
14
+ end
15
+ builder << "@index = next_character"
11
16
  end
12
17
  builder.else_ do
13
- "terminal_parse_failure(#{single_quote(characters)})"
18
+ # "terminal_parse_failure(#{single_quote(characters)})"
14
19
  assign_result 'nil'
15
20
  end
16
21
  end
@@ -21,6 +21,10 @@ module Treetop
21
21
  parent_expression && parent_expression.inline_module_name
22
22
  end
23
23
 
24
# Reports whether the parent expression decorates this expression, i.e.
# whether it supplies a node class name or an inline module name.
#
# Returns the parent's node_class_name or inline_module_name (whichever is
# truthy first) when there is a parent expression; otherwise a falsy value.
def decorated?
  # The original disjunction tested node_class_name twice; the duplicate
  # operand could never change the outcome, so it has been removed.
  parent_expression && (parent_expression.node_class_name || parent_expression.inline_module_name)
end
27
+
24
28
  def optional_arg(arg)
25
29
  if arg
26
30
  ", #{arg}"
@@ -89,6 +93,10 @@ module Treetop
89
93
  def assign_failure(start_index_var)
90
94
  assign_result("nil")
91
95
  end
96
+
97
# Passes the source text "true" to assign_result, so the generated parser
# stores the literal true as its result instead of a node built on the spot.
def assign_lazily_instantiated_node
  placeholder = "true"
  assign_result(placeholder)
end
92
100
 
93
101
  def var_initialization
94
102
  left, right = [], []
@@ -0,0 +1,16 @@
1
module Treetop
  module Compiler
    # Compiler node for a predicate block. Compiling it emits a result
    # assignment whose value is the block's own text wrapped in a lambda and
    # called immediately.
    class PredicateBlock < ParsingExpression
      # Emits `lambda <block text>.call(<accumulator>)` as the result.
      #
      # The argument handed to the lambda is the accumulator variable of the
      # nearest ancestor that responds to accumulator_var; when no such
      # ancestor exists the lambda is called without arguments.
      def compile(index, builder, parent_expression = nil)
        super
        # REVISIT: This is distinctly dodgy, but since we can only be called
        # from two contexts, and it works in both of those, I'm going with it
        # for now, as opposed to doing the major refactor of providing a
        # proper way of accessing the parent's accumulator variable.
        holder = parent
        holder = holder.parent until !holder || holder.respond_to?(:accumulator_var)
        argument = holder ? holder.accumulator_var : ""
        assign_result "lambda #{text_value}.call(#{argument})"
      end
    end
  end
end
@@ -53,9 +53,12 @@ module Treetop
53
53
  def compile(index, builder, rule)
54
54
  super
55
55
  builder.module_declaration(module_name) do
56
+ elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name] ||= []) << e; h}
56
57
  sequence_elements.each_with_index do |element, index|
57
58
  if element.label_name
58
- builder.method_declaration(element.label_name) do
59
+ repetitions = elements_by_name[element.label_name]
60
+ label_name = element.label_name + (repetitions.size > 1 ? (repetitions.index(element)+1).to_s : "")
61
+ builder.method_declaration(label_name) do
59
62
  builder << "elements[#{index}]"
60
63
  end
61
64
  builder.newline unless index == sequence_elements.size - 1
@@ -2,6 +2,7 @@ dir = File.dirname(__FILE__)
2
2
  require File.join(dir, *%w[node_classes parsing_expression])
3
3
  require File.join(dir, *%w[node_classes atomic_expression])
4
4
  require File.join(dir, *%w[node_classes inline_module])
5
+ require File.join(dir, *%w[node_classes predicate_block])
5
6
  require File.join(dir, *%w[node_classes treetop_file])
6
7
  require File.join(dir, *%w[node_classes grammar])
7
8
  require File.join(dir, *%w[node_classes declaration_sequence])
@@ -2,5 +2,5 @@ dir = File.dirname(__FILE__)
2
2
  require File.join(dir, *%w[compiler lexical_address_space])
3
3
  require File.join(dir, *%w[compiler ruby_builder])
4
4
  require File.join(dir, *%w[compiler node_classes])
5
- require File.join(dir, *%w[compiler metagrammar]) unless $exclude_metagrammar
5
+ require File.join(dir, *%w[compiler metagrammar]) unless defined?($exclude_metagrammar)
6
6
  require File.join(dir, *%w[compiler grammar_compiler])
@@ -0,0 +1,22 @@
1
class Array
  # Joins the results of calling a method on every element.
  #
  # method  - nil (plain Array#join is used), a method name, or a hash-like
  #           object of the form { :name => method_name, :args => [...] }
  #           whose :args are passed to every call.
  # pattern - separator placed between the results (default "").
  #
  # Returns "" for an empty array; otherwise the per-element results
  # concatenated with `+`, separated by pattern.
  def join_with(method, pattern = "")
    return join(pattern) unless method
    return "" if empty?

    args = []
    if method.respond_to?(:to_hash)
      args = method[:args] || []
      method = method[:name]
    end

    pieces = map { |element| element.send(method, *args) }
    # reduce without a seed starts from the first piece, exactly like the
    # original "output = self[0]...; output += pattern + ..." accumulation.
    pieces.reduce { |output, piece| output + (pattern + piece) }
  end

  # Renders the array by calling seq_to_tt(true) on every element and
  # joining the results with a single space.
  def to_tt
    join_with({ :name => :seq_to_tt, :args => [true] }, " ")
  end
end
@@ -0,0 +1,5 @@
1
class NilClass
  # nil renders as an empty string.
  def to_tt
    ''
  end
end
@@ -0,0 +1,57 @@
1
# Builder-style helpers added to every object. Each helper records state in
# instance variables on the receiver and returns the receiver so calls can be
# chained; seq_to_tt then renders the receiver (via its own to_tt) together
# with the recorded state.
#
# NOTE(review): these helpers set instance variables on the receiver, so they
# raise on frozen receivers (e.g. Symbols or frozen string literals on current
# Rubies) -- confirm against the Ruby versions this must support.
class Object
  # Alternatives attached to this object with `/`, created on first use.
  def sequence
    @sequence ||= []
  end

  # Appends other to this object's alternatives and returns self.
  # Classes that define their own `/` (numbers, for instance) keep theirs.
  def /(other)
    sequence.push(other)
    self
  end

  # Renders this object and everything recorded on it as a string.
  #
  # inline - when true, alternatives are separated by " / " and wrapped in
  #          parentheses; otherwise they are separated by "\n/\n".
  #
  # Order of decoration: alternatives, "&" then "!" prefixes, "*", "+", "?"
  # suffixes, " <node>", the " {...}" block and finally the "label:" prefix.
  def seq_to_tt(inline = false)
    separator = inline ? " / " : "\n/\n"
    tt = if sequence.empty?
      to_tt
    else
      output = to_tt + separator + sequence.join_with({ :name => :seq_to_tt, :args => [true] }, separator)
      output = "( #{output} )" if inline
      output
    end

    # Operators
    tt = "&" + tt if @amper
    tt = "!" + tt if @bang
    tt += "*" if @kleene
    tt += "+" if @plus
    tt += "?" if @mark

    tt += " <#{@node.to_s}>" if @node
    tt += " {\n#{@block.gsub("\t", " ").justify.indent_paragraph(2)}\n}" if @block
    tt = @label.to_s + ':' + tt if @label
    tt
  end

  # Records the name rendered as " <name>" and returns self.
  def node(name)
    @node = name
    self
  end

  # Records the text rendered inside " {...}" and returns self.
  def block(content)
    @block = content
    self
  end

  # Records the label rendered as "name:" and returns self.
  def label(name)
    @label = name
    self
  end

  # Flag setters: each sets the instance variable of the same name to true
  # and returns self. define_method replaces the original string class_eval,
  # producing the same public methods without evaluating generated source.
  [:mark, :kleene, :plus, :amper, :bang].each do |sym|
    define_method(sym) do
      instance_variable_set("@#{sym}", true)
      self
    end
  end
end
@@ -0,0 +1,5 @@
1
class Regexp
  # Renders the regexp using its #inspect form (slashes and flags included).
  def to_tt
    inspect
  end
end
@@ -8,7 +8,7 @@ class String
8
8
  index + 1
9
9
  end
10
10
  end
11
-
11
+
12
12
  def line_of(index)
13
13
  self[0...index].count("\n") + 1
14
14
  end
@@ -36,7 +36,33 @@ class String
36
36
  end
37
37
  end
38
38
 
39
# Applies indent(n) to every line of the string and returns the
# concatenated result ("" for an empty string).
def indent_paragraph(n)
  each_line.map { |line| line.indent(n) }.join
end
44
+
45
# Removes indentation uniformly.
#
# The smallest run of leading spaces found on any non-blank line is cut from
# the start of every line; if a non-blank line does not start with spaces
# followed by a non-whitespace character (e.g. it is tab-indented), nothing
# is cut. The result is stripped of leading and trailing whitespace.
def justify
  min = length
  each_line do |line|
    next if line.strip == ""
    if (match = /^( *)\S/.match(line))
      min = match[1].length if min > match[1].length
    else
      min = 0
    end
  end

  trimmed = each_line.map do |line|
    rest = line.slice(min...line.length)
    # Only whitespace-only lines can be consumed entirely. The original kept
    # them as "\n" when slice returned nil but silently dropped them when
    # slice returned ""; both cases now keep the blank line.
    rest.nil? || rest.empty? ? "\n" : rest
  end
  trimmed.join.strip
end
60
+
39
61
  def treetop_camelize
40
62
  to_s.gsub(/\/(.?)/){ "::" + $1.upcase }.gsub(/(^|_)(.)/){ $2.upcase }
41
63
  end
42
- end
64
+
65
# Renders the string wrapped in single quotes. The content is inserted
# verbatim; no escaping is applied.
def to_tt
  "'" + self + "'"
end
68
+ end
@@ -0,0 +1,5 @@
1
class Symbol
  # Renders the symbol as its bare name (no leading colon).
  def to_tt
    to_s
  end
end
@@ -1,2 +1,4 @@
1
1
  dir = File.dirname(__FILE__)
2
- require "#{dir}/ruby_extensions/string"
2
+ Dir.glob("#{dir}/ruby_extensions/*.rb").each do |file|
3
+ require file
4
+ end
@@ -3,7 +3,7 @@ module Treetop
3
3
  class CompiledParser
4
4
  include Treetop::Runtime
5
5
 
6
- attr_reader :input, :index, :terminal_failures, :max_terminal_failure_index
6
+ attr_reader :input, :index, :max_terminal_failure_index
7
7
  attr_writer :root
8
8
  attr_accessor :consume_all_input
9
9
  alias :consume_all_input? :consume_all_input
@@ -25,22 +25,26 @@ module Treetop
25
25
  end
26
26
 
27
27
  def failure_line
28
- terminal_failures && input.line_of(failure_index)
28
+ @terminal_failures && input.line_of(failure_index)
29
29
  end
30
30
 
31
31
  def failure_column
32
- terminal_failures && input.column_of(failure_index)
32
+ @terminal_failures && input.column_of(failure_index)
33
33
  end
34
34
 
35
35
  def failure_reason
36
36
  return nil unless (tf = terminal_failures) && tf.size > 0
37
- "Expected " +
38
- (tf.size == 1 ?
39
- tf[0].expected_string :
40
- "one of #{tf.map{|f| f.expected_string}.uniq*', '}"
41
- ) +
42
- " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
43
- " after #{input[index...failure_index]}"
37
+ "Expected " +
38
+ (tf.size == 1 ?
39
+ tf[0].expected_string :
40
+ "one of #{tf.map{|f| f.expected_string}.uniq*', '}"
41
+ ) +
42
+ " at line #{failure_line}, column #{failure_column} (byte #{failure_index+1})" +
43
+ " after #{input[index...failure_index]}"
44
+ end
45
+
46
# Returns the recorded terminal failures as TerminalParseFailure objects.
#
# Failures are stored as [index, expected_string] pairs and converted in
# place on demand. Entries that were already converted by an earlier call are
# left untouched, so the reader is safe to call repeatedly. Returns nil when
# no failure list has been created yet.
def terminal_failures
  return @terminal_failures unless @terminal_failures

  @terminal_failures.map! do |failure|
    failure.is_a?(Array) ? TerminalParseFailure.new(*failure) : failure
  end
end
45
49
 
46
50
 
@@ -84,7 +88,7 @@ module Treetop
84
88
 
85
89
  def has_terminal?(terminal, regex, index)
86
90
  if regex
87
- rx = @regexps[terminal] ||= Regexp.new(terminal)
91
+ rx = @regexps[terminal] ||= Regexp.new(terminal, nil, 'u')
88
92
  input.index(rx, index) == index
89
93
  else
90
94
  input[index, terminal.size] == terminal
@@ -97,7 +101,7 @@ module Treetop
97
101
  @max_terminal_failure_index = index
98
102
  @terminal_failures = []
99
103
  end
100
- terminal_failures << TerminalParseFailure.new(index, expected_string)
104
+ @terminal_failures << [index, expected_string]
101
105
  return nil
102
106
  end
103
107
  end
@@ -1,17 +1,35 @@
1
1
  module Treetop
2
2
  module Runtime
3
3
  class SyntaxNode
4
attr_reader :input, :interval
attr_accessor :parent

# Source of the ids handed out by dot_id (shared by all syntax nodes).
@@dot_id_counter = 0

# input    - the object the interval refers to.
# interval - the range of input covered by this node.
# elements - child entries, or nil. An entry may be the literal true, which
#            stands in for a child that is only built when elements is read.
def initialize(input, interval, elements = nil)
  @input = input
  @interval = interval
  @elements = elements
end

# Returns the child nodes.
#
# For terminal nodes the stored value is returned as is. Otherwise the list
# is built once and cached: every `true` placeholder is replaced by a
# one-position SyntaxNode starting where the previous child ended (or at
# this node's start for the first child), and each child's parent is set to
# this node.
def elements
  return @elements if terminal?

  @comprehensive_elements ||= begin
    # replace the character class placeholders in the sequence (lazy instantiation)
    last_element = nil
    @elements.map do |element|
      if element == true
        index = last_element ? last_element.interval.last : interval.first
        element = SyntaxNode.new(input, index...(index + 1))
      end
      element.parent = self
      last_element = element
    end
  end
end

# Identifier of this node, allocated from the shared counter on first use
# and stable afterwards. Previously the id was (re)assigned by every call to
# elements and never assigned for terminal nodes, so a node's id could be
# nil or change between reads.
def dot_id
  @dot_id ||= begin
    id = @@dot_id_counter
    @@dot_id_counter += 1
    id
  end
end
16
34
 
17
35
  def terminal?
@@ -29,6 +47,10 @@ module Treetop
29
47
  def empty?
30
48
  interval.first == interval.last && interval.exclude_end?
31
49
  end
50
+
51
+ def <=>(other)
52
+ self.interval.first <=> other.interval.first
53
+ end
32
54
 
33
55
  def extension_modules
34
56
  local_extensions =
@@ -57,7 +79,7 @@ module Treetop
57
79
  im +
58
80
  (elements && elements.size > 0 ?
59
81
  ":" +
60
- (@elements||[]).map{|e|
82
+ (elements||[]).map{|e|
61
83
  begin
62
84
  "\n"+e.inspect(indent+" ")
63
85
  rescue # Defend against inspect not taking a parameter
@@ -67,6 +89,26 @@ module Treetop
67
89
  ""
68
90
  )
69
91
  end
92
+
93
# Writes this node, and recursively its children, to io as DOT statements:
# one "nodeN [label=...]" line per node and one "nodeN -> nodeM" line per
# parent/child edge.
#
# Backslashes and double quotes in the label are escaped and newlines are
# written as \n, so arbitrary matched text cannot terminate the quoted label
# early.
def write_dot(io)
  label = text_value.gsub(/["\\]/) { |char| "\\#{char}" }.gsub("\n") { "\\n" }
  io.puts "node#{dot_id} [label=\"#{label}\"];"
  return unless nonterminal?

  elements.each do |child|
    io.puts "node#{dot_id} -> node#{child.dot_id};"
    child.write_dot(io)
  end
end
103
+
104
# Writes this node's graph to "<fname>.dot": the output of write_dot wrapped
# in a "digraph G { ... }" block. The file is opened for writing, so an
# existing file of that name is overwritten.
def write_dot_file(fname)
  path = fname + ".dot"
  File.open(path, "w") do |dot|
    dot.puts "digraph G {"
    write_dot(dot)
    dot.puts "}"
  end
end
70
112
  end
71
113
  end
72
114
  end
@@ -0,0 +1,39 @@
1
# Definition of TreeTop syntax in pure Ruby.

module Treetop
  # Provides the possibility to write Treetop syntax as Ruby code.
  # Symbols act as nonterminals, strings as terminals, arrays as
  # sequences. Ordered choices are defined similarly to the original
  # Treetop syntax.
  #
  # (Note: it is better not to use numbers; use Strings instead)
  #
  module Syntax
    # Accumulates grammar source text while a grammar block is evaluated.
    class Grammar
      attr_reader :source

      def initialize
        @source = ""
      end

      # Appends a "rule <name> ... end" section whose body is the block's
      # return value rendered with seq_to_tt and indented by two spaces.
      # Returns the accumulated source.
      def rule(name)
        @source += "rule #{name}\n#{yield.seq_to_tt.indent_paragraph(2)}\nend\n"
      end

      # Appends an "include <name>" line. Returns the accumulated source.
      def include(name)
        @source += "include #{name}\n"
      end
    end

    # Instance-level convenience that forwards to Syntax.grammar.
    def grammar(name, &block)
      Syntax.grammar(name, &block)
    end

    # Evaluates block in the context of a fresh Grammar, wraps the collected
    # source in "grammar <name> ... end" and hands it to
    # Treetop.load_from_string.
    def self.grammar(name, &block)
      builder = Grammar.new
      builder.instance_eval(&block)
      text = "grammar #{name}\n#{builder.source.indent_paragraph(2)}end\n"
      Treetop.load_from_string(text)
    end
  end
end
@@ -1,8 +1,8 @@
1
1
  module Treetop #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 1
4
- MINOR = 3
5
- TINY = 0
4
+ MINOR = 4
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/treetop.rb CHANGED
@@ -1,11 +1,17 @@
1
1
  require 'rubygems'
2
2
 
3
+ module Treetop
4
+ VALID_GRAMMAR_EXT = ['treetop', 'tt']
5
+ VALID_GRAMMAR_EXT_REGEXP = /\.(#{VALID_GRAMMAR_EXT.join('|')})\Z/o
6
+ end
7
+
3
8
  dir = File.dirname(__FILE__)
4
9
 
5
10
  TREETOP_ROOT = File.join(dir, 'treetop')
6
11
  require File.join(TREETOP_ROOT, "ruby_extensions")
7
12
  require File.join(TREETOP_ROOT, "runtime")
8
13
  require File.join(TREETOP_ROOT, "compiler")
14
+ require File.join(TREETOP_ROOT, "syntax")
9
15
 
10
16
  require 'polyglot'
11
- Polyglot.register(["treetop", "tt"], Treetop)
17
+ Polyglot.register(Treetop::VALID_GRAMMAR_EXT, Treetop)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treetop
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Sobo
@@ -9,7 +9,7 @@ autorequire: treetop
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-22 00:00:00 +10:00
12
+ date: 2009-09-04 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ extra_rdoc_files: []
32
32
 
33
33
  files:
34
34
  - LICENSE
35
- - README
35
+ - README.md
36
36
  - Rakefile
37
37
  - lib/treetop/bootstrap_gen_1_metagrammar.rb
38
38
  - lib/treetop/compiler/grammar_compiler.rb
@@ -52,6 +52,7 @@ files:
52
52
  - lib/treetop/compiler/node_classes/parsing_expression.rb
53
53
  - lib/treetop/compiler/node_classes/parsing_rule.rb
54
54
  - lib/treetop/compiler/node_classes/predicate.rb
55
+ - lib/treetop/compiler/node_classes/predicate_block.rb
55
56
  - lib/treetop/compiler/node_classes/repetition.rb
56
57
  - lib/treetop/compiler/node_classes/sequence.rb
57
58
  - lib/treetop/compiler/node_classes/terminal.rb
@@ -60,7 +61,12 @@ files:
60
61
  - lib/treetop/compiler/node_classes.rb
61
62
  - lib/treetop/compiler/ruby_builder.rb
62
63
  - lib/treetop/compiler.rb
64
+ - lib/treetop/ruby_extensions/array.rb
65
+ - lib/treetop/ruby_extensions/nil.rb
66
+ - lib/treetop/ruby_extensions/object.rb
67
+ - lib/treetop/ruby_extensions/regexp.rb
63
68
  - lib/treetop/ruby_extensions/string.rb
69
+ - lib/treetop/ruby_extensions/symbol.rb
64
70
  - lib/treetop/ruby_extensions.rb
65
71
  - lib/treetop/runtime/compiled_parser.rb
66
72
  - lib/treetop/runtime/interval_skip_list/head_node.rb
@@ -72,6 +78,7 @@ files:
72
78
  - lib/treetop/runtime/terminal_parse_failure_debug.rb
73
79
  - lib/treetop/runtime/terminal_syntax_node.rb
74
80
  - lib/treetop/runtime.rb
81
+ - lib/treetop/syntax.rb
75
82
  - lib/treetop/version.rb
76
83
  - lib/treetop.rb
77
84
  - bin/tt
@@ -94,6 +101,8 @@ files:
94
101
  - examples/lambda_calculus/lambda_calculus_node_classes.rb
95
102
  - examples/lambda_calculus/lambda_calculus_test.rb
96
103
  - examples/lambda_calculus/test_helper.rb
104
+ - examples/ruby_syntax/syntax_test.rb
105
+ - examples/ruby_syntax/test_helper.rb
97
106
  has_rdoc: true
98
107
  homepage: http://functionalform.blogspot.com
99
108
  licenses: []
@@ -118,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
127
  requirements: []
119
128
 
120
129
  rubyforge_project:
121
- rubygems_version: 1.3.4
130
+ rubygems_version: 1.3.5
122
131
  signing_key:
123
132
  specification_version: 3
124
133
  summary: A Ruby-based text parsing and interpretation DSL