aurum 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/Rakefile +29 -0
  2. data/examples/dangling_else/grammar.rb +23 -0
  3. data/examples/expression/grammar.rb +28 -0
  4. data/examples/smalltalk/grammar.rb +151 -0
  5. data/examples/smalltalk/interpreter.rb +70 -0
  6. data/examples/yacc/grammar.rb +72 -0
  7. data/lib/aurum.rb +1 -9
  8. data/lib/aurum/engine.rb +39 -175
  9. data/lib/aurum/engine/parsing_facility.rb +107 -0
  10. data/lib/aurum/engine/tokenization_facility.rb +86 -0
  11. data/lib/aurum/grammar.rb +52 -219
  12. data/lib/aurum/grammar/automata.rb +194 -0
  13. data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
  14. data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
  15. data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
  16. data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
  17. data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
  18. data/lib/aurum/grammar/compiled_tables.rb +20 -0
  19. data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
  20. data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
  21. data/lib/aurum/grammar/lexical_rules.rb +224 -0
  22. data/lib/aurum/grammar/metalang/grammar.rb +47 -0
  23. data/lib/aurum/grammar/syntax_rules.rb +95 -0
  24. data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
  25. data/spec/engine/lexer_spec.rb +59 -0
  26. data/spec/engine/parser_spec.rb +90 -0
  27. data/spec/examples/dangling_else_example.rb +30 -0
  28. data/spec/examples/expression_example.rb +48 -0
  29. data/spec/examples/smalltalk_example.rb +50 -0
  30. data/spec/examples/yacc_spec.rb +30 -0
  31. data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
  32. data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
  33. data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
  34. data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
  35. data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
  36. data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
  37. data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
  38. data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
  39. data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
  40. data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
  41. data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
  42. data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
  43. data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
  44. data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
  45. data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
  46. data/spec/grammar/metalang/metalang_spec.rb +0 -0
  47. data/spec/grammar/precedence_spec.rb +42 -0
  48. data/spec/grammar/syntax_rules_spec.rb +31 -0
  49. data/spec/parser_matcher.rb +69 -0
  50. data/spec/pattern_matcher.rb +123 -0
  51. data/spec/spec_helper.rb +133 -0
  52. metadata +70 -36
  53. data/example/expression/expression.rb +0 -35
  54. data/example/expression/lisp.rb +0 -26
  55. data/lib/aurum/lexical_table_generator.rb +0 -429
  56. data/lib/aurum/parsing_table_generator.rb +0 -464
  57. data/test/engine/lexer_test.rb +0 -59
  58. data/test/engine/semantic_attributes_test.rb +0 -15
  59. data/test/grammar_definition/character_class_definition_test.rb +0 -28
  60. data/test/grammar_definition/grammar_definition_test.rb +0 -55
  61. data/test/grammar_definition/lexical_definition_test.rb +0 -56
  62. data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
  63. data/test/grammar_definition/production_definition_test.rb +0 -60
  64. data/test/lexical_table_generator/automata_test.rb +0 -74
  65. data/test/lexical_table_generator/character_set_test.rb +0 -73
  66. data/test/lexical_table_generator/interval_test.rb +0 -36
  67. data/test/lexical_table_generator/pattern_test.rb +0 -115
  68. data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
  69. data/test/lexical_table_generator/table_generator_test.rb +0 -126
  70. data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
  71. data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
  72. data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
  73. data/test/parsing_table_generator/lr_item_test.rb +0 -27
  74. data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
  75. data/test/parsing_table_generator/precedence_table_test.rb +0 -28
  76. data/test/parsing_table_generator/production_test.rb +0 -9
  77. data/test/test_helper.rb +0 -103
@@ -0,0 +1,190 @@
1
+ require 'delegate'
2
+
3
+ module Aurum
4
+ module Builder
5
# An LR item: a production paired with a dot position inside its right-hand
# side. The facts derived from that pair (symbol at the dot, kernel/handle
# flags, symbols from the dot onwards) are computed once, at construction.
class LRItem < Struct.new(:production, :position)
  attr_reader :dot_symbol, :is_kernel, :is_handle, :remaining

  # production - object responding to #symbols and #nonterminal.
  # position   - index of the dot within production.symbols (defaults to 0).
  def initialize(production, position = 0)
    super(production, position)
    rhs = production.symbols
    # The dot sits at or past the end of the right-hand side.
    @is_handle = position >= rhs.size
    # Handles, items with an advanced dot, and items of the start symbol are kernels.
    @is_kernel = @is_handle || position != 0 || production.nonterminal == StartSymbol
    @dot_symbol = rhs[position]
    @remaining = @is_handle ? [] : rhs[position..-1]
  end

  # Returns a new item for the same production with the dot moved one symbol right.
  def next
    LRItem.new(production, position + 1)
  end

  # True when the dot is in front of the last symbol and that symbol is +symbol+.
  def reducable?(symbol)
    last_index = production.symbols.size - 1
    position == last_index && @dot_symbol == symbol
  end

  # Renders the item as "name -> a .b c ", with a trailing '.' when the dot
  # is at or past the end of the right-hand side.
  def inspect
    rhs = production.symbols
    pieces = []
    rhs.each_with_index do |symbol, index|
      pieces << '.' if index == position
      pieces << symbol.inspect << ' '
    end
    pieces << '.' if position >= rhs.length
    "#{production.nonterminal.name} -> " + pieces.join
  end

  alias kernel? is_kernel
  alias handle? is_handle
  alias to_s inspect
end
36
+
37
# A set of LR items. The object itself delegates to an Array holding the
# kernel items of the closure; the complete closure is kept in #all_items.
# It also records the parser actions, goto targets and predecessor states
# attached to it by the add_* methods.
class State < DelegateClass(Array)
  attr_accessor :index
  attr_reader :actions, :handles, :all_items, :expect_symbols

  # augmented_grammar - object answering #productions, #nullable? and #first_set.
  # items             - the LRItems the closure is computed from.
  def initialize augmented_grammar, items
    @augmented_grammar = augmented_grammar
    super(kernels_of_closure(items))
    @handles = select { |item| item.handle? }
    # NOTE: the default array is shared by every missing key; it is only read
    # (see #read), never appended to - #add_action installs a fresh array first.
    @actions = Hash.new([])
    @goto_states = {}
    @predecessors = {}
    @expect_symbols = []
    @all_items.each do |item|
      next if item.handle?
      @expect_symbols << item.dot_symbol
    end
  end

  # True when the kernel holds more than one handle, or mixes handles with
  # non-handle items.
  def inconsistent?
    handle_count = @handles.size
    handle_count > 1 || (handle_count != 0 && handle_count != size)
  end

  # True when at least one symbol has more than one action registered.
  def conflict?
    !conflicts.empty?
  end

  # Symbols that have more than one action registered.
  def conflicts
    @actions.keys.select { |symbol| @actions[symbol].size > 1 }
  end

  # The production of the only kernel item when that item is a handle, else nil.
  def read_reduce
    return nil unless @handles.size == 1 && size == 1
    first.production
  end

  # Builds the state reached over +symbol+, or nil when no item has the dot
  # in front of +symbol+.
  def goto symbol
    advanced = @all_items.select { |item| item.dot_symbol == symbol }.map { |item| item.next }
    advanced.empty? ? nil : State.new(@augmented_grammar, advanced)
  end

  def add_reduce symbol, production
    reduce = Aurum::ParsingTable::Action.new(:reduce, production)
    add_action(symbol, reduce)
  end

  def add_read_reduce symbol, production
    read_reduce_action = Aurum::ParsingTable::Action.new(:read_reduce, production)
    add_action(symbol, read_reduce_action)
  end

  # Registers a shift to +state+ and links +state+ as goto target of +symbol+.
  def add_shift symbol, state
    shift = Aurum::ParsingTable::Action.new(:shift, state.index)
    add_action(symbol, shift)
    add_goto_state(symbol, state)
  end

  # Links +state+ as goto target and replaces every action on +symbol+ with a
  # single lookahead shift.
  def add_lookahead_shift symbol, state
    add_goto_state(symbol, state)
    lookahead = Aurum::ParsingTable::Action.new(:lookahead_shift, state.index)
    @actions[symbol] = [lookahead]
  end

  # Appends +action+ to the actions of +symbol+ unless an equal one is present.
  def add_action symbol, action
    @actions[symbol] = [] unless @actions.has_key?(symbol)
    bucket = @actions[symbol]
    bucket << action unless bucket.include?(action)
  end

  # Without argument: the symbol => [states] table of direct predecessors.
  # With +symbols+: the states from which this state is reached by following
  # +symbols+ in order (computed by walking the table backwards).
  def predecessors(symbols = nil)
    return @predecessors unless symbols
    frontier = [self]
    symbols.reverse_each do |symbol|
      frontier = frontier.inject([]) do |found, state|
        incoming = state.predecessors[symbol]
        incoming ? found | incoming : found
      end
    end
    frontier
  end

  # Union of the first sets of what remains in each kernel item of the goto
  # target for +symbol+. Returns an empty Array when +symbol+ is not expected.
  def read_set symbol
    return [] unless @expect_symbols.include?(symbol)
    target = @goto_states[symbol]
    return [].to_set unless target
    target.inject([].to_set) { |firsts, item| firsts | first_set_of(item.remaining) }
  end

  # For every item with the dot before +nonterminal+ and a nullable rest,
  # yields (predecessor state, item's nonterminal) for each state reached by
  # walking back over the symbols left of the dot.
  def include_each nonterminal
    return unless @expect_symbols.include?(nonterminal)
    @all_items.each do |item|
      next unless item.dot_symbol == nonterminal && nullable?(item.remaining[1..-1])
      left_context = item.production.symbols[0, item.position]
      predecessors(left_context).each do |predecessor|
        yield predecessor, item.production.nonterminal
      end
    end
  end

  # For each shift-type action on +nonterminal+, yields the goto target with
  # its expected terminals (direct) and expected nullable symbols (indirect).
  def read nonterminal
    return unless @expect_symbols.include?(nonterminal)
    @actions[nonterminal].each do |action|
      next unless action.shift_action?
      target = @goto_states[nonterminal]
      direct = []
      indirect = []
      target.expect_symbols.each do |symbol|
        direct << symbol if symbol.is_terminal
        indirect << symbol if @augmented_grammar.nullable?(symbol)
      end
      yield(target, direct, indirect)
    end
  end

  # Yields (production, position) for items whose last symbol is +nonterminal+
  # with the dot directly in front of it.
  def read_reduce_items nonterminal
    return unless @expect_symbols.include?(nonterminal)
    @all_items.each do |item|
      yield(item.production, item.position) if item.reducable?(nonterminal)
    end
  end

  # Yields (production, position) for items with the dot before +nonterminal+
  # whose symbols after it are all nullable.
  def reducable_items nonterminal
    return unless @expect_symbols.include?(nonterminal)
    @all_items.each do |item|
      next unless item.dot_symbol == nonterminal && nullable?(item.remaining[1..-1])
      yield(item.production, item.position)
    end
  end

  private

  # Computes the closure of +items+ into @all_items and returns its kernel items.
  def kernels_of_closure items
    @all_items = items.dup
    kernels = @all_items.select { |item| item.kernel? }
    Builder.working_list(items.dup) do |unvisited, visiting|
      symbol = visiting.dot_symbol
      if symbol && !symbol.is_terminal
        @augmented_grammar.productions(symbol).each do |production|
          candidate = LRItem.new(production)
          next if @all_items.include?(candidate)
          @all_items << candidate
          unvisited << candidate
          kernels << candidate if candidate.kernel?
        end
      end
    end
    kernels
  end

  # First set of a symbol sequence: accumulate first sets up to and including
  # the first non-nullable symbol.
  def first_set_of symbols
    firsts = [].to_set
    symbols.each do |symbol|
      firsts |= @augmented_grammar.first_set(symbol)
      break unless @augmented_grammar.nullable?(symbol)
    end
    firsts
  end

  def nullable? symbols
    symbols.empty? || symbols.all? { |symbol| @augmented_grammar.nullable?(symbol) }
  end

  # Records +state+ as goto target of +symbol+ and this state as one of its
  # predecessors over +symbol+.
  def add_goto_state symbol, state
    @goto_states[symbol] = state
    incoming = (state.predecessors[symbol] ||= [])
    incoming << self
  end
end
189
+ end
190
+ end
@@ -0,0 +1,20 @@
1
+ module Aurum
2
# Compiled parsing table: productions, per-state actions and lookahead data.
class ParsingTable < Struct.new(:productions, :actions, :lookahead)
  # A single parser action: a type tag (:shift, :reduce, :lookahead_shift or
  # :read_reduce) plus the value it carries.
  class Action < Struct.new(:type, :value)
    # Exact-type predicates, one per action type.
    def shift?
      type == :shift
    end

    def reduce?
      type == :reduce
    end

    def lookahead_shift?
      type == :lookahead_shift
    end

    def read_reduce?
      type == :read_reduce
    end

    # True for both kinds of shifting action.
    def shift_action?
      [:shift, :lookahead_shift].include?(type)
    end

    # True for both kinds of reducing action.
    def reduce_action?
      [:reduce, :read_reduce].include?(type)
    end
  end
end
16
+
17
# Compiled lexical table: states, named lexical states and their actions.
class LexicalTable < Struct.new(:states, :lexical_states, :actions)
  # What the lexer does on a match: the token to recognize, the lexical state
  # to move to, the event to raise and an optional semantic action.
  ACTION_FIELDS = [:token, :state, :event, :action]
  Action = Struct.new(*ACTION_FIELDS)
end
20
+ end
@@ -0,0 +1,94 @@
1
+ module Aurum
2
+ class Grammar
3
+ module DSL
4
# DSL receiver for character-class definitions. Nearly every inherited public
# method is removed so that arbitrary names fall through to #method_missing,
# where `name char_set` defines a class and a bare `name` looks one up.
class CharacterClassesBuilder
  # instance_methods yields Strings on Ruby 1.8 but Symbols on 1.9 and later.
  # Compare by name so that instance_eval (needed to evaluate DSL blocks) and
  # the __xxx__ methods are kept on every Ruby version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :__named_character_classes__

  # Seeds the table with the predefined character classes.
  def initialize
    @__named_character_classes__ = {
      'any' => Aurum::Grammar::LexicalRules::CharacterSet.any,
      'underscore' => enum('_'), 'single_quote' => enum("'"), 'double_quote' => enum('"'),
      'ascii' => range(0, 255), 'decimal_number' => range(?0, ?9),
      'ascii_punctuation' => range(33, 47) + range(58, 64) + range(91, 96) + range(123, 126)
    }
  end

  # Character set covering first..last (a single character when last is omitted).
  def range first, last = first
    Grammar.range(first, last)
  end

  # Character set containing each character of +literal+.
  def enum literal
    Grammar.enum(literal)
  end

  # `name`          - returns the class registered under name (nil if none).
  # `name char_set` - registers a new class; non-CharacterSet values are
  #                   converted with enum(value.to_s).
  # Raises RuntimeError when the name is already defined.
  def method_missing name, char_set = nil
    name = name.to_s
    return @__named_character_classes__[name] unless char_set
    raise "already defined character class: #{name}" if @__named_character_classes__.has_key? name
    @__named_character_classes__[name] = char_set.kind_of?(Aurum::Grammar::LexicalRules::CharacterSet) ? char_set : enum(char_set.to_s)
  end
end
30
+
31
# DSL receiver for lexical rules. Unknown method names are interpreted by
# #method_missing: `_token pattern...` registers a token rule, `name pattern...`
# defines a named pattern and a bare `name` looks a pattern up.
class LexicalRulesBuilder
  # instance_methods yields Strings on Ruby 1.8 but Symbols on 1.9 and later;
  # compare by name so instance_eval (used by #within) is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval'
  end

  # lexical_rules     - receives add_lexical_action(state, pattern, action).
  # character_classes - answers __named_character_classes__ (name => char set).
  def initialize lexical_rules, character_classes
    @lexical_rules, @character_classes = lexical_rules, character_classes
    @named_patterns, @current_states = {}, [['initial']]
  end

  def range first, last = first
    Aurum::Grammar::LexicalRules::Pattern.character_set(Grammar.range(first, last))
  end

  def enum literal
    Aurum::Grammar::LexicalRules::Pattern.enum(literal)
  end

  def string literal
    Aurum::Grammar::LexicalRules::Pattern.string literal
  end

  # Concatenates the given patterns; non-Pattern arguments become string patterns.
  def concat *patterns
    Aurum::Grammar::LexicalRules::Pattern.concat(patterns.map do |pattern|
      pattern.is_a?(Aurum::Grammar::LexicalRules::Pattern) ? pattern : string(pattern.to_s)
    end)
  end

  # Evaluates +definition+ with +states+ as the current lexical states. Named
  # patterns defined inside the block are discarded afterwards.
  # Returns the restored named-pattern table.
  def within *states, &definition
    # Keep the snapshot in a local: an instance variable would be overwritten
    # by a nested #within and leak inner-scope patterns into the outer scope.
    saved_named_patterns = @named_patterns.dup
    @current_states << states.map { |state| state.to_s }
    begin
      instance_eval(&definition)
    ensure
      # Restore even when the block raises, so the builder stays usable.
      @current_states.pop
      @named_patterns = saved_named_patterns
    end
    @named_patterns
  end

  # Like #match, but the matched text is recognized as '$ignored'.
  def ignore pattern, options = {}, &semantic_action
    # merge rather than assign, so the caller's hash is left untouched.
    match(pattern, options.merge(:recognize => '$ignored'), &semantic_action)
  end

  # Registers +pattern+ in every current lexical state.
  # options: :recognize (token name), :event, :shift_to (next lexical state).
  def match pattern, options = {}, &semantic_action
    event = options[:event].to_s if options.has_key?(:event)
    token = options[:recognize].to_s if options.has_key?(:recognize)
    lexical_state = options[:shift_to].to_s if options.has_key?(:shift_to)
    action = Aurum::LexicalTable::Action.new(token, lexical_state, event, semantic_action)
    pattern = string(pattern.to_s) unless pattern.is_a?(Aurum::Grammar::LexicalRules::Pattern)
    @current_states.last.each do |state|
      @lexical_rules.add_lexical_action(state, pattern, action)
    end
  end

  # `_name patterns...` - match the concatenation and recognize it as _name.
  # `name patterns...`  - define a named pattern.
  # `name`              - look up a named pattern, then a character class;
  #                       raises RuntimeError when neither exists.
  def method_missing name, *patterns, &semantic_action
    name = name.to_s
    return match(concat(*patterns), :recognize => name, &semantic_action) if name =~ /^_/
    return @named_patterns[name] = concat(*patterns) unless patterns.empty?
    return @named_patterns[name] if @named_patterns.has_key? name
    character_classes = @character_classes.__named_character_classes__
    return Aurum::Grammar::LexicalRules::Pattern.character_set(character_classes[name]) if character_classes.has_key? name
    raise "can not find pattern named '#{name}'"
  end
end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,79 @@
1
+ module Aurum
2
+ class Grammar
3
+ module DSL
4
# DSL receiver for operator precedence declarations. Each non-empty
# declaration is registered at the current level, after which the level is
# decremented.
class PrecedencesBuilder
  def initialize syntax_rules
    @syntax_rules = syntax_rules
    @level = 0
  end

  def nonassoc *operators
    operator(:non_associative, *operators)
  end

  def left *operators
    operator(:left_associative, *operators)
  end

  def right *operators
    operator(:right_associative, *operators)
  end

  # Registers every operator with the given associativity at the current
  # level. String operators are stored under "$literal_<text>", anything else
  # under its to_s. The level only moves when at least one operator was given.
  def operator associative, *operators
    operators.each do |op|
      symbol_name = op.is_a?(String) ? "$literal_#{op}" : op.to_s
      precedence = Aurum::Grammar.precedence(associative, @level)
      @syntax_rules.add_operator_precedence(symbol_name, precedence)
    end
    @level -= 1 unless operators.empty?
  end
end
29
+
30
# DSL receiver for grammar productions. Every unknown method name is treated
# as a grammar symbol: a bare `name` returns the symbol, `name sym, ...`
# declares a production for it.
class ProductionsBuilder
  # instance_methods yields Strings on Ruby 1.8 but Symbols on 1.9 and later;
  # compare by name so instance_eval is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval'
  end

  # syntax_rules - receives add_syntax_rule(production) and add_literal(text).
  # actions      - hash filled with production name => semantic action.
  def initialize syntax_rules, actions
    @syntax_rules, @actions, @symbols = syntax_rules, actions, {}
  end

  private

  # `name`                - returns the symbol for name (Epsilon when the name
  #                         maps to no symbol); a given block becomes its action.
  # `name a, b, [:label]` - adds a production name -> a b. A trailing Ruby
  #                         Symbol names the production; the block, or else the
  #                         action of the last grammar symbol, is its action.
  def method_missing name, *arguments, &action
    name = name.to_s
    __add_symbol__ name
    symbol = (@symbols[name] || Aurum::Grammar::Epsilon)
    if arguments.empty?
      symbol.action = block_given? ? action : nil
      return symbol
    end
    production_name = arguments.pop.to_s if arguments.last.is_a?(Symbol)
    production_action = action || __action_of_last_grammar_symbol__(arguments)
    symbols = arguments.inject([]) do |result, arg|
      # Everything from Epsilon onwards is dropped from the right-hand side.
      break result if arg == Aurum::Grammar::Epsilon
      result << (arg.is_a?(Aurum::Grammar::SyntaxRules::Symbol) ? arg : __add_literal__(arg))
    end
    production = Aurum::Grammar.production(symbol, symbols)
    production.name = production_name || "$production_#{production.object_id}"
    result = @syntax_rules.add_syntax_rule(production)
    @actions[production.name] = production_action if result && production_action
  end

  # Registers a literal with the syntax rules and returns its terminal,
  # cached under "$literal_<text>".
  def __add_literal__ literal
    @syntax_rules.add_literal literal.to_s
    literal = "$literal_#{literal}"
    @symbols.has_key?(literal) ? @symbols[literal] : @symbols[literal] = Aurum::Grammar.terminal(literal)
  end

  # Creates the symbol for +name+ on first use: lower-case names are
  # nonterminals, "_x" and "$literal_" names are terminals. A bare "_" is
  # never stored. `then` replaces the `when ... :` form, which Ruby 1.9
  # and later reject as a syntax error.
  def __add_symbol__ name
    return if @symbols.has_key?(name) || name == '_'
    @symbols[name] = case name
                     when /^[a-z]/ then Aurum::Grammar.nonterminal(name)
                     when /^_.+/ then Aurum::Grammar.terminal(name)
                     when /\$literal_/ then Aurum::Grammar.terminal(name)
                     end
  end

  # Action attached to the last argument when it is a grammar symbol, else nil.
  def __action_of_last_grammar_symbol__ symbols
    last = symbols.last
    last.is_a?(Aurum::Grammar::SyntaxRules::Symbol) ? last.action : nil
  end
end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,224 @@
1
+ require File.join(File.dirname(__FILE__), 'automata')
2
+
3
+ module Aurum
4
+ class Grammar
5
+ class LexicalRules
6
+ attr_reader :patterns
7
# Starts with no lexical states; @patterns maps a state name to a
# pattern => action table (filled by add_lexical_action).
def initialize
  @patterns = Hash.new
end
10
+
11
# Registers +action+ for +pattern+ in the lexical state named state.to_s,
# creating the state's table on first use. An existing entry for the same
# pattern is overwritten. Returns +action+.
def add_lexical_action state, pattern, action
  key = state.to_s
  @patterns.store(key, {}) unless @patterns.key?(key)
  @patterns.fetch(key).store(pattern, action)
end
16
+
17
# A regular-expression fragment: an automaton plus the index of its accepting
# state. State 0 is used as the entry state by every combinator below.
class Pattern
  # Pattern matching +literal+ byte by byte along a chain of states.
  def self.string(literal)
    automata = Automata.new(literal.size + 1)
    state = 0
    literal.each_byte do |byte|
      following = state + 1
      automata.connect(state, CharacterSet::Interval.new(byte).to_char_set, following)
      state = following
    end
    Pattern.new(automata, state)
  end

  # Pattern matching any single character of +literal+.
  def self.enum(literal)
    character_set(CharacterSet.enum(literal))
  end

  # Pattern matching one character out of +char_set+.
  def self.character_set(char_set)
    automata = Automata.new(2)
    accept = automata.connect(0, char_set, 1)
    Pattern.new(automata, accept)
  end

  # Pattern matching the given patterns one after another: each automaton is
  # merged in and entered via an Epsilon transition from the previous accept.
  def self.concat patterns
    automata = Automata.new
    tail = patterns.inject(0) do |state, pattern|
      offset = automata.merge!(pattern.automata)
      automata.connect(state, Epsilon, offset) + pattern.accept
    end
    Pattern.new(automata, tail)
  end

  attr_reader :automata, :accept

  def initialize automata, accept
    @automata = automata
    @accept = accept
  end

  # Repetition: Epsilon transitions allow both skipping and looping.
  def zero_or_more
    copy = @automata.dup
    copy.connect(0, Epsilon, @accept)
    copy.connect(@accept, Epsilon, 0)
    Pattern.new(copy, @accept)
  end

  # Repetition with at least one occurrence: only the loop-back transition.
  def one_or_more
    copy = @automata.dup
    copy.connect(@accept, Epsilon, 0)
    Pattern.new(copy, @accept)
  end

  # Optional occurrence: only the skip transition.
  def zero_or_one
    copy = @automata.dup
    copy.connect(0, Epsilon, @accept)
    Pattern.new(copy, @accept)
  end

  # Complement: determinize, route every character a state does not handle to
  # a sink state that loops on any character, then connect every state that
  # is not among the determinized accepts to a fresh accepting state.
  def not
    deterministic, accepts = SubsetDeterminizer.new(@automata, [@accept]).determinize
    sink = deterministic.new_state
    deterministic.connect(sink, CharacterSet.any, sink)
    (0...sink).each do |state|
      uncovered = deterministic.table[state].inject(CharacterSet.any) do |rest, tran|
        rest - tran.character_set
      end
      deterministic.connect(state, uncovered, sink) unless uncovered.empty?
    end
    accept = deterministic.new_state
    (0...accept).each do |state|
      deterministic.connect(state, Epsilon, accept) unless accepts.has_key?(state)
    end
    Pattern.new(deterministic, accept)
  end

  # Bounded repetition: +least+ mandatory copies followed by (most - least)
  # optional ones.
  def [] least, most = least
    mandatory = [self] * least
    optional = [zero_or_one] * (most - least)
    Pattern.concat(mandatory + optional)
  end

  # Alternation: both automata are merged in and wired between state 0 and
  # the shared accepting state 1 with Epsilon transitions.
  def | other
    automata = Automata.new(2)
    [self, other].each do |pattern|
      entry = automata.connect(0, Epsilon, automata.merge!(pattern.automata))
      automata.connect(entry + pattern.accept, Epsilon, 1)
    end
    Pattern.new(automata, 1)
  end

  # The complement of "anything, self, anything", followed by self.
  def ~
    anything = Pattern.character_set(CharacterSet.any).zero_or_more
    containing_self = Pattern.concat([anything, self, anything])
    Pattern.concat([containing_self.not, self])
  end
end
96
+
97
# A set of character codes stored as a sorted list of disjoint, non-adjacent
# closed intervals.
class CharacterSet
  # Set containing every byte of +literal+.
  def self.enum(literal)
    intervals = []
    literal.each_byte { |char| intervals << Interval.new(char) }
    CharacterSet.new(intervals)
  end

  # Set covering a..b (a single code when b is omitted).
  def self.range(a, b = a)
    Interval.new(a, b).to_char_set
  end

  # Set covering the codes 0..65535.
  def self.any
    range(0, 65535)
  end

  attr_reader :intervals

  # intervals - Intervals in any order; they may overlap or repeat. The
  #             argument and its elements are left untouched: the set keeps
  #             its own normalized copies.
  def initialize(intervals = [])
    @intervals = merge_intervals(intervals)
  end

  def include? char
    @intervals.any? { |interval| interval.include?(char) }
  end

  def empty?
    @intervals.empty?
  end

  # Union of both sets.
  def + other
    CharacterSet.new(@intervals + other.intervals)
  end

  # Difference: every code of this set that is not in +other+. Each interval
  # of +other+ is cut out of every interval it overlaps, including intervals
  # it covers completely.
  def - other
    remaining = other.intervals.inject(@intervals) do |kept, cut|
      kept.inject([]) do |result, interval|
        if cut.last < interval.first || interval.last < cut.first
          result << interval # disjoint: keep as is
        else
          result << Interval.new(interval.first, cut.first - 1) if interval.first < cut.first
          result << Interval.new(cut.last + 1, interval.last) if cut.last < interval.last
          result
        end
      end
    end
    CharacterSet.new(remaining)
  end

  # Start and end Points of every interval, each tagged with +destination+.
  def to_points destination
    @intervals.inject([]) do |points, interval|
      points << Point.new(interval.first, true, destination)
      points << Point.new(interval.last, false, destination)
    end
  end

  def == other
    return false unless other.is_a?(CharacterSet)
    other.intervals == @intervals
  end

  def inspect
    @intervals.map { |interval| interval.inspect }.join(',')
  end

  private

  # Returns a new sorted list in which overlapping or adjacent intervals are
  # fused. Works on copies so Interval objects shared with other sets are
  # never mutated, and never removes entries by equality (which used to drop
  # the merged interval together with the absorbed one).
  def merge_intervals intervals
    merged = []
    intervals.sort.each do |interval|
      merged << interval.dup unless merged.last && merged.last.merge!(interval)
    end
    merged
  end

  # Closed interval first..last of character codes, ordered by start and then
  # by length.
  class Interval < Struct.new(:first, :last)
    include Comparable

    def initialize first, last = first
      super first, last
    end

    # Accepts a character code, or a String whose first byte is tested.
    def include? char
      # unpack yields the byte value on every Ruby version; String#[] with an
      # index returns a one-character String from Ruby 1.9 on.
      char = char.unpack('C').first if char.kind_of? String
      first <= char && char <= last
    end

    # Extends this interval to cover +other+ when they overlap or touch.
    # Returns self on success and nil (leaving self unchanged) otherwise.
    def merge! other
      return nil unless include?(other.first) || include?(other.last) || other.first - last == 1 || first - other.last == 1
      self.first, self.last = [first, other.first].min, [last, other.last].max
      self
    end

    def <=> other
      return first <=> other.first unless first == other.first
      (last - first) <=> (other.last - other.first)
    end

    def to_char_set
      CharacterSet.new([self])
    end

    def inspect
      first == last ? chr_of(first) : "#{chr_of(first)}-#{chr_of(last)}"
    end

    private

    # Printable form of a code: "#n;" below 33, the character itself when
    # Integer#chr can produce one, the decimal number otherwise.
    def chr_of integer
      return "##{integer};" if integer < 33
      integer.chr
    rescue RangeError
      integer.to_s
    end
  end
end
204
+ Epsilon = CharacterSet.new
205
+
206
# An interval boundary: the character code, whether it opens (is_start) or
# closes an interval, and the destination it belongs to. Points order by
# character; at the same character a start point sorts before an end point.
# The destination takes no part in the comparison.
class Point < Struct.new(:char, :is_start, :destination)
  include Comparable

  def <=> other
    return char <=> other.char unless char == other.char
    # Same character: equal when both are starts or both are ends.
    return 0 if !is_start == !other.is_start
    is_start ? -1 : 1
  end
end
212
+ end
213
+
214
# Builds the character set covering first..last (a single character when
# +last+ is omitted). Either bound may be an Integer code or a String, in
# which case the String's first byte is used.
def Grammar.range first, last = first
  # unpack('C') yields the byte value on every Ruby version; String#[] with an
  # index returns a one-character String (not a byte) from Ruby 1.9 on.
  first = first.unpack('C').first if first.kind_of? String
  last = last.unpack('C').first if last.kind_of? String
  Aurum::Grammar::LexicalRules::CharacterSet.range(first, last)
end
219
+
220
# Builds the character set containing every byte of literal.to_s.
def Grammar.enum literal
  text = literal.to_s
  Aurum::Grammar::LexicalRules::CharacterSet.enum(text)
end
223
+ end
224
+ end