dendroid 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 989c88ea00734b7145dfc64ded1318c296a3678039213904c15ead0a306942a2
4
- data.tar.gz: 73aa3efff885fcf1f71f23c7907b7c7b953fd3b174aefb9324a1326663652a9c
3
+ metadata.gz: 270fc74811d70652e19c4ed42cd11138a1fe9fc413e9b1856b982edfa28c5d51
4
+ data.tar.gz: 280351b252bd5c4a63f3082375053ea7d3bf9a9d0d32acc055dc33cce91ed628
5
5
  SHA512:
6
- metadata.gz: 906746623d20819ce0f1923c8156a27c7ce58b2d926174e0a1dff611407bd873378272c2a2125bb665c8aec20c3beef787942c9fc7f0376aff22e7db4fb6a235
7
- data.tar.gz: 34f3aabf74b7598a506e3acd38c9df03a7040203e8a9bda2fa02804b96ea0054d17c3044c7ca1384380bdb55af2cd80f1b9e88baa1b5c985642797cc68d80783
6
+ metadata.gz: 7a34047f56f1f488377afd88c4049b935d03d8a0a902cd44f8ffba3d58578c212c5ef7f0b1229192a7f4606b1d683d70ca479273d45d716d98154a38663f233f
7
+ data.tar.gz: 36578ffb40a0463a2e411000b24fa8005166c1ede8f6a856293c0122e44fdbb46d3758159042db0c9c4ccacf9c1bf071e49cfb86a64792b98fac8bb89447a85a
data/.rubocop.yml CHANGED
@@ -20,7 +20,7 @@ Metrics/CyclomaticComplexity:
20
20
 
21
21
  Metrics/MethodLength:
22
22
  Enabled: true
23
- Max: 30
23
+ Max: 60
24
24
 
25
25
  Metrics/PerceivedComplexity:
26
26
  Enabled: true
@@ -32,5 +32,8 @@ Naming/MethodParameterName:
32
32
  Naming/VariableName:
33
33
  Enabled: false
34
34
 
35
+ Style/AccessorGrouping:
36
+ Enabled: false
37
+
35
38
  Style/CommentedKeyword:
36
39
  Enabled: false
data/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.0.10] - 2023-11-01
6
+ Added missing class and method documentation, fixed some `RuboCop` offenses.
7
+
8
+
9
+ ## [0.0.9] - 2023-11-01
10
+ Added classes for tokenization and grammar analysis.
11
+
12
+ ### Added
13
+ - Class `AlternativeItem` and its spec file
14
+ - Class `BaseTokenizer` and its spec file
15
+ - Module `ChoiceItems` and its spec file
16
+ - Class `GrmAnalyzer` and its spec file
17
+ - Class `Literal` and its spec file
18
+ - Module `ProductionItems` and its spec file
19
+ - Class `Token` and its spec file
20
+ - Class `TokenPosition` and its spec file
21
+
5
22
  ## [0.0.8] - 2023-10-30
6
23
  ### Added
7
24
  - Class `DottedItem` and its spec file
data/dendroid.gemspec CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
7
7
  path = ::File.dirname(libpath) + ::File::SEPARATOR
8
8
  ::File.read("#{path}version.txt").strip
9
9
  end
10
- s.summary = 'Dendroid. TODO'
11
- s.description = 'WIP. A Ruby implementation of a Earley parser'
10
+ s.summary = 'WIP. A Ruby implementation of an Earley parser'
11
+ s.description = 'WIP. A Ruby implementation of an Earley parser'
12
12
  s.authors = ['Dimitri Geshef']
13
13
  s.email = 'famished.tiger@yahoo.com'
14
14
  s.files = Dir['bin/dendroid',
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'dotted_item'
4
+
5
+ module Dendroid
6
+ module GrmAnalysis
7
+ # A specialization of DottedItem specific to Choice rules.
8
+ class AlternativeItem < DottedItem
9
+ # @return [Integer] the alternative number
10
+ attr_reader :alt_index
11
+
12
+ # Constructor.
13
+ # @param aChoice [Dendroid::Syntax::Choice]
14
+ # @param aPosition [Integer] Position of the dot in rhs of production.
15
+ # @param index [Integer] the rank of the alternative at hand
16
+ def initialize(aChoice, aPosition, index)
17
+ @alt_index = index
18
+ super(aChoice, aPosition)
19
+ end
20
+
21
+ # Return a String representation of the alternative item.
22
+ # @return [String]
23
+ def to_s
24
+ rhs_names = rule.alternatives[alt_index].members.map(&:to_s)
25
+ dotted_rhs = rhs_names.insert(position, '.')
26
+ "#{rule.head} => #{dotted_rhs.join(' ')}"
27
+ end
28
+
29
+ # Indicate whether the rhs of the alternative is empty
30
+ # @return [Boolean]
31
+ def empty?
32
+ rule.alternatives[alt_index].empty?
33
+ end
34
+
35
+ # Indicate whether the dot is at the end of the rhs
36
+ # @return [Boolean]
37
+ def final_pos?
38
+ empty? || position == rule.alternatives[alt_index].size
39
+ end
40
+
41
+ alias completed? final_pos?
42
+
43
+ # Return the symbol right after the dot (if any)
44
+ # @return [Dendroid::Syntax::GrmSymbol, NilClass]
45
+ def next_symbol
46
+ return nil if empty? || completed?
47
+
48
+ rule.alternatives[alt_index].members[position]
49
+ end
50
+
51
+ # Test for equality with another dotted item.
52
+ # Two dotted items are equal if they refer to the same rule and
53
+ # have the same alternative (rhs) and the same dot position.
54
+ # @return [Boolean]
55
+ def ==(other)
56
+ return true if eql?(other)
57
+
58
+ (position == other.position) && rule.eql?(other.rule) && (alt_index == other.alt_index)
59
+ end
60
+
61
+ private
62
+
63
+ def valid_position(aPosition)
64
+ raise StandardError if aPosition.negative? || aPosition > rule.alternatives[alt_index].size
65
+
66
+ aPosition
67
+ end
68
+ end # class
69
+ end # module
70
+ end # module
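A minimal usage sketch of `AlternativeItem`, reusing the fixture exercised in `choice_items_spec.rb` further down. Illustrative only: the `require` paths assume the gem's `lib/` directory is on the load path, and the commented results are inferred from the `to_s`, `next_symbol` and `completed?` implementations above.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/syntax/terminal'
require 'dendroid/syntax/non_terminal'
require 'dendroid/syntax/symbol_seq'
require 'dendroid/syntax/choice'
require 'dendroid/grm_analysis/alternative_item'

# Same fixture as in choice_items_spec.rb:
# expression => NUMBER PLUS NUMBER | NUMBER STAR NUMBER | (empty)
num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
star = Dendroid::Syntax::Terminal.new('STAR')
expr = Dendroid::Syntax::NonTerminal.new('expression')
alt1 = Dendroid::Syntax::SymbolSeq.new([num, plus, num])
alt2 = Dendroid::Syntax::SymbolSeq.new([num, star, num])
alt3 = Dendroid::Syntax::SymbolSeq.new([])
choice = Dendroid::Syntax::Choice.new(expr, [alt1, alt2, alt3])

# Dot after the first member of the first alternative
item = Dendroid::GrmAnalysis::AlternativeItem.new(choice, 1, 0)
item.to_s              # => "expression => NUMBER . PLUS NUMBER"
item.next_symbol.to_s  # => "PLUS"  (symbol right after the dot)
item.completed?        # => false   (dot is not at the end of the alternative)
```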
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'alternative_item'
4
+
5
+ module Dendroid
6
+ module GrmAnalysis
7
+ # Mix-in module for extending the Syntax::Choice class
8
+ # with dotted items manipulation methods
9
+ module ChoiceItems
10
+ # Build the alternative items for this choice and assign them
11
+ # to the `items` attributes
12
+ # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
13
+ def build_items
14
+ # AlternativeItem
15
+ @items = Array.new(alternatives.size) { |_| [] }
16
+ alternatives.each_with_index do |alt_seq, index|
17
+ if alt_seq.empty?
18
+ @items[index] << AlternativeItem.new(self, 0, index)
19
+ else
20
+ (0..alt_seq.size).each do |pos|
21
+ @items[index] << AlternativeItem.new(self, pos, index)
22
+ end
23
+ end
24
+ end
25
+ end
26
+
27
+ # Read accessor for the `items` attribute.
28
+ # Return the dotted items for this production
29
+ # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
30
+ def items
31
+ @items
32
+ end
33
+
34
+ # Return the predicted items (i.e. the alternative items with the dot at start)
35
+ # for this choice.
36
+ # @return [Array<GrmAnalysis::AlternativeItem>]
37
+ def predicted_items
38
+ @items.map(&:first)
39
+ end
40
+
41
+ # Return the reduce items (i.e. the alternative items with the dot at end)
42
+ # for this choice.
43
+ # @return [Array<GrmAnalysis::AlternativeItem>]
44
+ def reduce_items
45
+ @items.map(&:last)
46
+ end
47
+
48
+ # Return the next item given the provided item.
49
+ # In other words, advance the dot by one position.
50
+ # @param anItem [GrmAnalysis::AlternativeItem]
51
+ # @return [GrmAnalysis::AlternativeItem|NilClass]
52
+ def next_item(anItem)
53
+ items_arr = items[anItem.alt_index]
54
+ return nil if anItem == items_arr.last
55
+
56
+ items_arr[anItem.position + 1]
57
+ end
58
+ end # module
59
+ end # module
60
+ end # module
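A short sketch of the mix-in workflow: a `Choice` rule is extended with `ChoiceItems`, its dotted items are built, then queried. Same assumptions as above (the gem's `lib/` directory on the load path); the commented results are inferred from the methods in this module and from `choice_items_spec.rb` below.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/syntax/terminal'
require 'dendroid/syntax/non_terminal'
require 'dendroid/syntax/symbol_seq'
require 'dendroid/syntax/choice'
require 'dendroid/grm_analysis/choice_items'

num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
star = Dendroid::Syntax::Terminal.new('STAR')
expr = Dendroid::Syntax::NonTerminal.new('expression')
alts = [
  Dendroid::Syntax::SymbolSeq.new([num, plus, num]),
  Dendroid::Syntax::SymbolSeq.new([num, star, num]),
  Dendroid::Syntax::SymbolSeq.new([])
]
choice = Dendroid::Syntax::Choice.new(expr, alts)

# Graft the mix-in onto the rule object, then derive its dotted items
choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
choice.build_items

choice.items.map(&:size)            # => [4, 4, 1]  (one item array per alternative)
choice.predicted_items.map(&:to_s)  # items with the dot at the start of each alternative
# => ["expression => . NUMBER PLUS NUMBER", "expression => . NUMBER STAR NUMBER", "expression => ."]
choice.next_item(choice.items[0][1]).to_s
# => "expression => NUMBER PLUS . NUMBER"
```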
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dendroid
4
+ # This module contains classes that analyze the grammar rules and help build the objects
5
+ # needed by a recognizer or a parser for the language.
4
6
  module GrmAnalysis
5
7
  # For a given production rule, a dotted item represents a recognition state.
6
8
  # The dot partitions the rhs of the rule in two parts:
@@ -87,7 +89,7 @@ module Dendroid
87
89
  end
88
90
 
89
91
  # Check whether the given symbol is the same as after the dot.
90
- # @param [Dendroid::Syntax::GrmSymbol]
92
+ # @param aSymbol [Dendroid::Syntax::GrmSymbol]
91
93
  # @return [Boolean]
92
94
  def expecting?(aSymbol)
93
95
  actual = next_symbol
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../grm_analysis/production_items'
4
+ require_relative '../grm_analysis/choice_items'
5
+
6
+ module Dendroid
7
+ module GrmAnalysis
8
+ # An analyzer performs an analysis of the grammar rules and
9
+ # builds objects (dotted items, first and follow sets) to be used
10
+ # by a recognizer or a parser.
11
+ class GrmAnalyzer
12
+ # @return [Dendroid::Syntax::Grammar] The grammar subjected to analysis
13
+ attr_reader :grammar
14
+ # @return [Array<GrmAnalysis::DottedItem>] All the dotted items built from the grammar rules
+ attr_reader :items
15
+
+ # @return [Hash{Syntax::Rule, Array<GrmAnalysis::DottedItem>}] Mapping from a grammar rule to its dotted items
+ attr_reader :production2items
16
+
+ # @return [Hash{Syntax::NonTerminal, Array<Syntax::Rule>}] Mapping from a non-terminal (rule head) to its defining rules
+ attr_reader :symbol2productions
17
+
18
+ # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `__epsilon` (for empty string)
19
+ attr_reader :epsilon
20
+
21
+ # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `$$` for end of input stream
22
+ attr_reader :endmarker
23
+
24
+ # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FIRST SETS mapping
25
+ attr_reader :first_sets
26
+
27
+ # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to PREDICT SETS mapping
28
+ attr_reader :predict_sets
29
+
30
+ # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FOLLOW SETS mapping
31
+ attr_reader :follow_sets
32
+
33
+ # Constructor.
34
+ # Build dotted items, first, follow sets for the given grammar
35
+ # @param aGrammar [Dendroid::Syntax::Grammar]
36
+ def initialize(aGrammar)
37
+ @grammar = aGrammar
38
+ @items = []
39
+ @production2items = {}
40
+ @symbol2productions = {}
41
+ @epsilon = Syntax::Terminal.new(:__epsilon)
42
+ @endmarker = Syntax::Terminal.new(:"$$")
43
+ @first_sets = {}
44
+ @predict_sets = {}
45
+ @follow_sets = {}
46
+
47
+ build_dotted_items
48
+ build_first_sets
49
+ build_follow_sets
50
+ end
51
+
52
+ # The next item of a given dotted item
53
+ # @param aDottedItem [DottedItem]
54
+ def next_item(aDottedItem)
55
+ prod = aDottedItem.rule
56
+ prod.next_item(aDottedItem)
57
+ end
58
+
59
+ private
60
+
61
+ def build_dotted_items
62
+ grammar.rules.each do |prod|
63
+ lhs = prod.head
64
+ symbol2productions[lhs] = [] unless symbol2productions.include? lhs
65
+ symbol2productions[lhs] << prod
66
+ # production2items[prod] = []
67
+ mixin = prod.choice? ? ChoiceItems : ProductionItems
68
+ prod.extend(mixin)
69
+ prod.build_items
70
+ rule_items = prod.items.flatten
71
+ items.concat(rule_items)
72
+ production2items[prod] = rule_items
73
+ end
74
+ end
75
+
76
+ def build_first_sets
77
+ initialize_first_sets
78
+
79
+ begin
80
+ changed = false
81
+ grammar.rules.each do |prod|
82
+ head = prod.head
83
+ first_head = first_sets[head]
84
+ pre_first_size = first_head.size
85
+ if prod.choice?
86
+ prod.alternatives.each do |alt|
87
+ first_head.merge(sequence_first(alt.members))
88
+ end
89
+ else
90
+ first_head.merge(sequence_first(prod.body.members))
91
+ end
92
+ changed = true if first_head.size > pre_first_size
93
+ end
94
+ end until !changed
95
+ end
96
+
97
+ def initialize_first_sets
98
+ grammar.symbols.each do |symb|
99
+ if symb.terminal?
100
+ first_sets[symb] = Set.new([symb])
101
+ elsif symb.nullable?
102
+ first_sets[symb] = Set.new([epsilon])
103
+ else
104
+ first_sets[symb] = Set.new
105
+ end
106
+ end
107
+ end
108
+
109
+ def sequence_first(symbol_seq)
110
+ result = Set.new
111
+ symbol_seq.each do |symb|
112
+ result.delete(epsilon)
113
+ result.merge(first_sets[symb])
114
+ break unless symb.nullable?
115
+ end
116
+
117
+ result
118
+ end
119
+
120
+ # FOLLOW(A) is the set of terminals (plus the end marker) that may come after the
121
+ # non-terminal A.
122
+ def build_follow_sets
123
+ initialize_follow_sets
124
+
125
+ begin
126
+ changed = false
127
+ grammar.rules.each do |prod|
128
+ if prod.choice?
129
+ prod.alternatives.each do |alt|
130
+ body = alt.members
131
+ next if body.empty?
132
+
133
+ head = prod.head
134
+ head_follow = follow_sets[head]
135
+ # trailer = Set.new
136
+ last = true
137
+ last_index = body.size - 1
138
+ last_index.downto(0) do |i|
139
+ symbol = body[i]
140
+ next if symbol.terminal?
141
+
142
+ follow_symbol = follow_sets[symbol]
143
+ size_before = follow_symbol.size
144
+ if last
145
+ # Rule: if last non-terminal member (symbol) is nullable
146
+ # then add FOLLOW(head) to FOLLOW(symbol)
147
+ follow_sets[symbol].merge(head_follow) if symbol.nullable?
148
+ last = false
149
+ else
150
+ symbol_seq = body.slice(i + 1, last_index - i)
151
+ trailer_first = sequence_first(symbol_seq)
152
+ contains_epsilon = trailer_first.include? epsilon
153
+ trailer_first.delete(epsilon) if contains_epsilon
154
+ follow_sets[symbol].merge(trailer_first)
155
+ follow_sets[symbol].merge(head_follow) if contains_epsilon
156
+ end
157
+ changed = true if follow_sets[symbol].size > size_before
158
+ end
159
+ end
160
+ else
161
+ body = prod.body.members
162
+ next if body.empty?
163
+
164
+ head = prod.head
165
+ head_follow = follow_sets[head]
166
+ # trailer = Set.new
167
+ last = true
168
+ last_index = body.size - 1
169
+ last_index.downto(0) do |i|
170
+ symbol = body[i]
171
+ next if symbol.terminal?
172
+
173
+ follow_symbol = follow_sets[symbol]
174
+ size_before = follow_symbol.size
175
+ if last
176
+ # Rule: if last non-terminal member (symbol) is nullable
177
+ # then add FOLLOW(head) to FOLLOW(symbol)
178
+ follow_sets[symbol].merge(head_follow) if symbol.nullable?
179
+ last = false
180
+ else
181
+ symbol_seq = body.slice(i + 1, last_index - i)
182
+ trailer_first = sequence_first(symbol_seq)
183
+ contains_epsilon = trailer_first.include? epsilon
184
+ trailer_first.delete(epsilon) if contains_epsilon
185
+ follow_sets[symbol].merge(trailer_first)
186
+ follow_sets[symbol].merge(head_follow) if contains_epsilon
187
+ end
188
+ changed = true if follow_sets[symbol].size > size_before
189
+ end
190
+ end
191
+ end
192
+ end until !changed
193
+ end
194
+
195
+ def initialize_follow_sets
196
+ grammar.symbols.each do |symb|
197
+ next if symb.terminal?
198
+
199
+ follow_sets[symb] = Set.new
200
+ end
201
+
202
+ # Initialize FOLLOW(start symbol) with end marker
203
+ follow_sets[grammar.start_symbol].add(endmarker)
204
+ end
205
+ end # class
206
+ end # module
207
+ end # module
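A usage sketch of the analyzer, built on the grammar DSL exercised in `grm_analyzer_spec.rb` below. The `require` paths assume the gem's `lib/` directory is on the load path and that these two files pull in their own dependencies, as the spec file suggests; the commented results follow from the spec expectations for this grammar and from the set-construction code above.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/grm_dsl/base_grm_builder'
require 'dendroid/grm_analysis/grm_analyzer'

# Grammar borrowed from grm_analyzer_spec.rb (inspired by the Wikipedia entry on Earley parsing)
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
  declare_terminals('PLUS', 'STAR', 'INTEGER')

  rule('p' => 's')
  rule('s' => ['s PLUS m', 'm'])
  rule('m' => ['m STAR t', 't'])
  rule('t' => 'INTEGER')
end

analyzer = Dendroid::GrmAnalysis::GrmAnalyzer.new(builder.grammar)

# Dotted items are built for every rule of the grammar
analyzer.items.map(&:to_s).first(4)
# => ["p => . s", "p => s .", "s => . s PLUS m", "s => s . PLUS m"]

# FIRST and FOLLOW sets are keyed by grammar symbols
s_symbol = builder.grammar.name2symbol['s']
analyzer.first_sets[s_symbol].map(&:to_s)   # => ["INTEGER"]
analyzer.follow_sets[builder.grammar.name2symbol['p']].include?(analyzer.endmarker)
# => true  (the start symbol's FOLLOW set holds the end marker)
```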
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'dotted_item'
4
+
5
+ module Dendroid
6
+ module GrmAnalysis
7
+ # Mix-in module for extending the Dendroid::Syntax::Production class
8
+ # with dotted items manipulation methods and an attribute named `items`.
9
+ module ProductionItems
10
+ # Build the dotted items for this production and assign them
11
+ # to the `items` attribute
12
+ # @return [Array<GrmAnalysis::DottedItem>]
13
+ def build_items
14
+ @items = if empty?
15
+ [DottedItem.new(self, 0)]
16
+ else
17
+ (0..body.size).reduce([]) do |result, pos|
18
+ result << GrmAnalysis::DottedItem.new(self, pos)
19
+ end
20
+ end
21
+ end
22
+
23
+ # Read accessor for the `items` attribute.
24
+ # Return the dotted items for this production
25
+ # @return [Array<GrmAnalysis::DottedItem>]
26
+ def items
27
+ @items
28
+ end
29
+
30
+ # Return the predicted item (i.e. the dotted item with the dot at start)
31
+ # for this production.
32
+ # @return [Array<GrmAnalysis::DottedItem>]
33
+ def predicted_items
34
+ [@items.first]
35
+ end
36
+
37
+ # Return the reduce item (i.e. the dotted item with the dot at end)
38
+ # for this production.
39
+ # @return [Array<GrmAnalysis::DottedItem>]
40
+ def reduce_items
41
+ [@items.last]
42
+ end
43
+
44
+ # Return the next item given the provided item.
45
+ # In other words, advance the dot by one position.
46
+ # @param anItem [GrmAnalysis::DottedItem]
47
+ # @return [GrmAnalysis::DottedItem|NilClass]
48
+ def next_item(anItem)
49
+ return nil if anItem == @items.last
50
+
51
+ @items[anItem.position + 1]
52
+ end
53
+ end # module
54
+ end # module
55
+ end # module
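The same workflow for a plain `Production`: a sketch assuming the gem's `lib/` directory is on the load path; the commented results are inferred from `production_items_spec.rb` and from the `DottedItem#to_s` format shown earlier.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/syntax/terminal'
require 'dendroid/syntax/non_terminal'
require 'dendroid/syntax/symbol_seq'
require 'dendroid/syntax/production'
require 'dendroid/grm_analysis/production_items'

num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
expr = Dendroid::Syntax::NonTerminal.new('expression')
prod = Dendroid::Syntax::Production.new(expr, Dendroid::Syntax::SymbolSeq.new([num, plus, num]))

prod.extend(Dendroid::GrmAnalysis::ProductionItems)
prod.build_items

prod.items.size                    # => 4  (body size + 1 dot positions)
prod.items[0].to_s                 # => "expression => . NUMBER PLUS NUMBER"
prod.next_item(prod.items[0]).to_s # => "expression => NUMBER . PLUS NUMBER"
prod.next_item(prod.items.last)    # => nil (the dot is already at the end)
```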
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'token'
4
+
5
+ module Dendroid
6
+ module Lexical
7
+ # A literal (value) is a token that represents a data value in the parsed
8
+ # language. For instance, in Ruby, data values such as strings, numbers,
9
+ # regular expressions, ... can appear directly in the source code as text.
10
+ # These are examples of literal values. One responsibility of a tokenizer/lexer is
11
+ # to convert the text representation into a corresponding value in a
12
+ # convenient format for the interpreter/compiler.
13
+ class Literal < Token
14
+ # @return [Object] The value expressed in one of the target datatypes.
15
+ attr_reader :value
16
+
17
+ # Constructor.
18
+ # @param original [String] the piece of text from input
19
+ # @param pos [Dendroid::Lexical::TokenPosition] line, column position of token
20
+ # @param symbol [Dendroid::Syntax::Terminal, String]
21
+ # @param aValue [Object] value of the token in internal representation
22
+ def initialize(original, pos, symbol, aValue)
23
+ super(original, pos, symbol)
24
+ @value = aValue
25
+ end
26
+ end # class
27
+ end # module
28
+ end # module
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dendroid
4
+ # This module contains the core classes needed for lexical analysis.
5
+ # The lexical analysis (tokenization) aims to transform the input stream of characters
6
+ # into a sequence of tokens.
7
+ module Lexical
8
+ # A (lexical) token is an object created by a tokenizer (lexer)
9
+ # and passed to the parser. Such a token object is created when a lexer
10
+ # detects that a sequence of characters (a lexeme) from the input stream
11
+ # is an instance of a terminal grammar symbol.
12
+ # Say that, in a particular language, the lexeme 'foo' is an occurrence
13
+ # of the terminal symbol IDENTIFIER. Then the lexer will return a Token
14
+ # object that states the fact that 'foo' is indeed an IDENTIFIER. Basically,
15
+ # a Token is a pair (lexeme, terminal): it asserts that a given piece of text
16
+ # is an instance of a given terminal symbol.
17
+ class Token
18
+ # The sequence of character(s) from the input stream that is an occurrence
19
+ # of the related terminal symbol.
20
+ # @return [String] Input substring that is an instance of the terminal.
21
+ attr_reader :source
22
+
23
+ # @return [TokenPosition] The position -in "editor" coordinates- of the text in the source file.
24
+ attr_reader :position
25
+
26
+ # @return [String] The name of terminal symbol matching the text.
27
+ attr_reader :terminal
28
+
29
+ # Constructor.
30
+ # @param original [String] the piece of text from input
31
+ # @param pos [Dendroid::Lexical::TokenPosition] position of the token in source file
32
+ # @param symbol [Dendroid::Syntax::Terminal, String]
33
+ # The terminal symbol corresponding to the matching text.
34
+ def initialize(original, pos, symbol)
35
+ @source = original.dup
36
+ @position = pos
37
+ @terminal = symbol
38
+ end
39
+
40
+ # @return [String] The text representation of the token position
41
+ def pos_to_s
42
+ position.to_s
43
+ end
44
+ end # class
45
+ end # module
46
+ end # module
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dendroid
4
+ module Lexical
5
+ # Keeps track of the position of a token in the input stream.
6
+ class TokenPosition
7
+ # @return [Integer] The line number where the token begins
8
+ attr_reader :lineno
9
+
10
+ # @return [Integer] The column number where the token begins
11
+ attr_reader :column
12
+
13
+ # Constructor
14
+ # @param line [Integer] The line number where the token begins
15
+ # @param col [Integer] The column number where the token begins
16
+ def initialize(line, col)
17
+ @lineno = line
18
+ @column = col
19
+ end
20
+
21
+ # Return the position of the start of the token in line:col format
22
+ # @return [String]
23
+ def to_s
24
+ "#{lineno}:#{column}"
25
+ end
26
+ end # class
27
+ end # module
28
+ end # module
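A small sketch showing how the three lexical classes fit together, mirroring `token_spec.rb` and `literal_spec.rb` below; it assumes the gem's `lib/` directory is on the load path.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/lexical/token_position'
require 'dendroid/lexical/token'
require 'dendroid/lexical/literal'

pos = Dendroid::Lexical::TokenPosition.new(2, 5)   # line 2, column 5
keyword = Dendroid::Lexical::Token.new('else', pos, 'ELSE')
keyword.terminal   # => "ELSE"
keyword.pos_to_s   # => "2:5"

# A Literal additionally carries the converted value of the matched text
number = Dendroid::Lexical::Literal.new('42', pos, :INTEGER, 42)
number.source      # => "42"
number.value       # => 42
```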
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dendroid
4
- # The namespace for all classes used to build a grammar.
4
+ # This module contains all the classes representing the elements of a grammar.
5
5
  module Syntax
6
6
  # Abstract class for grammar symbols.
7
7
  # A grammar symbol is an element that appears in grammar rules.
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+ require_relative '../lexical/token_position'
5
+ require_relative '../lexical/literal'
6
+
7
+ module Dendroid
8
+ # This module contains helper classes (e.g. a tokenizer generator)
9
+ module Utils
10
+ # A basic tokenizer.
11
+ # Responsibility: break input into a sequence of token objects.
12
+ # This class defines a simple DSL to build a tokenizer.
13
+ class BaseTokenizer
14
+ # @return [StringScanner] Low-level input scanner
15
+ attr_reader :scanner
16
+
17
+ # @return [Integer] The current line number
18
+ attr_reader :lineno
19
+
20
+ # @return [Integer] Position of last start of line in the input string
21
+ attr_reader :line_start
22
+
23
+ # @return [Hash{Symbol, Array<Regexp>}]
24
+ attr_reader :actions
25
+
26
+ # Constructor
27
+ # @param aBlock [Proc]
28
+ def initialize(&aBlock)
29
+ @scanner = StringScanner.new('')
30
+ @actions = { skip: [], scan_verbatim: [], scan_value: [] }
31
+ defaults
32
+ return unless block_given?
33
+
34
+ instance_exec(&aBlock)
35
+ end
36
+
37
+ # Reset the tokenizer and set new text to tokenize
38
+ # @param source [String]
39
+ def input=(source)
40
+ reset
41
+ scanner.string = source
42
+ end
43
+
44
+ # Reset the tokenizer
45
+ def reset
46
+ @lineno = 1
47
+ @line_start = 0
48
+ scanner.reset
49
+ end
50
+
51
+ # action, pattern, terminal?, conversion?
52
+ # action = skip, skip_nl, scan
53
+
54
+ # Associate the provided pattern to the action of skipping a newline and
55
+ # incrementing the line counter.
56
+ # @param pattern [Regexp]
57
+ def skip_nl(pattern)
58
+ actions[:skip_nl] = pattern
59
+ end
60
+
61
+ # Associate the provided pattern with the action to skip whitespace(s).
62
+ # @param pattern [Regexp]
63
+ def skip_ws(pattern)
64
+ actions[:skip_ws] = pattern
65
+ end
66
+
67
+ # Associate the provided pattern with the action to skip the matching text.
68
+ # @param pattern [Regexp]
69
+ def skip(pattern)
70
+ if actions[:skip].empty?
71
+ actions[:skip] = pattern
72
+ else
73
+ new_pattern = actions[:skip].union(pattern)
74
+ actions[:skip] = new_pattern
75
+ end
76
+ end
77
+
78
+ # Associate the provided pattern with the action to tokenize the matching text
79
+ # @param pattern [Regexp]
80
+ def scan_verbatim(pattern)
81
+ patt = normalize_pattern(pattern)
82
+ if actions[:scan_verbatim].empty?
83
+ actions[:scan_verbatim] = patt
84
+ else
85
+ new_pattern = actions[:scan_verbatim].union(patt)
86
+ actions[:scan_verbatim] = new_pattern
87
+ end
88
+ end
89
+
90
+ # Associate the provided pattern with the action to tokenize the matching text
91
+ # as an instance of the given terminal symbol and convert the matching text into
92
+ # a value by using the given conversion.
93
+ # @param pattern [Regexp]
94
+ # @param terminal [Dendroid::Syntax::Terminal]
95
+ # @param conversion [Proc] a Proc (lambda) that takes a String as argument and returns a value.
96
+ def scan_value(pattern, terminal, conversion)
97
+ patt = normalize_pattern(pattern)
98
+ tuple = [patt, terminal, conversion]
99
+ if actions[:scan_value].empty?
100
+ actions[:scan_value] = [tuple]
101
+ else
102
+ actions[:scan_value] << tuple
103
+ end
104
+ end
105
+
106
+ # Set the mapping from a verbatim text to its corresponding terminal symbol name
107
+ # @param mapping [Hash{String, String}]
108
+ def map_verbatim2terminal(mapping)
109
+ @verbatim2terminal = mapping
110
+ end
111
+
112
+ # rubocop: disable Metrics/AbcSize
113
+
114
+ # Return the next token (if any) from the input stream.
115
+ # @return [Dendroid::Lexical::Token, NilClass]
116
+ def next_token
117
+ token = nil
118
+
119
+ # Loop until end of input reached or token found
120
+ until scanner.eos?
121
+ if scanner.skip(actions[:skip_nl])
122
+ next_line_scanned
123
+ next
124
+ end
125
+
126
+ next if scanner.skip(actions[:skip_ws]) # Skip whitespaces
127
+
128
+ if (text = scanner.scan(actions[:scan_verbatim]))
129
+ token = verbatim_scanned(text)
130
+ break
131
+ end
132
+
133
+ tuple = actions[:scan_value].find do |(pattern, _terminal, _conversion)|
134
+ scanner.check(pattern)
135
+ end
136
+ if tuple
137
+ (pattern, terminal, conversion) = tuple
138
+ text = scanner.scan(pattern)
139
+ token = value_scanned(text, terminal, conversion)
140
+ break
141
+ end
142
+
143
+ # Unknown token
144
+ col = scanner.pos - line_start + 1
145
+ erroneous = scanner.peek(1).nil? ? '' : scanner.scan(/./)
146
+ raise StandardError, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
147
+ end
148
+
149
+ token
150
+ end
151
+
152
+ # rubocop: enable Metrics/AbcSize
153
+
154
+ protected
155
+
156
+ def defaults
157
+ # Defaults
158
+ skip_nl(/(?:\r\n)|\r|\n/) # Skip newlines
159
+ skip_ws(/[ \t\f]+/) # Skip blanks
160
+ end
161
+
162
+ private
163
+
164
+ def normalize_pattern(pattern)
165
+ case pattern
166
+ when String
167
+ Regexp.new(Regexp.escape(pattern))
168
+ when Array
169
+ regexes = pattern.map { |patt| normalize_pattern(patt) }
170
+ Regexp.union(regexes)
171
+ else
172
+ pattern
173
+ end
174
+ end
175
+
176
+ def next_line_scanned
177
+ @lineno += 1
178
+ @line_start = scanner.pos
179
+ end
180
+
181
+ def verbatim_scanned(text)
182
+ symbol_name = @verbatim2terminal[text]
183
+ begin
184
+ lex_length = text ? text.size : 0
185
+ col = scanner.pos - lex_length - @line_start + 1
186
+ pos = Lexical::TokenPosition.new(@lineno, col)
187
+ token = Lexical::Token.new(text, pos, symbol_name)
188
+ rescue StandardError => e
189
+ puts "Failing with '#{symbol_name}' and '#{text}'"
190
+ raise e
191
+ end
192
+
193
+ token
194
+ end
195
+
196
+ def value_scanned(aText, aSymbolName, conversion)
197
+ value = conversion.call(aText)
198
+ lex_length = aText ? aText.size : 0
199
+ col = scanner.pos - lex_length - @line_start + 1
200
+ build_literal(aSymbolName, value, aText, col)
201
+ end
202
+
203
+ def build_literal(aSymbolName, aValue, aText, aPosition)
204
+ pos = if aPosition.is_a?(Integer)
205
+ col = aPosition
206
+ Lexical::TokenPosition.new(@lineno, col)
207
+ else
208
+ aPosition
209
+ end
210
+
211
+ Lexical::Literal.new(aText.dup, pos, aSymbolName, aValue)
212
+ end
213
+ end # class
214
+ end # module
215
+ end # module
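A usage sketch of the tokenizer DSL, using the same configuration block as `base_tokenizer_spec.rb` below; it assumes the gem's `lib/` directory is on the load path, and the expected output is taken from the spec expectations.

```ruby
# Assumes the dendroid gem's lib/ directory is on $LOAD_PATH
require 'dendroid/utils/base_tokenizer'

# Configuration block taken from base_tokenizer_spec.rb
tokenizer = Dendroid::Utils::BaseTokenizer.new do
  map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
  scan_verbatim(['+', '*'])
  scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
end

tokenizer.input = '2 + 3 * 4'
while (token = tokenizer.next_token)
  puts format('%s %s %s', token.pos_to_s, token.terminal, token.source)
end
# Expected output (per the spec expectations):
# 1:1 INTEGER 2
# 1:3 PLUS +
# 1:5 INTEGER 3
# 1:7 STAR *
# 1:9 INTEGER 4
```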
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/syntax/terminal'
5
+ require_relative '../../../lib/dendroid/syntax/non_terminal'
6
+ require_relative '../../../lib/dendroid/syntax/symbol_seq'
7
+ require_relative '../../../lib/dendroid/syntax/production'
8
+ require_relative '../../../lib/dendroid/grm_analysis/alternative_item'
9
+
10
+ describe Dendroid::GrmAnalysis::AlternativeItem do
11
+ # TODO
12
+ end # describe
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/syntax/terminal'
5
+ require_relative '../../../lib/dendroid/syntax/non_terminal'
6
+ require_relative '../../../lib/dendroid/syntax/symbol_seq'
7
+ require_relative '../../../lib/dendroid/syntax/choice'
8
+ # require_relative '../../../lib/dendroid/grm_analysis/alternative_item'
9
+ require_relative '../../../lib/dendroid/grm_analysis/choice_items'
10
+
11
+ describe Dendroid::GrmAnalysis::ChoiceItems do
12
+ let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
13
+ let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
14
+ let(:star_symb) { Dendroid::Syntax::Terminal.new('STAR') }
15
+ let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
16
+ let(:alt1) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
17
+ let(:alt2) { Dendroid::Syntax::SymbolSeq.new([num_symb, star_symb, num_symb]) }
18
+ let(:alt3) { Dendroid::Syntax::SymbolSeq.new([]) }
19
+ subject do
20
+ choice = Dendroid::Syntax::Choice.new(expr_symb, [alt1, alt2, alt3])
21
+ choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
22
+ choice.build_items
23
+ choice
24
+ end
25
+
26
+ context 'Methods from mix-in' do
27
+ it 'builds items for given choice' do
28
+ expect(subject.items.size).to eq(subject.alternatives.size)
29
+ subject.items.each_with_index do |itemz, index|
30
+ expect(itemz.size).to eq(subject.alternatives[index].size + 1)
31
+ end
32
+ arr_items = subject.items[1]
33
+ arr_items.each_with_index do |item, pos|
34
+ expect(item.rule).to eq(subject)
35
+ expect(item.position).to eq(pos)
36
+ expect(item.alt_index).to eq(1)
37
+ end
38
+ sole_item = subject.items[2].first # empty alternative...
39
+ expect(sole_item.rule).to eq(subject)
40
+ expect(sole_item.position).to eq(0)
41
+ expect(sole_item.alt_index).to eq(2)
42
+ end
43
+
44
+ it 'returns the first (predicted) items of the choice' do
45
+ expect(subject.predicted_items.size).to eq(subject.alternatives.size)
46
+ expectations = [
47
+ subject.items[0].first,
48
+ subject.items[1].first,
49
+ subject.items[2].first
50
+ ]
51
+ expect(subject.predicted_items).to eq(expectations)
52
+ end
53
+
54
+ it 'returns the last (reduce) items of the choice' do
55
+ expect(subject.reduce_items.size).to eq(subject.alternatives.size)
56
+ expectations = [
57
+ subject.items[0].last,
58
+ subject.items[1].last,
59
+ subject.items[2].last
60
+ ]
61
+ expect(subject.reduce_items).to eq(expectations)
62
+ end
63
+
64
+ it 'returns the consecutive item to a given one' do
65
+ arr_items = subject.items[1]
66
+ (0..arr_items.size - 1).each do |pos|
67
+ curr_item = arr_items[pos]
68
+ next_one = subject.next_item(curr_item)
69
+ expect(next_one).to eq(arr_items[pos + 1])
70
+ end
71
+ expect(subject.next_item(arr_items.last)).to be_nil
72
+ end
73
+ end # context
74
+ end # describe
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
5
+ require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
6
+
7
+ module SampleGrammars
8
+ def grammar_l1
9
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
10
+ # Grammar inspired from Wikipedia entry on Earley parsing
11
+ declare_terminals('PLUS', 'STAR', 'INTEGER')
12
+
13
+ rule('p' => 's')
14
+ rule('s' => ['s PLUS m', 'm'])
15
+ rule('m' => ['m STAR t', 't'])
16
+ rule('t' => 'INTEGER')
17
+ end
18
+
19
+ builder.grammar
20
+ end
21
+
22
+ def tokenizer_l1
23
+ Dendroid::Utils::BaseTokenizer.new do
24
+ map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
25
+
26
+ scan_verbatim(['+', '*'])
27
+ scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
28
+ end
29
+ end
30
+
31
+ def grammar_l2
32
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
33
+ # Grammar inspired from Loup Vaillant's example
34
+ # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
35
+ declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
36
+ declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
37
+
38
+ rule('p' => 'sum')
39
+ rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
40
+ rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
41
+ rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
42
+ end
43
+
44
+ builder.grammar
45
+ end
46
+
47
+ def tokenizer_l2
48
+ Dendroid::Utils::BaseTokenizer.new do
49
+ map_verbatim2terminal({
50
+ '+' => :PLUS,
51
+ '-' => :MINUS,
52
+ '*' => :STAR,
53
+ '/' => :SLASH,
54
+ '(' => :LPAREN,
55
+ ')' => :RPAREN
56
+ })
57
+
58
+ scan_verbatim(['+', '-', '*', '/', '(', ')'])
59
+ scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
60
+ end
61
+ end
62
+
63
+ def grammar_l3
64
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
65
+ # Grammar inspired from Andrew Appel's example
66
+ # Modern Compiler Implementation in Java
67
+ declare_terminals('a', 'c', 'd')
68
+
69
+ rule('Z' => ['d', 'X Y Z'])
70
+ rule('Y' => ['', 'c'])
71
+ rule('X' => %w[Y a])
72
+ end
73
+
74
+ builder.grammar
75
+ end
76
+ end # module
77
+
78
+ describe Dendroid::GrmAnalysis::GrmAnalyzer do
79
+ include SampleGrammars
80
+ let(:grammar) { grammar_l1 }
81
+
82
+ subject { described_class.new(grammar) }
83
+
84
+ context 'Initialization:' do
85
+ it 'is initialized with a grammar' do
86
+ expect { described_class.new(grammar) }.not_to raise_error
87
+ end
88
+
89
+ it 'knows its related grammar' do
90
+ expect(subject.grammar).to eq(grammar)
91
+ end
92
+
93
+ it 'knows the dotted items' do
94
+ item_count = subject.grammar.rules.reduce(0) do |count, prod|
95
+ count + prod.items.flatten.size
96
+ end
97
+ expect(subject.items.size).to eq(item_count)
98
+ expected_items = [
99
+ 'p => . s',
100
+ 'p => s .',
101
+ 's => . s PLUS m',
102
+ 's => s . PLUS m',
103
+ 's => s PLUS . m',
104
+ 's => s PLUS m .',
105
+ 's => . m',
106
+ 's => m .',
107
+ 'm => . m STAR t',
108
+ 'm => m . STAR t',
109
+ 'm => m STAR . t',
110
+ 'm => m STAR t .',
111
+ 'm => . t',
112
+ 'm => t .',
113
+ 't => . INTEGER',
114
+ 't => INTEGER .'
115
+ ]
116
+ expect(subject.items.map(&:to_s)).to eq(expected_items)
117
+ end
118
+
119
+ it 'knows the item that follows a given dotted item' do
120
+ first_item = subject.items.find { |itm| itm.to_s == 'm => . m STAR t' }
121
+ second = subject.next_item(first_item)
122
+ expect(second.to_s).to eq('m => m . STAR t')
123
+ third = subject.next_item(second)
124
+ expect(third.to_s).to eq('m => m STAR . t')
125
+ fourth = subject.next_item(third)
126
+ expect(fourth.to_s).to eq('m => m STAR t .')
127
+ expect(subject.next_item(fourth)).to be_nil
128
+ end
129
+ end # context
130
+
131
+ context 'Provided services:' do
132
+ subject { described_class.new(grammar_l3) }
133
+ it 'constructs the FIRST sets of grammar symbols' do
134
+ expectations = {
135
+ 'a' => ['a'],
136
+ 'c' => ['c'],
137
+ 'd' => ['d'],
138
+ 'X' => %w[a c], # Add epsilon
139
+ 'Y' => ['c'], # Add epsilon
140
+ 'Z' => %w[a c d]
141
+ }
142
+ expectations.each_pair do |sym_name, first_names|
143
+ symb = subject.grammar.name2symbol[sym_name]
144
+ expected_first = first_names.map { |name| subject.grammar.name2symbol[name] }
145
+ expected_first << subject.epsilon if sym_name =~ /[XY]/
146
+ expect(subject.first_sets[symb]).to eq(Set.new(expected_first))
147
+ end
148
+ end
149
+
150
+ it 'constructs the FOLLOW sets for non-terminal symbols' do
151
+ expectations = {
152
+ 'Z' => [], # Add $$
153
+ 'Y' => %w[a c d],
154
+ 'X' => %w[a c d]
155
+ }
156
+ subject.send(:build_follow_sets)
157
+ expectations.each_pair do |sym_name, follow_names|
158
+ symb = subject.grammar.name2symbol[sym_name]
159
+ expected_follow = follow_names.map { |name| subject.grammar.name2symbol[name] }
160
+ expected_follow << subject.endmarker if sym_name == 'Z'
161
+ expect(subject.follow_sets[symb]).to eq(Set.new(expected_follow))
162
+ end
163
+ end
164
+ end # context
165
+ end # describe
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/syntax/terminal'
5
+ require_relative '../../../lib/dendroid/syntax/non_terminal'
6
+ require_relative '../../../lib/dendroid/syntax/symbol_seq'
7
+ require_relative '../../../lib/dendroid/syntax/production'
8
+ require_relative '../../../lib/dendroid/grm_analysis/production_items'
9
+
10
+ describe Dendroid::GrmAnalysis::ProductionItems do
11
+ let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
12
+ let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
13
+ let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
14
+ let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
15
+ let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
16
+ let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
17
+ let(:empty_prod) do
18
+ e = Dendroid::Syntax::Production.new(expr_symb, empty_body)
19
+ e.extend(Dendroid::GrmAnalysis::ProductionItems)
20
+ e.build_items
21
+ e
22
+ end
23
+
24
+ subject do
25
+ prod.extend(Dendroid::GrmAnalysis::ProductionItems)
26
+ prod.build_items
27
+ prod
28
+ end
29
+
30
+ context 'Methods from mix-in' do
31
+ it 'builds items for given non-empty production' do
32
+ expect(subject.items.size).to eq(subject.body.size + 1)
33
+ subject.items.each_with_index do |item, index|
34
+ expect(item.rule).to eq(subject)
35
+ expect(item.position).to eq(index)
36
+ end
37
+ end
38
+
39
+ it 'builds the item for given empty production' do
40
+ expect(empty_prod.items.size).to eq(1)
41
+ expect(empty_prod.items[0].rule).to eq(empty_prod)
42
+ expect(empty_prod.items[0].position).to eq(0)
43
+ end
44
+
45
+ it 'returns the first (predicted) item of the production' do
46
+ expect(subject.predicted_items).to eq([subject.items.first])
47
+ expect(empty_prod.predicted_items).to eq([empty_prod.items.first])
48
+ end
49
+
50
+ it 'returns the last (reduce) item of the production' do
51
+ expect(subject.reduce_items).to eq([subject.items.last])
52
+ expect(empty_prod.reduce_items).to eq([empty_prod.items.first])
53
+ end
54
+
55
+ # rubocop: disable Style/EachForSimpleLoop
56
+ it 'returns the consecutive item to a given one' do
57
+ (0..2).each do |pos|
58
+ curr_item = subject.items[pos]
59
+ next_one = subject.next_item(curr_item)
60
+ expect(next_one).to eq(subject.items[pos + 1])
61
+ end
62
+ expect(subject.next_item(subject.items[-1])).to be_nil
63
+
64
+ expect(empty_prod.next_item(empty_prod.items[-1])).to be_nil
65
+ end
66
+ # rubocop: enable Style/EachForSimpleLoop
67
+ end # context
68
+ end # describe
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/lexical/token_position'
5
+ require_relative '../../../lib/dendroid/lexical/literal'
6
+
7
+ describe Dendroid::Lexical::Literal do
8
+ let(:ex_source) { '42' }
9
+ let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
10
+ let(:ex_terminal) { :INTEGER }
11
+ let(:ex_value) { 42 }
12
+ subject { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }
13
+
14
+ context 'Initialization:' do
15
+ it 'is initialized with a text, position, symbol name and value' do
16
+ expect { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }.not_to raise_error
17
+ end
18
+
19
+ it 'knows its value' do
20
+ expect(subject.value).to eq(ex_value)
21
+ end
22
+ end # context
23
+ end # describe
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/lexical/token_position'
5
+
6
+ describe Dendroid::Lexical::TokenPosition do
7
+ let(:ex_lineno) { 5 }
8
+ let(:ex_column) { 7 }
9
+
10
+ subject { described_class.new(ex_lineno, ex_column) }
11
+
12
+ context 'Initialization:' do
13
+ it 'is initialized with a line number and a column position' do
14
+ expect { described_class.new(ex_lineno, ex_column) }.not_to raise_error
15
+ end
16
+
17
+ it 'knows its line number' do
18
+ expect(subject.lineno).to eq(ex_lineno)
19
+ end
20
+
21
+ it 'knows its column number' do
22
+ expect(subject.column).to eq(ex_column)
23
+ end
24
+ end # context
25
+
26
+ context 'Provided services:' do
27
+ it 'renders a String representation of itself' do
28
+ expect(subject.to_s).to eq("#{ex_lineno}:#{ex_column}")
29
+ end
30
+ end # context
31
+ end # describe
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/lexical/token_position'
5
+ require_relative '../../../lib/dendroid/lexical/token'
6
+
7
+ describe Dendroid::Lexical::Token do
8
+ let(:ex_source) { 'else' }
9
+ let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
10
+ let(:ex_terminal) { 'ELSE' }
11
+ subject { described_class.new(ex_source, ex_pos, ex_terminal) }
12
+
13
+ context 'Initialization:' do
14
+ it 'is initialized with a text, position and symbol name' do
15
+ expect { described_class.new(ex_source, ex_pos, ex_terminal) }.not_to raise_error
16
+ end
17
+
18
+ it 'knows its source text' do
19
+ expect(subject.source).to eq(ex_source)
20
+ end
21
+
22
+ it 'knows its position' do
23
+ expect(subject.position).to eq(ex_pos)
24
+ expect(subject.pos_to_s).to eq('2:5')
25
+ end
26
+
27
+ it 'knows the terminal name' do
28
+ expect(subject.terminal).to eq(ex_terminal)
29
+ end
30
+ end # context
31
+ end # describe
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/utils/base_tokenizer'
5
+
6
+ describe Dendroid::Utils::BaseTokenizer do
7
+ # Default tokenizer, created without a configuration block
8
+ subject { described_class.new }
9
+
10
+ context 'Initialization:' do
11
+ it 'is initialized with an optional block' do
12
+ expect { described_class.new }.not_to raise_error
13
+ end
14
+
15
+ it 'has a scanner at start' do
16
+ expect(subject.scanner).to be_kind_of(StringScanner)
17
+ end
18
+
19
+ it 'initializes actions to defaults' do
20
+ expect(subject.actions).to be_member(:skip_nl)
21
+ expect(subject.actions).to be_member(:skip_ws)
22
+ end
23
+ end # context
24
+
25
+ context 'Tokenizing:' do
26
+ subject do
27
+ described_class.new do
28
+ scan_verbatim(['+', '*'])
29
+ scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
30
+ map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
31
+ end
32
+ end
33
+
34
+ it 'generates a sequence of tokens from a simple input' do
35
+ subject.input = '2 + 3 * 4'
36
+
37
+ expectations = [
38
+ ['1:1', '2', :INTEGER, 2],
39
+ ['1:3', '+', :PLUS, nil],
40
+ ['1:5', '3', :INTEGER, 3],
41
+ ['1:7', '*', :STAR, nil],
42
+ ['1:9', '4', :INTEGER, 4]
43
+ ]
44
+ expectations.each do |tuple|
45
+ tok = subject.next_token
46
+ %i[pos_to_s source terminal value].each_with_index do |message, index|
47
+ expect(tok.send(message)).to eq(tuple[index]) unless tuple[index].nil?
48
+ end
49
+ end
50
+
51
+ # No more token... 'next_token' method returns nil
52
+ expect(subject.next_token).to be_nil
53
+ end
54
+ end # context
55
+ end # describe
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.0.8
1
+ 0.0.10
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dendroid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-30 00:00:00.000000000 Z
11
+ date: 2023-11-01 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: WIP. A Ruby implementation of a Earley parser
13
+ description: WIP. A Ruby implementation of an Earley parser
14
14
  email: famished.tiger@yahoo.com
15
15
  executables: []
16
16
  extensions: []
@@ -24,8 +24,15 @@ files:
24
24
  - bin/dendroid
25
25
  - dendroid.gemspec
26
26
  - lib/dendroid.rb
27
+ - lib/dendroid/grm_analysis/alternative_item.rb
28
+ - lib/dendroid/grm_analysis/choice_items.rb
27
29
  - lib/dendroid/grm_analysis/dotted_item.rb
30
+ - lib/dendroid/grm_analysis/grm_analyzer.rb
31
+ - lib/dendroid/grm_analysis/production_items.rb
28
32
  - lib/dendroid/grm_dsl/base_grm_builder.rb
33
+ - lib/dendroid/lexical/literal.rb
34
+ - lib/dendroid/lexical/token.rb
35
+ - lib/dendroid/lexical/token_position.rb
29
36
  - lib/dendroid/syntax/choice.rb
30
37
  - lib/dendroid/syntax/grammar.rb
31
38
  - lib/dendroid/syntax/grm_symbol.rb
@@ -34,8 +41,16 @@ files:
34
41
  - lib/dendroid/syntax/rule.rb
35
42
  - lib/dendroid/syntax/symbol_seq.rb
36
43
  - lib/dendroid/syntax/terminal.rb
44
+ - lib/dendroid/utils/base_tokenizer.rb
45
+ - spec/dendroid/grm_analysis/alternative_item_spec.rb
46
+ - spec/dendroid/grm_analysis/choice_items_spec.rb
37
47
  - spec/dendroid/grm_analysis/dotted_item_spec.rb
48
+ - spec/dendroid/grm_analysis/grm_analyzer_spec.rb
49
+ - spec/dendroid/grm_analysis/production_items_spec.rb
38
50
  - spec/dendroid/grm_dsl/base_grm_builder_spec.rb
51
+ - spec/dendroid/lexical/literal_spec.rb
52
+ - spec/dendroid/lexical/token_position_spec.rb
53
+ - spec/dendroid/lexical/token_spec.rb
39
54
  - spec/dendroid/syntax/choice_spec.rb
40
55
  - spec/dendroid/syntax/grammar_spec.rb
41
56
  - spec/dendroid/syntax/grm_symbol_spec.rb
@@ -44,6 +59,7 @@ files:
44
59
  - spec/dendroid/syntax/rule_spec.rb
45
60
  - spec/dendroid/syntax/symbol_seq_spec.rb
46
61
  - spec/dendroid/syntax/terminal_spec.rb
62
+ - spec/dendroid/utils/base_tokenizer_spec.rb
47
63
  - spec/spec_helper.rb
48
64
  - version.txt
49
65
  homepage: https://github.com/famished-tiger/Dendroid
@@ -68,5 +84,5 @@ requirements: []
68
84
  rubygems_version: 3.3.7
69
85
  signing_key:
70
86
  specification_version: 4
71
- summary: Dendroid. TODO
87
+ summary: WIP. A Ruby implementation of an Earley parser
72
88
  test_files: []