dendroid 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 171ca5c202ea7a9d5c156086cecee352539c4f0551556175cf748328d3fa0983
4
- data.tar.gz: a81434f0fe610433cce7d6f1213dc3a25d06f7264fda882ae4b5ec4867514b31
3
+ metadata.gz: 270fc74811d70652e19c4ed42cd11138a1fe9fc413e9b1856b982edfa28c5d51
4
+ data.tar.gz: 280351b252bd5c4a63f3082375053ea7d3bf9a9d0d32acc055dc33cce91ed628
5
5
  SHA512:
6
- metadata.gz: 7894b4b5abcabde582c9e16dc93df8e5aa77b9fed8691ea52901c802e957558c4353e009544b53d352e45d145807e2957e43468a1783cca92326b52eddddb140
7
- data.tar.gz: b4a8415a997fb45005668b03087dc6347db11dd692c87e6da5c9fa6cd0d609ee31b630e51e896627cf95b1a57a8f60f919dde9cf18c8fba4a4f08191dc28004b
6
+ metadata.gz: 7a34047f56f1f488377afd88c4049b935d03d8a0a902cd44f8ffba3d58578c212c5ef7f0b1229192a7f4606b1d683d70ca479273d45d716d98154a38663f233f
7
+ data.tar.gz: 36578ffb40a0463a2e411000b24fa8005166c1ede8f6a856293c0122e44fdbb46d3758159042db0c9c4ccacf9c1bf071e49cfb86a64792b98fac8bb89447a85a
data/.rubocop.yml CHANGED
@@ -20,7 +20,7 @@ Metrics/CyclomaticComplexity:
20
20
 
21
21
  Metrics/MethodLength:
22
22
  Enabled: true
23
- Max: 30
23
+ Max: 60
24
24
 
25
25
  Metrics/PerceivedComplexity:
26
26
  Enabled: true
@@ -32,5 +32,8 @@ Naming/MethodParameterName:
32
32
  Naming/VariableName:
33
33
  Enabled: false
34
34
 
35
+ Style/AccessorGrouping:
36
+ Enabled: false
37
+
35
38
  Style/CommentedKeyword:
36
39
  Enabled: false
data/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.0.10] - 2023-11-01
6
+ Added missing class and method documentation, fixed some `Rubocop` offenses.
7
+
8
+
5
9
  ## [0.0.9] - 2023-11-01
6
10
  Added classes for tokenization and grammar analysis.
7
11
 
data/dendroid.gemspec CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
7
7
  path = ::File.dirname(libpath) + ::File::SEPARATOR
8
8
  ::File.read("#{path}version.txt").strip
9
9
  end
10
- s.summary = 'WIP. A Ruby implementation of a Earley parser'
11
- s.description = 'WIP. A Ruby implementation of a Earley parser'
10
+ s.summary = 'WIP. A Ruby implementation of an Earley parser'
11
+ s.description = 'WIP. A Ruby implementation of an Earley parser'
12
12
  s.authors = ['Dimitri Geshef']
13
13
  s.email = 'famished.tiger@yahoo.com'
14
14
  s.files = Dir['bin/dendroid',
@@ -61,11 +61,10 @@ module Dendroid
61
61
  private
62
62
 
63
63
  def valid_position(aPosition)
64
- raise Exception if aPosition < 0 || aPosition > rule.alternatives[alt_index].size
64
+ raise StandardError if aPosition.negative? || aPosition > rule.alternatives[alt_index].size
65
65
 
66
66
  aPosition
67
67
  end
68
68
  end # class
69
69
  end # module
70
70
  end # module
71
-
@@ -52,8 +52,9 @@ module Dendroid
52
52
  def next_item(anItem)
53
53
  items_arr = items[anItem.alt_index]
54
54
  return nil if anItem == items_arr.last
55
+
55
56
  items_arr[anItem.position + 1]
56
57
  end
57
58
  end # module
58
59
  end # module
59
- end # module
60
+ end # module
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dendroid
4
+ # This module contains classes that, from the analysis of grammar rules, help to build the objects
5
+ # needed by a recognizer or a parser for the language.
4
6
  module GrmAnalysis
5
7
  # For a given production rule, a dotted item represents a recognition state.
6
8
  # The dot partitions the rhs of the rule in two parts:
@@ -5,19 +5,34 @@ require_relative '../grm_analysis/choice_items'
5
5
 
6
6
  module Dendroid
7
7
  module GrmAnalysis
8
- # An analyzer performs an analysis of the rules of a given grammar
9
- #
8
+ # An analyzer performs an analysis of the grammar rules and
9
+ # builds objects (dotted items, first and follow sets) to be used
10
+ # by a recognizer or a parser.
10
11
  class GrmAnalyzer
12
+ # @return [Dendroid::Syntax::Grammar] The grammar subjected to analysis
11
13
  attr_reader :grammar
12
14
  attr_reader :items
13
15
  attr_reader :production2items
14
16
  attr_reader :symbol2productions
17
+
18
+ # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `__epsilon` (for empty string)
15
19
  attr_reader :epsilon
20
+
21
+ # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `$$` for end of input stream
16
22
  attr_reader :endmarker
23
+
24
+ # @return [Hash{Syntax::NonTerminal => Array<Syntax::Terminal>}] non-terminal to FIRST SETS mapping
17
25
  attr_reader :first_sets
26
+
27
+ # @return [Hash{Syntax::NonTerminal => Array<Syntax::Terminal>}] non-terminal to PREDICT SETS mapping
18
28
  attr_reader :predict_sets
29
+
30
+ # @return [Hash{Syntax::NonTerminal => Array<Syntax::Terminal>}] non-terminal to FOLLOW SETS mapping
19
31
  attr_reader :follow_sets
20
32
 
33
+ # Constructor.
34
+ # Build dotted items, first and follow sets for the given grammar
35
+ # @param aGrammar [Dendroid::Syntax::Grammar]
21
36
  def initialize(aGrammar)
22
37
  @grammar = aGrammar
23
38
  @items = []
@@ -34,6 +49,8 @@ module Dendroid
34
49
  build_follow_sets
35
50
  end
36
51
 
52
+ # The next item of a given dotted item
53
+ # @param aDottedItem [DottedItem]
37
54
  def next_item(aDottedItem)
38
55
  prod = aDottedItem.rule
39
56
  prod.next_item(aDottedItem)
@@ -72,7 +89,7 @@ module Dendroid
72
89
  else
73
90
  first_head.merge(sequence_first(prod.body.members))
74
91
  end
75
- changed = true if (first_head.size > pre_first_size)
92
+ changed = true if first_head.size > pre_first_size
76
93
  end
77
94
  end until !changed
78
95
  end
@@ -84,7 +101,7 @@ module Dendroid
84
101
  elsif symb.nullable?
85
102
  first_sets[symb] = Set.new([epsilon])
86
103
  else
87
- first_sets[symb] = Set.new
104
+ first_sets[symb] = Set.new
88
105
  end
89
106
  end
90
107
  end
@@ -115,7 +132,7 @@ module Dendroid
115
132
 
116
133
  head = prod.head
117
134
  head_follow = follow_sets[head]
118
- trailer = Set.new
135
+ # trailer = Set.new
119
136
  last = true
120
137
  last_index = body.size - 1
121
138
  last_index.downto(0) do |i|
@@ -130,7 +147,7 @@ module Dendroid
130
147
  follow_sets[symbol].merge(head_follow) if symbol.nullable?
131
148
  last = false
132
149
  else
133
- symbol_seq = body.slice(i+1, last_index - i)
150
+ symbol_seq = body.slice(i + 1, last_index - i)
134
151
  trailer_first = sequence_first(symbol_seq)
135
152
  contains_epsilon = trailer_first.include? epsilon
136
153
  trailer_first.delete(epsilon) if contains_epsilon
@@ -146,7 +163,7 @@ module Dendroid
146
163
 
147
164
  head = prod.head
148
165
  head_follow = follow_sets[head]
149
- trailer = Set.new
166
+ # trailer = Set.new
150
167
  last = true
151
168
  last_index = body.size - 1
152
169
  last_index.downto(0) do |i|
@@ -161,7 +178,7 @@ module Dendroid
161
178
  follow_sets[symbol].merge(head_follow) if symbol.nullable?
162
179
  last = false
163
180
  else
164
- symbol_seq = body.slice(i+1, last_index - i)
181
+ symbol_seq = body.slice(i + 1, last_index - i)
165
182
  trailer_first = sequence_first(symbol_seq)
166
183
  contains_epsilon = trailer_first.include? epsilon
167
184
  trailer_first.delete(epsilon) if contains_epsilon
@@ -179,7 +196,7 @@ module Dendroid
179
196
  grammar.symbols.each do |symb|
180
197
  next if symb.terminal?
181
198
 
182
- follow_sets[symb] = Set.new
199
+ follow_sets[symb] = Set.new
183
200
  end
184
201
 
185
202
  # Initialize FOLLOW(start symbol) with end marker
@@ -14,7 +14,7 @@ module Dendroid
14
14
  @items = if empty?
15
15
  [DottedItem.new(self, 0)]
16
16
  else
17
- (0..body.size).reduce([]) do | result,pos|
17
+ (0..body.size).reduce([]) do |result, pos|
18
18
  result << GrmAnalysis::DottedItem.new(self, pos)
19
19
  end
20
20
  end
@@ -47,6 +47,7 @@ module Dendroid
47
47
  # @return [GrmAnalysis::DottedItem|NilClass]
48
48
  def next_item(anItem)
49
49
  return nil if anItem == @items.last
50
+
50
51
  @items[anItem.position + 1]
51
52
  end
52
53
  end # module
@@ -24,7 +24,7 @@ module Dendroid
24
24
  attr_reader :position
25
25
 
26
26
  # @return [String] The name of terminal symbol matching the text.
27
- attr :terminal
27
+ attr_reader :terminal
28
28
 
29
29
  # Constructor.
30
30
  # @param original [String] the piece of text from input
@@ -2,19 +2,27 @@
2
2
 
3
3
  module Dendroid
4
4
  module Lexical
5
+ # Keeps track of the position of a token in the input stream.
5
6
  class TokenPosition
7
+ # @return [Integer] The line number where the token begins
6
8
  attr_reader :lineno
9
+
10
+ # @return [Integer] The column number where the token begins
7
11
  attr_reader :column
8
12
 
13
+ # Constructor
14
+ # @param line [Integer] The line number where the token begins
15
+ # @param col [Integer] The column number where the token begins
9
16
  def initialize(line, col)
10
17
  @lineno = line
11
18
  @column = col
12
19
  end
13
20
 
21
+ # Return the position of the start of the token in line:col format
22
+ # @return [String]
14
23
  def to_s
15
24
  "#{lineno}:#{column}"
16
25
  end
17
26
  end # class
18
27
  end # module
19
28
  end # module
20
-
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dendroid
4
- # The namespace for all classes used to build a grammar.
4
+ # This module contains all classes representing elements of a grammar.
5
5
  module Syntax
6
6
  # Abstract class for grammar symbols.
7
7
  # A grammar symbol is an element that appears in grammar rules.
@@ -5,44 +5,67 @@ require_relative '../lexical/token_position'
5
5
  require_relative '../lexical/literal'
6
6
 
7
7
  module Dendroid
8
+ # This module contains helper classes (e.g. a tokenizer generator)
8
9
  module Utils
10
+ # A basic tokenizer.
11
+ # Responsibility: break input into a sequence of token objects.
12
+ # This class defines a simple DSL to build a tokenizer.
9
13
  class BaseTokenizer
14
+ # @return [StringScanner] Low-level input scanner
10
15
  attr_reader :scanner
16
+
17
+ # @return [Integer] The current line number
11
18
  attr_reader :lineno
19
+
20
+ # @return [Integer] Position of last start of line in the input string
12
21
  attr_reader :line_start
22
+
23
+ # @return [Hash{Symbol => Array<Regexp>}]
13
24
  attr_reader :actions
14
25
 
26
+ # Constructor
27
+ # @param aBlock [Proc]
15
28
  def initialize(&aBlock)
16
29
  @scanner = StringScanner.new('')
17
30
  @actions = { skip: [], scan_verbatim: [], scan_value: [] }
18
31
  defaults
32
+ return unless block_given?
19
33
 
20
- if block_given?
21
- instance_exec(&aBlock)
22
- # grammar_complete!
23
- end
34
+ instance_exec(&aBlock)
24
35
  end
25
36
 
37
+ # Reset the tokenizer and set new text to tokenize
38
+ # @param source [String]
26
39
  def input=(source)
27
- scanner.string = source
28
40
  reset
41
+ scanner.string = source
29
42
  end
30
43
 
44
+ # Reset the tokenizer
31
45
  def reset
32
46
  @lineno = 1
33
47
  @line_start = 0
48
+ scanner.reset
34
49
  end
35
50
 
36
51
  # action, pattern, terminal?, conversion?
37
52
  # action = skip, skip_nl, scan
53
+
54
+ # Associate the provided pattern to the action of skipping a newline and
55
+ # incrementing the line counter.
56
+ # @param pattern [Regexp]
38
57
  def skip_nl(pattern)
39
58
  actions[:skip_nl] = pattern
40
59
  end
41
60
 
61
+ # Associate the provided pattern with the action to skip whitespace(s).
62
+ # @param pattern [Regexp]
42
63
  def skip_ws(pattern)
43
64
  actions[:skip_ws] = pattern
44
65
  end
45
66
 
67
+ # Associate the provided pattern with the action to skip the matching text.
68
+ # @param pattern [Regexp]
46
69
  def skip(pattern)
47
70
  if actions[:skip].empty?
48
71
  actions[:skip] = pattern
@@ -52,6 +75,8 @@ module Dendroid
52
75
  end
53
76
  end
54
77
 
78
+ # Associate the provided pattern with the action to tokenize the matching text
79
+ # @param pattern [Regexp]
55
80
  def scan_verbatim(pattern)
56
81
  patt = normalize_pattern(pattern)
57
82
  if actions[:scan_verbatim].empty?
@@ -62,9 +87,15 @@ module Dendroid
62
87
  end
63
88
  end
64
89
 
65
- def scan_value(pattern, terminal, convertion)
90
+ # Associate the provided pattern with the action to tokenize the matching text
91
+ # as an instance of the given terminal symbol and convert the matching text into
92
+ # a value by using the given conversion.
93
+ # @param pattern [Regexp]
94
+ # @param terminal [Dendroid::Syntax::Terminal]
95
+ # @param conversion [Proc] a Proc (lambda) that takes a String as argument and returns a value.
96
+ def scan_value(pattern, terminal, conversion)
66
97
  patt = normalize_pattern(pattern)
67
- tuple = [patt, terminal, convertion]
98
+ tuple = [patt, terminal, conversion]
68
99
  if actions[:scan_value].empty?
69
100
  actions[:scan_value] = [tuple]
70
101
  else
@@ -72,10 +103,16 @@ module Dendroid
72
103
  end
73
104
  end
74
105
 
106
+ # Set the mapping between a verbatim text and its corresponding terminal symbol name
107
+ # @param mapping [Hash{String => String}]
75
108
  def map_verbatim2terminal(mapping)
76
109
  @verbatim2terminal = mapping
77
110
  end
78
111
 
112
+ # rubocop: disable Metrics/AbcSize
113
+
114
+ # Return the next token (if any) from the input stream.
115
+ # @return [Dendroid::Lexical::Token, NilClass]
79
116
  def next_token
80
117
  token = nil
81
118
 
@@ -93,7 +130,7 @@ module Dendroid
93
130
  break
94
131
  end
95
132
 
96
- tuple = actions[:scan_value].find do |(pattern, terminal, conversion)|
133
+ tuple = actions[:scan_value].find do |(pattern, _terminal, _conversion)|
97
134
  scanner.check(pattern)
98
135
  end
99
136
  if tuple
@@ -106,18 +143,20 @@ module Dendroid
106
143
  # Unknown token
107
144
  col = scanner.pos - line_start + 1
108
145
  erroneous = scanner.peek(1).nil? ? '' : scanner.scan(/./)
109
- raise Exception, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
146
+ raise StandardError, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
110
147
  end
111
148
 
112
149
  token
113
150
  end
114
151
 
152
+ # rubocop: enable Metrics/AbcSize
153
+
115
154
  protected
116
155
 
117
156
  def defaults
118
157
  # Defaults
119
- skip_nl /(?:\r\n)|\r|\n/ # Skip newlines
120
- skip_ws /[ \t\f]+/ # Skip blanks
158
+ skip_nl(/(?:\r\n)|\r|\n/) # Skip newlines
159
+ skip_ws(/[ \t\f]+/) # Skip blanks
121
160
  end
122
161
 
123
162
  private
@@ -146,7 +185,7 @@ module Dendroid
146
185
  col = scanner.pos - lex_length - @line_start + 1
147
186
  pos = Lexical::TokenPosition.new(@lineno, col)
148
187
  token = Lexical::Token.new(text, pos, symbol_name)
149
- rescue Exception => e
188
+ rescue StandardError => e
150
189
  puts "Failing with '#{symbol_name}' and '#{text}'"
151
190
  raise e
152
191
  end
@@ -154,15 +193,15 @@ module Dendroid
154
193
  token
155
194
  end
156
195
 
157
- def value_scanned(aText, aSymbolName, convertion)
158
- value = convertion.call(aText)
196
+ def value_scanned(aText, aSymbolName, conversion)
197
+ value = conversion.call(aText)
159
198
  lex_length = aText ? aText.size : 0
160
199
  col = scanner.pos - lex_length - @line_start + 1
161
200
  build_literal(aSymbolName, value, aText, col)
162
201
  end
163
202
 
164
203
  def build_literal(aSymbolName, aValue, aText, aPosition)
165
- pos = if aPosition.kind_of?(Integer)
204
+ pos = if aPosition.is_a?(Integer)
166
205
  col = aPosition
167
206
  Lexical::TokenPosition.new(@lineno, col)
168
207
  else
@@ -52,7 +52,8 @@ module SampleGrammars
52
52
  '*' => :STAR,
53
53
  '/' => :SLASH,
54
54
  '(' => :LPAREN,
55
- ')' => :RPAREN })
55
+ ')' => :RPAREN
56
+ })
56
57
 
57
58
  scan_verbatim(['+', '-', '*', '/', '(', ')'])
58
59
  scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
@@ -67,7 +68,7 @@ module SampleGrammars
67
68
 
68
69
  rule('Z' => ['d', 'X Y Z'])
69
70
  rule('Y' => ['', 'c'])
70
- rule('X' => ['Y', 'a'])
71
+ rule('X' => %w[Y a])
71
72
  end
72
73
 
73
74
  builder.grammar
@@ -91,7 +92,7 @@ describe Dendroid::GrmAnalysis::GrmAnalyzer do
91
92
 
92
93
  it 'knows the dotted items' do
93
94
  item_count = subject.grammar.rules.reduce(0) do |count, prod|
94
- count += prod.items.flatten.size
95
+ count + prod.items.flatten.size
95
96
  end
96
97
  expect(subject.items.size).to eq(item_count)
97
98
  expected_items = [
@@ -116,7 +117,7 @@ describe Dendroid::GrmAnalysis::GrmAnalyzer do
116
117
  end
117
118
 
118
119
  it 'knows the item that follows a given dotted item' do
119
- first_item = subject.items.find { |itm| itm.to_s == 'm => . m STAR t' }
120
+ first_item = subject.items.find { |itm| itm.to_s == 'm => . m STAR t' }
120
121
  second = subject.next_item(first_item)
121
122
  expect(second.to_s).to eq('m => m . STAR t')
122
123
  third = subject.next_item(second)
@@ -134,9 +135,9 @@ describe Dendroid::GrmAnalysis::GrmAnalyzer do
134
135
  'a' => ['a'],
135
136
  'c' => ['c'],
136
137
  'd' => ['d'],
137
- 'X' => ['a', 'c'], # Add epsilon
138
+ 'X' => %w[a c], # Add epsilon
138
139
  'Y' => ['c'], # Add epsilon
139
- 'Z' => ['a', 'c', 'd']
140
+ 'Z' => %w[a c d]
140
141
  }
141
142
  expectations.each_pair do |sym_name, first_names|
142
143
  symb = subject.grammar.name2symbol[sym_name]
@@ -149,8 +150,8 @@ describe Dendroid::GrmAnalysis::GrmAnalyzer do
149
150
  it 'constructs the FOLLOW sets for non-terminal symbols' do
150
151
  expectations = {
151
152
  'Z' => [], # Add $$
152
- 'Y' => ['a', 'c', 'd'],
153
- 'X' => ['a', 'c', 'd']
153
+ 'Y' => %w[a c d],
154
+ 'X' => %w[a c d]
154
155
  }
155
156
  subject.send(:build_follow_sets)
156
157
  expectations.each_pair do |sym_name, follow_names|
@@ -28,4 +28,4 @@ describe Dendroid::Lexical::TokenPosition do
28
28
  expect(subject.to_s).to eq("#{ex_lineno}:#{ex_column}")
29
29
  end
30
30
  end # context
31
- end # describe
31
+ end # describe
@@ -9,7 +9,7 @@ describe Dendroid::Utils::BaseTokenizer do
9
9
 
10
10
  context 'Initialization:' do
11
11
  it 'is initialized with an optional block' do
12
- expect {described_class.new }.not_to raise_error
12
+ expect { described_class.new }.not_to raise_error
13
13
  end
14
14
 
15
15
  it 'has a scanner at start' do
@@ -24,11 +24,11 @@ describe Dendroid::Utils::BaseTokenizer do
24
24
 
25
25
  context 'Tokenizing:' do
26
26
  subject do
27
- described_class.new {
27
+ described_class.new do
28
28
  scan_verbatim(['+', '*'])
29
29
  scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
30
30
  map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
31
- }
31
+ end
32
32
  end
33
33
 
34
34
  it 'generates a sequence of tokens from a simple input' do
@@ -43,7 +43,7 @@ describe Dendroid::Utils::BaseTokenizer do
43
43
  ]
44
44
  expectations.each do |tuple|
45
45
  tok = subject.next_token
46
- [:pos_to_s, :source, :terminal, :value].each_with_index do |message, index|
46
+ %i[pos_to_s source terminal value].each_with_index do |message, index|
47
47
  expect(tok.send(message)).to eq(tuple[index]) unless tuple[index].nil?
48
48
  end
49
49
  end
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.0.9
1
+ 0.0.10
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dendroid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
@@ -10,7 +10,7 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2023-11-01 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: WIP. A Ruby implementation of a Earley parser
13
+ description: WIP. A Ruby implementation of an Earley parser
14
14
  email: famished.tiger@yahoo.com
15
15
  executables: []
16
16
  extensions: []
@@ -84,5 +84,5 @@ requirements: []
84
84
  rubygems_version: 3.3.7
85
85
  signing_key:
86
86
  specification_version: 4
87
- summary: WIP. A Ruby implementation of a Earley parser
87
+ summary: WIP. A Ruby implementation of an Earley parser
88
88
  test_files: []