sequitur 0.1.24 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/Gemfile +2 -4
- data/LICENSE.txt +1 -1
- data/README.md +2 -3
- data/appveyor.yml +10 -10
- data/examples/inductive_english.rb +35 -0
- data/lib/sequitur/constants.rb +6 -5
- data/lib/sequitur/digram.rb +9 -9
- data/lib/sequitur/dynamic_grammar.rb +9 -8
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +5 -2
- data/lib/sequitur/formatter/debug.rb +10 -3
- data/lib/sequitur/grammar_visitor.rb +6 -6
- data/lib/sequitur/production.rb +20 -17
- data/lib/sequitur/production_ref.rb +9 -8
- data/lib/sequitur/symbol_sequence.rb +6 -6
- data/lib/sequitur.rb +2 -1
- data/sig/lib/sequitur/constants.rbs +10 -0
- data/sig/lib/sequitur/digram.rbs +37 -0
- data/sig/lib/sequitur/dynamic_grammar.rbs +58 -0
- data/sig/lib/sequitur/formatter/base_formatter.rbs +20 -0
- data/sig/lib/sequitur/formatter/base_text.rbs +62 -0
- data/sig/lib/sequitur/formatter/debug.rbs +89 -0
- data/sig/lib/sequitur/production.rbs +120 -0
- data/sig/lib/sequitur/production_ref.rbs +73 -0
- data/sig/lib/sequitur/sequitur_grammar.rbs +55 -0
- data/sig/lib/sequitur/symbol_sequence.rbs +83 -0
- data/sig/lib/sequitur.rbs +9 -0
- data/spec/sequitur/symbol_sequence_spec.rb +1 -4
- metadata +47 -17
data/lib/sequitur.rb
CHANGED
@@ -9,10 +9,11 @@ require_relative './sequitur/sequitur_grammar'
|
|
9
9
|
require_relative './sequitur/formatter/debug'
|
10
10
|
require_relative './sequitur/formatter/base_text'
|
11
11
|
|
12
|
+
# Namespace for the classes of sequitur gem.
|
12
13
|
module Sequitur
|
13
14
|
# Build a Sequitur-generated grammar based on the sequence of input tokens.
|
14
15
|
#
|
15
|
-
# @param tokens [
|
16
|
+
# @param tokens [String, Enumerator] The input sequence of input tokens.
|
16
17
|
# Can be a sequence of characters (i.e. a String) or an Enumerator.
|
17
18
|
# Tokens returned by enumerator should respond to the :hash message.
|
18
19
|
# @return [SequiturGrammar] a grammar that encodes the input.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# In linguistics, a digram is a sequence of two letters.
|
3
|
+
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
4
|
+
# appear in a production rule. Each symbol in a digram
|
5
|
+
# can be a terminal or not.
|
6
|
+
class Digram
|
7
|
+
# The sequence of two consecutive grammar symbols.
|
8
|
+
# @return [Array<String, Symbol>] The two symbols should respond to the :hash message.
|
9
|
+
attr_reader symbols: Array[String | Symbol]
|
10
|
+
|
11
|
+
# @return [String] A unique hash key of the digram
|
12
|
+
attr_reader key: String
|
13
|
+
|
14
|
+
# @return [Sequitur::Production] The production in which the digram occurs
|
15
|
+
attr_reader production: Sequitur::Production
|
16
|
+
|
17
|
+
# Constructor.
|
18
|
+
# A digram represents a sequence of two symbols
|
19
|
+
# (that appears in a rhs of a production).
|
20
|
+
# Terminal symbols must respond to the :hash message.
|
21
|
+
# @param symbol1 [String, Symbol] First element of the digram
|
22
|
+
# @param symbol2 [String, Symbol] Second element of the digram
|
23
|
+
# @param aProduction [Sequitur::Production] Production in which the RHS
|
24
|
+
# the sequence symbol1 symbol2 appears.
|
25
|
+
def initialize: ((String | Symbol) symbol1, (String | Symbol) symbol2, Sequitur::Production aProduction) -> void
|
26
|
+
|
27
|
+
# Equality testing.
|
28
|
+
# true iff keys of both digrams are equal, false otherwise
|
29
|
+
# @param other [Sequitur::Digram] another to compare with
|
30
|
+
# @return [TrueClass, FalseClass]
|
31
|
+
def ==: (Sequitur::Digram other) -> bool
|
32
|
+
|
33
|
+
# Does the digram consist of the same symbol twice?
|
34
|
+
# @return [TrueClass, FalseClass] true when symbols.first == symbols.last
|
35
|
+
def repeating?: () -> bool
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
3
|
+
# Formally, a grammar has:
|
4
|
+
# One start production
|
5
|
+
# Zero or more other productions
|
6
|
+
# Each production has a rhs that is a sequence of grammar symbols.
|
7
|
+
# Grammar symbols are categorized into
|
8
|
+
# -terminal symbols (i.e. String, Ruby Symbol,...)
|
9
|
+
# -non-terminal symbols (i.e. ProductionRef)
|
10
|
+
class DynamicGrammar
|
11
|
+
# @return [Sequitur::Production] Link to the start production.
|
12
|
+
attr_reader start: Sequitur::Production
|
13
|
+
|
14
|
+
# @return [Array<Sequitur::Production>] The set of production rules of the grammar
|
15
|
+
attr_reader productions: Array[Sequitur::Production]
|
16
|
+
|
17
|
+
# @return [TrueClass, FalseClass] Trace the execution of the algorithm.
|
18
|
+
attr_accessor trace: bool
|
19
|
+
|
20
|
+
# Constructor.
|
21
|
+
# Build a grammar with one empty rule as start/start rule.
|
22
|
+
def initialize: () -> void
|
23
|
+
|
24
|
+
# Emit a text representation of the grammar.
|
25
|
+
# Each production rule is emitted per line.
|
26
|
+
# @return [String]
|
27
|
+
def to_string: () -> String
|
28
|
+
|
29
|
+
# Add a given production to the grammar.
|
30
|
+
# @param aProduction [Sequitur::Production]
|
31
|
+
# @return [Array<Sequitur::Production>]
|
32
|
+
def add_production: (Sequitur::Production aProduction) -> Array[Sequitur::Production]
|
33
|
+
|
34
|
+
# Remove a production with given index from the grammar
|
35
|
+
# @param anIndex [Integer]
|
36
|
+
# @return [Sequitur::Production] the production removed from the grammar.
|
37
|
+
def remove_production: (Integer anIndex) -> Sequitur::Production
|
38
|
+
|
39
|
+
# Add the given token to the grammar.
|
40
|
+
# Append the token to the rhs of the start/start rule.
|
41
|
+
# @param aToken [Object] input token to add
|
42
|
+
def add_token: (untyped aToken) -> untyped
|
43
|
+
|
44
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
45
|
+
# A visitee is expected to accept the visit from a visitor object
|
46
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor object
|
47
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
48
|
+
|
49
|
+
# Factory method. Returns a visitor for this grammar.
|
50
|
+
# @return [Sequitur::GrammarVisitor]
|
51
|
+
def visitor: () -> Sequitur::GrammarVisitor
|
52
|
+
|
53
|
+
# Append a given symbol to the rhs of passed production.
|
54
|
+
# @param aProduction [Production]
|
55
|
+
# @param aSymbol [Object]
|
56
|
+
def append_symbol_to: (Production aProduction, untyped aSymbol) -> untyped
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Namespace dedicated to grammar formatters.
|
3
|
+
module Formatter
|
4
|
+
# Superclass for grammar formatters.
|
5
|
+
class BaseFormatter
|
6
|
+
# The IO output stream in which the formatter's result will be sent.
|
7
|
+
attr_accessor output: untyped
|
8
|
+
|
9
|
+
# Constructor.
|
10
|
+
# @param anIO [IO] an output IO where the formatter's result will
|
11
|
+
# be placed.
|
12
|
+
def initialize: (untyped anIO) -> void
|
13
|
+
|
14
|
+
# Given a grammar or a grammar visitor, perform the visit
|
15
|
+
# and render the visit events in the output stream.
|
16
|
+
# @param aGrmOrVisitor [DynamicGrammar, GrammarVisitor]
|
17
|
+
def render: ((DynamicGrammar | GrammarVisitor) aGrmOrVisitor) -> untyped
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Sequitur
|
2
|
+
module Formatter
|
3
|
+
# A formatter class that can render a dynamic grammar in plain text.
|
4
|
+
# @example
|
5
|
+
# some_grammar = ... # Points to a DynamicGrammar-like object
|
6
|
+
# # Output the result to the standard console output
|
7
|
+
# formatter = Sequitur::Formatter::BaseText.new(STDOUT)
|
8
|
+
# # Render the grammar (through a visitor)
|
9
|
+
# formatter.render(some_grammar.visitor)
|
10
|
+
class BaseText < BaseFormatter
|
11
|
+
attr_reader prod_lookup: ::Hash[Production, Integer]
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
# @param anIO [IO] The output stream to which the rendered grammar
|
15
|
+
# is written.
|
16
|
+
def initialize: (IO anIO) -> void
|
17
|
+
|
18
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
19
|
+
# Notification of a visit event: the visitor is about to visit a grammar
|
20
|
+
# @param aGrammar [DynamicGrammar]
|
21
|
+
def before_grammar: (DynamicGrammar aGrammar) -> untyped
|
22
|
+
|
23
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
24
|
+
# Notification of a visit event: the visitor is about to visit
|
25
|
+
# a production
|
26
|
+
# @param aProduction [Production]
|
27
|
+
def before_production: (Production aProduction) -> untyped
|
28
|
+
|
29
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
30
|
+
# Notification of a visit event: the visitor is about to visit
|
31
|
+
# the rhs of a production
|
32
|
+
# @param _ [Array]
|
33
|
+
def before_rhs: (::Array[untyped] _) -> untyped
|
34
|
+
|
35
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
36
|
+
# Notification of a visit event: the visitor is about to visit
|
37
|
+
# a terminal symbol from the rhs of a production
|
38
|
+
# @param aSymbol [Object]
|
39
|
+
def before_terminal: (untyped aSymbol) -> untyped
|
40
|
+
|
41
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
42
|
+
# Notification of a visit event: the visitor is about to visit
|
43
|
+
# a non-terminal (= an allusion to a production) in the rhs of a
|
44
|
+
# production
|
45
|
+
# @param aProduction [Production] a production occurring in the rhs
|
46
|
+
def before_non_terminal: (Production aProduction) -> untyped
|
47
|
+
|
48
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
49
|
+
# Notification of a visit event: the visitor completed the visit
|
50
|
+
# of a production
|
51
|
+
# @param _ [Production]
|
52
|
+
def after_production: (Production _) -> untyped
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Generate a name of a given production.
|
57
|
+
# @param aProduction [Production]
|
58
|
+
# @return [String]
|
59
|
+
def prod_name: (Production aProduction) -> String
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Sequitur
|
2
|
+
module Formatter
|
3
|
+
# A formatter class that can render the notification events
|
4
|
+
# from a grammar visitor
|
5
|
+
# @example
|
6
|
+
# some_grammar = ... # Points to a DynamicGrammar-like object
|
7
|
+
# # Output the result to the standard console output
|
8
|
+
# formatter = Sequitur::Formatter::Debug.new(STDOUT)
|
9
|
+
# # Render the visit notifications
|
10
|
+
# formatter.render(some_grammar.visitor)
|
11
|
+
class Debug < BaseFormatter
|
12
|
+
# @return [Integer] Current indentation level
|
13
|
+
attr_accessor indentation: Integer
|
14
|
+
|
15
|
+
# Constructor.
|
16
|
+
# @param anIO [IO] The output stream to which the rendered grammar
|
17
|
+
# is written.
|
18
|
+
def initialize: (IO anIO) -> void
|
19
|
+
|
20
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
21
|
+
# Notification of a visit event: the visitor is about to visit a grammar
|
22
|
+
# @param _ [DynamicGrammar]
|
23
|
+
def before_grammar: (DynamicGrammar _) -> untyped
|
24
|
+
|
25
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
26
|
+
# Notification of a visit event: the visitor is about to visit
|
27
|
+
# a production
|
28
|
+
# @param _ [Production]
|
29
|
+
def before_production: (Production _) -> untyped
|
30
|
+
|
31
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
32
|
+
# Notification of a visit event: the visitor is about to visit
|
33
|
+
# the rhs of a production
|
34
|
+
# @param _ [Array]
|
35
|
+
def before_rhs: (::Array[untyped] _) -> untyped
|
36
|
+
|
37
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
38
|
+
# Notification of a visit event: the visitor is about to visit
|
39
|
+
# a terminal symbol from the rhs of a production
|
40
|
+
# @param _ [Object]
|
41
|
+
def before_terminal: (untyped _) -> untyped
|
42
|
+
|
43
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
44
|
+
# Notification of a visit event: the visitor completed the visit of
|
45
|
+
# a terminal symbol from the rhs of a production
|
46
|
+
# @param _ [Object]
|
47
|
+
def after_terminal: (untyped _) -> untyped
|
48
|
+
|
49
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
50
|
+
# Notification of a visit event: the visitor is about to visit
|
51
|
+
# a non-terminal (= an allusion to a production) in the rhs of a
|
52
|
+
# production
|
53
|
+
# @param _ [Production] a production occurring in the rhs
|
54
|
+
def before_non_terminal: (Production _) -> untyped
|
55
|
+
|
56
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
57
|
+
# Notification of a visit event: the visitor completed the visit of
|
58
|
+
# a non-terminal symbol from the rhs of a production.
|
59
|
+
# @param _ [Object]
|
60
|
+
def after_non_terminal: (untyped _) -> untyped
|
61
|
+
|
62
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
63
|
+
# Notification of a visit event: the visitor completed the visit of
|
64
|
+
# the rhs of a production
|
65
|
+
# @param _ [Array]
|
66
|
+
def after_rhs: (::Array[untyped] _) -> untyped
|
67
|
+
|
68
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
69
|
+
# Notification of a visit event: the visitor completed the visit
|
70
|
+
# of a production
|
71
|
+
# @param _ [Production]
|
72
|
+
def after_production: (Production _) -> untyped
|
73
|
+
|
74
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
75
|
+
# Notification of a visit event: the visitor completed the visit
|
76
|
+
# of a grammar
|
77
|
+
# @param _ [DynamicGrammar]
|
78
|
+
def after_grammar: (DynamicGrammar _) -> untyped
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def indent: () -> Integer
|
83
|
+
|
84
|
+
def dedent: () -> Integer
|
85
|
+
|
86
|
+
def output_event: (Symbol anEvent, Integer indentationLevel) -> nil
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# In a context-free grammar, a production is a rule in which
|
3
|
+
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
4
|
+
# and the right-hand side (RHS) consists of a sequence of symbols.
|
5
|
+
# The symbols in RHS can be either terminal or non-terminal symbols.
|
6
|
+
# The rule stipulates that the LHS is equivalent to the RHS,
|
7
|
+
# in other words every occurrence of the LHS can be substituted to
|
8
|
+
# corresponding RHS.
|
9
|
+
# Implementation note: the object id of the production is taken as its LHS.
|
10
|
+
class Production
|
11
|
+
# @return [Sequitur::SymbolSequence] The right-hand side (rhs)
|
12
|
+
# consists of a sequence of grammar symbols
|
13
|
+
attr_reader rhs: untyped
|
14
|
+
|
15
|
+
# @return [Integer] The reference count (= how many times other productions reference this one)
|
16
|
+
attr_reader refcount: untyped
|
17
|
+
|
18
|
+
# @return [Array<Sequitur::Digram>] The sequence of digrams appearing in the RHS
|
19
|
+
attr_reader digrams: untyped
|
20
|
+
|
21
|
+
# Constructor.
|
22
|
+
# Build a production with an empty RHS.
|
23
|
+
def initialize: () -> void
|
24
|
+
|
25
|
+
# Identity testing.
|
26
|
+
# @param other [Production, ProductionRef] another production or production reference.
|
27
|
+
# @return [TrueClass, FalseClass] true when the receiver and other are the same.
|
28
|
+
def ==: ((Production | ProductionRef) other) -> bool
|
29
|
+
|
30
|
+
# Is the rhs empty?
|
31
|
+
# @return [TrueClass, FalseClass] true if the rhs has no members.
|
32
|
+
def empty?: () -> bool
|
33
|
+
|
34
|
+
# Increment the reference count by one.
|
35
|
+
# @return [Integer]
|
36
|
+
def incr_refcount: () -> Integer
|
37
|
+
|
38
|
+
# Decrement the reference count by one.
|
39
|
+
# @return [Integer]
|
40
|
+
def decr_refcount: () -> Integer
|
41
|
+
|
42
|
+
# Select the references to production appearing in the rhs.
|
43
|
+
# @return [Array<ProductionRef>]
|
44
|
+
def references: () -> Array[Sequitur::ProductionRef]
|
45
|
+
|
46
|
+
# Look in the rhs for all the references to the production passed as argument.
|
47
|
+
# @param a_prod [Production, ProductionRef] The production to search for.
|
48
|
+
# @return [Array<ProductionRef>]
|
49
|
+
def references_of: ((Production | ProductionRef) a_prod) -> Array[ProductionRef]
|
50
|
+
|
51
|
+
# Enumerate the digrams appearing in the right-hand side (rhs)
|
52
|
+
# @return [Array<Sequitur::Digram>] the list of digrams found in rhs of this production.
|
53
|
+
def recalc_digrams: () -> Array[Sequitur::Digram]
|
54
|
+
|
55
|
+
# Does the rhs have exactly one digram only (= 2 symbols)?
|
56
|
+
# @return [TrueClass, FalseClass] true when the rhs contains exactly two symbols.
|
57
|
+
def single_digram?: () -> bool
|
58
|
+
|
59
|
+
# Detect whether the last digram occurs twice
|
60
|
+
# Assumption: when a digram occurs twice in a production then it must occur
|
61
|
+
# at the end of the rhs
|
62
|
+
# @return [TrueClass, FalseClass] true when the digram occurs twice in rhs.
|
63
|
+
def repeated_digram?: () -> bool
|
64
|
+
|
65
|
+
# Retrieve the last digram appearing in the RHS (if any).
|
66
|
+
# @return [Sequitur::Digram, NilClass] last digram in the rhs otherwise nil.
|
67
|
+
def last_digram: () -> (nil | Sequitur::Digram)
|
68
|
+
|
69
|
+
# Emit a text representation of the production rule.
|
70
|
+
# Text is of the form:
|
71
|
+
# object id of production : rhs as space-separated sequence of symbols.
|
72
|
+
# @return [String]
|
73
|
+
def to_string: () -> ::String
|
74
|
+
|
75
|
+
# Add a (grammar) symbol at the end of the RHS.
|
76
|
+
# @param aSymbol [Object] A (grammar) symbol to add.
|
77
|
+
def append_symbol: (untyped aSymbol) -> untyped
|
78
|
+
|
79
|
+
# Clear the right-hand side.
|
80
|
+
# Any referenced production has its reference counter decremented.
|
81
|
+
def clear_rhs: () -> untyped
|
82
|
+
|
83
|
+
# Find all the positions where the digram occurs in the rhs
|
84
|
+
# @param symb1 [Object] first symbol of the digram
|
85
|
+
# @param symb2 [Object] second symbol of the digram
|
86
|
+
# @return [Array<Integer>] the list of indices where the digram occurs in rhs.
|
87
|
+
# @example
|
88
|
+
# # Given the production p : a b c a b a b d
|
89
|
+
# #Then ...
|
90
|
+
# p.positions_of(a, b) # => [0, 3, 5]
|
91
|
+
# # Caution: "overlapping" digrams shouldn't be counted
|
92
|
+
# # Given the production p : a a b a a a c d
|
93
|
+
# # Then ...
|
94
|
+
# p.positions_of(a, a) # => [0, 3]
|
95
|
+
def positions_of: (untyped symb1, untyped symb2) -> Array[Integer]
|
96
|
+
|
97
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
98
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
99
|
+
# s1 s2 by a reference to P.
|
100
|
+
# @param another [Production, ProductionRef] a production that
|
101
|
+
# consists exactly of one digram (= 2 symbols).
|
102
|
+
def reduce_step: ((Production | ProductionRef) another) -> untyped
|
103
|
+
|
104
|
+
# Replace every occurrence of 'another' production in self.rhs by
|
105
|
+
# the symbols in the rhs of 'another'.
|
106
|
+
# @param another [Production, ProductionRef] a production that
|
107
|
+
# consists exactly of one digram (= 2 symbols).
|
108
|
+
# @example Synopsis
|
109
|
+
# # Given the production p_A : a p_B b p_B c
|
110
|
+
# # And the production p_B : x y
|
111
|
+
# # Then...
|
112
|
+
# p_A.derive_step(p_B)
|
113
|
+
# Modifies p_A into: p_A -> a x y b x y c
|
114
|
+
def derive_step: ((Production | ProductionRef) another) -> untyped
|
115
|
+
|
116
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
117
|
+
# @param aVisitor [Sequitur::GrammarVisitor]
|
118
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
3
|
+
# side of a production P1 and that refers to a production P2.
|
4
|
+
# Every time a production P2 appears in the right-hand side of
|
5
|
+
# production P1, this is implemented by inserting a production reference to P2
|
6
|
+
# in the appropriate position in the RHS of P1.
|
7
|
+
# In the literature, production references are also called non terminal
|
8
|
+
# symbols
|
9
|
+
# @example
|
10
|
+
# # Given a production rule...
|
11
|
+
# prod = Sequitur::Production.new
|
12
|
+
# puts prod.refcount # outputs 0
|
13
|
+
# # ... Build a reference to it
|
14
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
15
|
+
# # ... Production reference count is updated...
|
16
|
+
# puts prod.refcount # outputs 1
|
17
|
+
class ProductionRef
|
18
|
+
# @return [Sequitur::Production] Link to the production to reference.
|
19
|
+
attr_reader production: Sequitur::Production
|
20
|
+
|
21
|
+
# Constructor
|
22
|
+
# @param target [Production, ProductionRef]
|
23
|
+
# The production that is being referenced.
|
24
|
+
def initialize: ((Production | ProductionRef) target) -> void
|
25
|
+
|
26
|
+
# Copy constructor invoked by dup or clone methods.
|
27
|
+
# @param orig [ProductionRef]
|
28
|
+
# @example
|
29
|
+
# prod = Sequitur::Production.new
|
30
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
31
|
+
# copy_ref = ref.dup
|
32
|
+
# puts prod.refcount # outputs 2
|
33
|
+
def initialize_copy: (ProductionRef orig) -> void
|
34
|
+
|
35
|
+
# Emit the text representation of a production reference.
|
36
|
+
# @return [String]
|
37
|
+
def to_s: () -> String
|
38
|
+
|
39
|
+
alias to_string to_s
|
40
|
+
|
41
|
+
# Equality testing.
|
42
|
+
# A production ref is equal to another one when it
|
43
|
+
# refers to the same production or when it is compared to
|
44
|
+
# the production it refers to.
|
45
|
+
# @param other [Production, ProductionRef]
|
46
|
+
# @return [TrueClass, FalseClass]
|
47
|
+
def ==: ((Production | ProductionRef) other) -> bool
|
48
|
+
|
49
|
+
# Produce a hash value.
|
50
|
+
# A reference has no identity on its own,
|
51
|
+
# the method returns the hash value of the
|
52
|
+
# referenced production
|
53
|
+
# @return [Integer] the hash value
|
54
|
+
def hash: () -> Integer
|
55
|
+
|
56
|
+
# Make this reference point to the given production.
|
57
|
+
# @param aProduction [Production, ProductionRef] the production
|
58
|
+
# to refer to
|
59
|
+
def bind_to: ((Production | ProductionRef) aProduction) -> (nil | untyped)
|
60
|
+
|
61
|
+
# Clear the reference to the target production.
|
62
|
+
def unbind: () -> nil
|
63
|
+
|
64
|
+
# Check that this object doesn't refer to any production.
|
65
|
+
# @return [TrueClass, FalseClass] true when this object doesn't
|
66
|
+
# point to a production.
|
67
|
+
def unbound?: () -> bool
|
68
|
+
|
69
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
70
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor
|
71
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Module for classes implementing the Sequitur algorithm
|
3
|
+
# Specialization of the DynamicGrammar class.
|
4
|
+
# A Sequitur grammar is a context-free grammar that is entirely built
|
5
|
+
# from a sequence of input tokens through the Sequitur algorithm.
|
6
|
+
class SequiturGrammar < DynamicGrammar
|
7
|
+
# Build the grammar from an enumerator of tokens.
|
8
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
9
|
+
# over the input tokens.
|
10
|
+
def initialize: (untyped anEnum) -> void
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
# Struct used for internal purposes
|
15
|
+
CollisionDiagnosis: untyped
|
16
|
+
|
17
|
+
# Assuming that a new input token was added to the start production,
|
18
|
+
# enforce the digram unicity and rule utility rules
|
19
|
+
# begin
|
20
|
+
# if a digram D occurs twice in the grammar then
|
21
|
+
# add a production P : D (if not already there)
|
22
|
+
# replace both Ds with a reference to P (reduction step).
|
23
|
+
# end
|
24
|
+
# if a production P : RHS is referenced only once then
|
25
|
+
# replace P by its RHS (derivation step)
|
26
|
+
# remove P from grammar
|
27
|
+
# end
|
28
|
+
# end until digram unicity and rule utility are met
|
29
|
+
def enforce_rules: () -> untyped
|
30
|
+
|
31
|
+
# Check whether a digram is used twice in the grammar.
|
32
|
+
# Return an empty Hash if each digram appears once.
|
33
|
+
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
34
|
+
# Where Pi, Pk are two productions where the digram occurs.
|
35
|
+
def detect_collision: () -> untyped
|
36
|
+
|
37
|
+
# When a collision diagnosis indicates that a given
|
38
|
+
# digram d occurs twice in the grammar
|
39
|
+
# Then create a new production that will have
|
40
|
+
# the symbols of d as its rhs members.
|
41
|
+
def restore_unicity: (untyped aDiagnosis) -> untyped
|
42
|
+
|
43
|
+
# Return a production that is used less than twice in the grammar.
|
44
|
+
def detect_useless_production: () -> untyped
|
45
|
+
|
46
|
+
# Given the passed production P is referenced only once.
|
47
|
+
# Then replace P by its RHS where it is referenced.
|
48
|
+
# And delete P
|
49
|
+
def restore_utility: (untyped prod_index) -> untyped
|
50
|
+
|
51
|
+
# Create a new production that will have the symbols from digram
|
52
|
+
# as its rhs members.
|
53
|
+
def build_production_for: (untyped aDigram) -> untyped
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Module for classes implementing the Sequitur algorithm
|
3
|
+
# Represents a sequence (concatenation) of grammar symbols
|
4
|
+
# as they appear in rhs of productions
|
5
|
+
class SymbolSequence
|
6
|
+
# The sequence of symbols itself
|
7
|
+
attr_reader symbols: Array[untyped]
|
8
|
+
|
9
|
+
# Create an empty sequence
|
10
|
+
def initialize: () -> void
|
11
|
+
|
12
|
+
# Copy constructor invoked by dup or clone methods.
|
13
|
+
# @param orig [SymbolSequence]
|
14
|
+
def initialize_copy: (untyped orig) -> untyped
|
15
|
+
|
16
|
+
# Clear the symbol sequence.
|
17
|
+
def clear: () -> untyped
|
18
|
+
|
19
|
+
# Tell whether the sequence is empty.
|
20
|
+
# @return [TrueClass, FalseClass] true only if the sequence has no symbol in it.
|
21
|
+
def empty?: () -> untyped
|
22
|
+
|
23
|
+
# Count the number of elements in the sequence.
|
24
|
+
# @return [Integer] the number of elements
|
25
|
+
def size: () -> untyped
|
26
|
+
|
27
|
+
# Append a grammar symbol at the end of the sequence.
|
28
|
+
# @param aSymbol [Object] The symbol to append.
|
29
|
+
def <<: (untyped aSymbol) -> (nil | untyped)
|
30
|
+
|
31
|
+
# Retrieve the element from the sequence at given position.
|
32
|
+
# @param anIndex [Fixnum] A zero-based index of the element to access.
|
33
|
+
def []: (untyped anIndex) -> untyped
|
34
|
+
|
35
|
+
# Equality testing.
|
36
|
+
# @param other [SymbolSequence, Array] the other sequence
|
37
|
+
# to compare to.
|
38
|
+
# @return [TrueClass, FalseClass] true when an item from self equals the corresponding
|
39
|
+
# item from 'other'
|
40
|
+
def ==: (untyped other) -> untyped
|
41
|
+
|
42
|
+
# Select the references to production appearing in the rhs.
|
43
|
+
# @return [Array<ProductionRef>]
|
44
|
+
def references: () -> untyped
|
45
|
+
|
46
|
+
# Select the references of the given production appearing in the rhs.
|
47
|
+
# @param aProduction [Production]
|
48
|
+
# @return [Array<ProductionRef>]
|
49
|
+
def references_of: (untyped aProduction) -> untyped
|
50
|
+
|
51
|
+
# Emit a text representation of the symbol sequence.
|
52
|
+
# Text is of the form: space-separated sequence of symbols.
|
53
|
+
# @return [String]
|
54
|
+
def to_string: () -> untyped
|
55
|
+
|
56
|
+
# Insert at position the elements from another sequence.
|
57
|
+
# @param position [Fixnum] A zero-based index of the symbols to replace.
|
58
|
+
# @param another [SymbolSequence] A production with a two-elements rhs
|
59
|
+
# (a single digram).
|
60
|
+
def insert_at: (untyped position, untyped another) -> untyped
|
61
|
+
|
62
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
63
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
64
|
+
# s1 s2 by a reference to P.
|
65
|
+
# @param index [Integer] the position of a two symbol sequence to be replaced
|
66
|
+
# by the production
|
67
|
+
# @param aProduction [Production, ProductionRef] a production that
|
68
|
+
# consists exactly of one digram (= 2 symbols).
|
69
|
+
def reduce_step: (Integer index, (Production | ProductionRef) aProduction) -> untyped
|
70
|
+
|
71
|
+
# Remove the element at given position
|
72
|
+
# @param position [Integer] a zero-based index.
|
73
|
+
def delete_at: (untyped position) -> untyped
|
74
|
+
|
75
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
76
|
+
# @param aVisitor [GrammarVisitor]
|
77
|
+
def accept: (untyped aVisitor) -> untyped
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def invalidate_refs: () -> untyped
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Build a Sequitur-generated grammar based on the sequence of input tokens.
|
3
|
+
#
|
4
|
+
# @param tokens [String, Enumerator] The input sequence of input tokens.
|
5
|
+
# Can be a sequence of characters (i.e. a String) or an Enumerator.
|
6
|
+
# Tokens returned by enumerator should respond to the :hash message.
|
7
|
+
# @return [SequiturGrammar] a grammar that encodes the input.
|
8
|
+
def self.build_from: ((String | ::Enumerator[untyped]) tokens) -> SequiturGrammar
|
9
|
+
end
|
@@ -42,7 +42,7 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
42
42
|
expect(clone_a).to eq(instance)
|
43
43
|
|
44
44
|
# Reference objects are distinct but point to the same production
|
45
|
-
expect(clone_a.symbols[1]
|
45
|
+
expect(clone_a.symbols[1]).not_to equal(instance.symbols[1])
|
46
46
|
|
47
47
|
# Modifying the clone...
|
48
48
|
clone_a.symbols[1] = 'diff'
|
@@ -91,9 +91,6 @@ module Sequitur # Re-open the module to get rid of qualified names
|
|
91
91
|
expect(refs.size).to eq(2)
|
92
92
|
expect(refs).to eq([ref, ref])
|
93
93
|
|
94
|
-
refs = subject.references
|
95
|
-
expect(refs.size).to eq(2)
|
96
|
-
expect(refs).to eq([ref, ref])
|
97
94
|
specific_refs = subject.references_of(a_prod)
|
98
95
|
expect(specific_refs).to eq(refs)
|
99
96
|
|