sequitur 0.1.24 → 0.1.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/Gemfile +0 -2
- data/README.md +2 -3
- data/appveyor.yml +10 -10
- data/examples/inductive_english.rb +35 -0
- data/lib/sequitur/constants.rb +4 -4
- data/lib/sequitur/digram.rb +9 -9
- data/lib/sequitur/dynamic_grammar.rb +9 -8
- data/lib/sequitur/formatter/base_formatter.rb +1 -1
- data/lib/sequitur/formatter/base_text.rb +5 -2
- data/lib/sequitur/formatter/debug.rb +10 -3
- data/lib/sequitur/grammar_visitor.rb +6 -6
- data/lib/sequitur/production.rb +20 -17
- data/lib/sequitur/production_ref.rb +9 -8
- data/lib/sequitur/symbol_sequence.rb +6 -6
- data/lib/sequitur.rb +2 -1
- data/sig/lib/sequitur/constants.rbs +10 -0
- data/sig/lib/sequitur/digram.rbs +37 -0
- data/sig/lib/sequitur/dynamic_grammar.rbs +58 -0
- data/sig/lib/sequitur/formatter/base_formatter.rbs +20 -0
- data/sig/lib/sequitur/formatter/base_text.rbs +62 -0
- data/sig/lib/sequitur/formatter/debug.rbs +89 -0
- data/sig/lib/sequitur/production.rbs +120 -0
- data/sig/lib/sequitur/production_ref.rbs +73 -0
- data/sig/lib/sequitur/sequitur_grammar.rbs +55 -0
- data/sig/lib/sequitur/symbol_sequence.rbs +83 -0
- data/sig/lib/sequitur.rbs +9 -0
- metadata +26 -14
@@ -0,0 +1,37 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# In linguistics, a digram is a sequence of two letters.
|
3
|
+
# In Sequitur, a digram is a sequence of two consecutive symbols that
|
4
|
+
# appear in a production rule. Each symbol in a digram
|
5
|
+
# can be a terminal or not.
|
6
|
+
class Digram
|
7
|
+
# The sequence of two consecutive grammar symbols.
# @return [Array<String, Symbol>] The two symbols should respond to the :hash message.
attr_reader symbols: Array[String | Symbol]
|
10
|
+
|
11
|
+
# @return [String] A unique hash key of the digram
|
12
|
+
attr_reader key: String
|
13
|
+
|
14
|
+
# @return [Sequitur::Production] The production in which the digram occurs
|
15
|
+
attr_reader production: Sequitur::Production
|
16
|
+
|
17
|
+
# Constructor.
|
18
|
+
# A digram represents a sequence of two symbols
|
19
|
+
# (that appears in a rhs of a production).
|
20
|
+
# Terminal symbols must respond to the :hash message.
|
21
|
+
# @param symbol1 [String, Symbol] First element of the digram
|
22
|
+
# @param symbol2 [String, Symbol] Second element of the digram
|
23
|
+
# @param aProduction [Sequitur::Production] Production in whose RHS
|
24
|
+
# the sequence symbol1 symbol2 appears.
|
25
|
+
def initialize: ((String | Symbol) symbol1, (String | Symbol) symbol2, Sequitur::Production aProduction) -> void
|
26
|
+
|
27
|
+
# Equality testing.
|
28
|
+
# true iff keys of both digrams are equal, false otherwise
|
29
|
+
# @param other [Sequitur::Digram] another to compare with
|
30
|
+
# @return [TrueClass, FalseClass]
|
31
|
+
def ==: (Sequitur::Digram other) -> bool
|
32
|
+
|
33
|
+
# Does the digram consist of the same symbol twice?
|
34
|
+
# @return [TrueClass, FalseClass] true when symbols.first == symbols.last
|
35
|
+
def repeating?: () -> bool
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# A dynamic grammar is a context-free grammar that can be built incrementally.
|
3
|
+
# Formally, a grammar has:
|
4
|
+
# One start production
|
5
|
+
# Zero or more other productions
|
6
|
+
# Each production has a rhs that is a sequence of grammar symbols.
|
7
|
+
# Grammar symbols are categorized into
|
8
|
+
# -terminal symbols (i.e. String, Ruby Symbol,...)
|
9
|
+
# -non-terminal symbols (i.e. ProductionRef)
|
10
|
+
class DynamicGrammar
|
11
|
+
# @return [Sequitur::Production] Link to the start production.
|
12
|
+
attr_reader start: Sequitur::Production
|
13
|
+
|
14
|
+
# @return [Array<Sequitur::Production>] The set of production rules of the grammar
|
15
|
+
attr_reader productions: Array[Sequitur::Production]
|
16
|
+
|
17
|
+
# @return [TrueClass, FalseClass] Trace the execution of the algorithm.
|
18
|
+
attr_accessor trace: bool
|
19
|
+
|
20
|
+
# Constructor.
|
21
|
+
# Build a grammar with one empty rule as its start rule.
|
22
|
+
def initialize: () -> void
|
23
|
+
|
24
|
+
# Emit a text representation of the grammar.
|
25
|
+
# Each production rule is emitted per line.
|
26
|
+
# @return [String]
|
27
|
+
def to_string: () -> String
|
28
|
+
|
29
|
+
# Add a given production to the grammar.
|
30
|
+
# @param aProduction [Sequitur::Production]
|
31
|
+
# @return [Array<Sequitur::Production>]
|
32
|
+
def add_production: (Sequitur::Production aProduction) -> Array[Sequitur::Production]
|
33
|
+
|
34
|
+
# Remove a production with given index from the grammar
|
35
|
+
# @param anIndex [Integer]
|
36
|
+
# @return [Sequitur::Production] the production removed from the grammar.
|
37
|
+
def remove_production: (Integer anIndex) -> Sequitur::Production
|
38
|
+
|
39
|
+
# Add the given token to the grammar.
|
40
|
+
# Append the token to the rhs of the start rule.
|
41
|
+
# @param aToken [Object] input token to add
|
42
|
+
def add_token: (untyped aToken) -> untyped
|
43
|
+
|
44
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
45
|
+
# A visitee is expected to accept the visit from a visitor object
|
46
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor object
|
47
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
48
|
+
|
49
|
+
# Factory method. Returns a visitor for this grammar.
|
50
|
+
# @return [Sequitur::GrammarVisitor]
|
51
|
+
def visitor: () -> Sequitur::GrammarVisitor
|
52
|
+
|
53
|
+
# Append a given symbol to the rhs of passed production.
|
54
|
+
# @param aProduction [Production]
|
55
|
+
# @param aSymbol [Object]
|
56
|
+
def append_symbol_to: (Production aProduction, untyped aSymbol) -> untyped
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Namespace dedicated to grammar formatters.
|
3
|
+
module Formatter
|
4
|
+
# Superclass for grammar formatters.
|
5
|
+
class BaseFormatter
|
6
|
+
# The IO output stream in which the formatter's result will be sent.
|
7
|
+
attr_accessor output: untyped
|
8
|
+
|
9
|
+
# Constructor.
|
10
|
+
# @param anIO [IO] an output IO where the formatter's result will
|
11
|
+
# be placed.
|
12
|
+
def initialize: (untyped anIO) -> void
|
13
|
+
|
14
|
+
# Given a grammar or a grammar visitor, perform the visit
|
15
|
+
# and render the visit events in the output stream.
|
16
|
+
# @param aGrmOrVisitor [DynamicGrammar, GrammarVisitor]
|
17
|
+
def render: ((DynamicGrammar | GrammarVisitor) aGrmOrVisitor) -> untyped
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Sequitur
|
2
|
+
module Formatter
|
3
|
+
# A formatter class that can render a dynamic grammar in plain text.
|
4
|
+
# @example
|
5
|
+
# some_grammar = ... # Points to a DynamicGrammar-like object
|
6
|
+
# # Output the result to the standard console output
|
7
|
+
# formatter = Sequitur::Formatter::BaseText.new(STDOUT)
|
8
|
+
# # Render the grammar (through a visitor)
|
9
|
+
# formatter.render(some_grammar.visitor)
|
10
|
+
class BaseText < BaseFormatter
|
11
|
+
attr_reader prod_lookup: ::Hash[Production, Integer]
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
# @param anIO [IO] The output stream to which the rendered grammar
|
15
|
+
# is written.
|
16
|
+
def initialize: (IO anIO) -> void
|
17
|
+
|
18
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
19
|
+
# Notification of a visit event: the visitor is about to visit a grammar
|
20
|
+
# @param aGrammar [DynamicGrammar]
|
21
|
+
def before_grammar: (DynamicGrammar aGrammar) -> untyped
|
22
|
+
|
23
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
24
|
+
# Notification of a visit event: the visitor is about to visit
|
25
|
+
# a production
|
26
|
+
# @param aProduction [Production]
|
27
|
+
def before_production: (Production aProduction) -> untyped
|
28
|
+
|
29
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
# Notification of a visit event: the visitor is about to visit
# the rhs of a production
# @param _ [Array]
def before_rhs: (::Array[untyped] _) -> untyped
|
34
|
+
|
35
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
36
|
+
# Notification of a visit event: the visitor is about to visit
|
37
|
+
# a terminal symbol from the rhs of a production
|
38
|
+
# @param aSymbol [Object]
|
39
|
+
def before_terminal: (untyped aSymbol) -> untyped
|
40
|
+
|
41
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
42
|
+
# Notification of a visit event: the visitor is about to visit
|
43
|
+
# a non-terminal (= an allusion to a production) in the rhs of a
|
44
|
+
# production
|
45
|
+
# @param aProduction [Production] a production occurring in the rhs
|
46
|
+
def before_non_terminal: (Production aProduction) -> untyped
|
47
|
+
|
48
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
49
|
+
# Notification of a visit event: the visitor completed the visit
|
50
|
+
# of a production
|
51
|
+
# @param _ [Production]
|
52
|
+
def after_production: (Production _) -> untyped
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Generate a name of a given production.
|
57
|
+
# @param aProduction [Production]
|
58
|
+
# @return [String]
|
59
|
+
def prod_name: (Production aProduction) -> String
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module Sequitur
|
2
|
+
module Formatter
|
3
|
+
# A formatter class that can render the notification events
|
4
|
+
# from a grammar visitor
|
5
|
+
# @example
|
6
|
+
# some_grammar = ... # Points to a DynamicGrammar-like object
|
7
|
+
# # Output the result to the standard console output
|
8
|
+
# formatter = Sequitur::Formatter::Debug.new(STDOUT)
|
9
|
+
# # Render the visit notifications
|
10
|
+
# formatter.render(some_grammar.visitor)
|
11
|
+
class Debug < BaseFormatter
|
12
|
+
# @return [Integer] Current indentation level
|
13
|
+
attr_accessor indentation: Integer
|
14
|
+
|
15
|
+
# Constructor.
|
16
|
+
# @param anIO [IO] The output stream to which the rendered grammar
|
17
|
+
# is written.
|
18
|
+
def initialize: (IO anIO) -> void
|
19
|
+
|
20
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
21
|
+
# Notification of a visit event: the visitor is about to visit a grammar
|
22
|
+
# @param _ [DynamicGrammar]
|
23
|
+
def before_grammar: (DynamicGrammar _) -> untyped
|
24
|
+
|
25
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
26
|
+
# Notification of a visit event: the visitor is about to visit
|
27
|
+
# a production
|
28
|
+
# @param _ [Production]
|
29
|
+
def before_production: (Production _) -> untyped
|
30
|
+
|
31
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
32
|
+
# Notification of a visit event: the visitor is about to visit
|
33
|
+
# the rhs of a production
|
34
|
+
# @param _ [Array]
|
35
|
+
def before_rhs: (::Array[untyped] _) -> untyped
|
36
|
+
|
37
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
38
|
+
# Notification of a visit event: the visitor is about to visit
|
39
|
+
# a terminal symbol from the rhs of a production
|
40
|
+
# @param _ [Object]
|
41
|
+
def before_terminal: (untyped _) -> untyped
|
42
|
+
|
43
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
44
|
+
# Notification of a visit event: the visitor completed the visit of
|
45
|
+
# a terminal symbol from the rhs of a production
|
46
|
+
# @param _ [Object]
|
47
|
+
def after_terminal: (untyped _) -> untyped
|
48
|
+
|
49
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
50
|
+
# Notification of a visit event: the visitor is about to visit
|
51
|
+
# a non-terminal (= an allusion to a production) in the rhs of a
|
52
|
+
# production
|
53
|
+
# @param _ [Production] a production occurring in the rhs
|
54
|
+
def before_non_terminal: (Production _) -> untyped
|
55
|
+
|
56
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
57
|
+
# Notification of a visit event: the visitor completed the visit of
|
58
|
+
# a non-terminal symbol from the rhs of a production.
|
59
|
+
# @param _ [Object]
|
60
|
+
def after_non_terminal: (untyped _) -> untyped
|
61
|
+
|
62
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
63
|
+
# Notification of a visit event: the visitor completed the visit of
|
64
|
+
# the rhs of a production
|
65
|
+
# @param _ [Array]
|
66
|
+
def after_rhs: (::Array[untyped] _) -> untyped
|
67
|
+
|
68
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
69
|
+
# Notification of a visit event: the visitor completed the visit
|
70
|
+
# of a production
|
71
|
+
# @param _ [Production]
|
72
|
+
def after_production: (Production _) -> untyped
|
73
|
+
|
74
|
+
# Method called by a GrammarVisitor to which the formatter subscribed.
|
75
|
+
# Notification of a visit event: the visitor completed the visit
|
76
|
+
# of a grammar
|
77
|
+
# @param _ [DynamicGrammar]
|
78
|
+
def after_grammar: (DynamicGrammar _) -> untyped
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
def indent: () -> Integer
|
83
|
+
|
84
|
+
def dedent: () -> Integer
|
85
|
+
|
86
|
+
def output_event: (Symbol anEvent, Integer indentationLevel) -> nil
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# In a context-free grammar, a production is a rule in which
|
3
|
+
# its left-hand side (LHS) consists solely of a non-terminal symbol
|
4
|
+
# and the right-hand side (RHS) consists of a sequence of symbols.
|
5
|
+
# The symbols in RHS can be either terminal or non-terminal symbols.
|
6
|
+
# The rule stipulates that the LHS is equivalent to the RHS,
|
7
|
+
# in other words every occurrence of the LHS can be replaced by the
|
8
|
+
# corresponding RHS.
|
9
|
+
# Implementation note: the object id of the production is taken as its LHS.
|
10
|
+
class Production
|
11
|
+
# @return [Sequitur::SymbolSequence] The right-hand side (rhs)
# consists of a sequence of grammar symbols
attr_reader rhs: Sequitur::SymbolSequence
|
14
|
+
|
15
|
+
# @return [Integer] The reference count (= how many times other productions reference this one)
attr_reader refcount: Integer
|
17
|
+
|
18
|
+
# @return [Array<Sequitur::Digram>] The sequence of digrams appearing in the RHS
attr_reader digrams: Array[Sequitur::Digram]
|
20
|
+
|
21
|
+
# Constructor.
|
22
|
+
# Build a production with an empty RHS.
|
23
|
+
def initialize: () -> void
|
24
|
+
|
25
|
+
# Identity testing.
|
26
|
+
# @param other [Production, ProductionRef] another production or production reference.
|
27
|
+
# @return [TrueClass, FalseClass] true when the receiver and other are the same.
|
28
|
+
def ==: ((Production | ProductionRef) other) -> bool
|
29
|
+
|
30
|
+
# Is the rhs empty?
|
31
|
+
# @return [TrueClass, FalseClass] true if the rhs has no members.
|
32
|
+
def empty?: () -> bool
|
33
|
+
|
34
|
+
# Increment the reference count by one.
|
35
|
+
# @return [Integer]
|
36
|
+
def incr_refcount: () -> Integer
|
37
|
+
|
38
|
+
# Decrement the reference count by one.
|
39
|
+
# @return [Integer]
|
40
|
+
def decr_refcount: () -> Integer
|
41
|
+
|
42
|
+
# Select the references to production appearing in the rhs.
|
43
|
+
# @return [Array<ProductionRef>]
|
44
|
+
def references: () -> Array[Sequitur::ProductionRef]
|
45
|
+
|
46
|
+
# Look in the rhs for all the references to the production passed as argument.
|
47
|
+
# @param a_prod [Production, ProductionRef] The production to search for.
|
48
|
+
# @return [Array<ProductionRef>]
|
49
|
+
def references_of: ((Production | ProductionRef) a_prod) -> Array[ProductionRef]
|
50
|
+
|
51
|
+
# Enumerate the digrams appearing in the right-hand side (rhs)
|
52
|
+
# @return [Array<Sequitur::Digram>] the list of digrams found in rhs of this production.
|
53
|
+
def recalc_digrams: () -> Array[Sequitur::Digram]
|
54
|
+
|
55
|
+
# Does the rhs have exactly one digram only (= 2 symbols)?
|
56
|
+
# @return [TrueClass, FalseClass] true when the rhs contains exactly two symbols.
|
57
|
+
def single_digram?: () -> bool
|
58
|
+
|
59
|
+
# Detect whether the last digram occurs twice
|
60
|
+
# Assumption: when a digram occurs twice in a production then it must occur
|
61
|
+
# at the end of the rhs
|
62
|
+
# @return [TrueClass, FalseClass] true when the digram occurs twice in rhs.
|
63
|
+
def repeated_digram?: () -> bool
|
64
|
+
|
65
|
+
# Retrieve the last digram appearing in the RHS (if any).
|
66
|
+
# @return [Sequitur::Digram, NilClass] last digram in the rhs otherwise nil.
|
67
|
+
def last_digram: () -> (nil | Sequitur::Digram)
|
68
|
+
|
69
|
+
# Emit a text representation of the production rule.
|
70
|
+
# Text is of the form:
|
71
|
+
# object id of production : rhs as space-separated sequence of symbols.
|
72
|
+
# @return [String]
|
73
|
+
def to_string: () -> ::String
|
74
|
+
|
75
|
+
# Add a (grammar) symbol at the end of the RHS.
|
76
|
+
# @param aSymbol [Object] A (grammar) symbol to add.
|
77
|
+
def append_symbol: (untyped aSymbol) -> untyped
|
78
|
+
|
79
|
+
# Clear the right-hand side.
|
80
|
+
# Any referenced production has its reference counter decremented.
|
81
|
+
def clear_rhs: () -> untyped
|
82
|
+
|
83
|
+
# Find all the positions where the digram occurs in the rhs
|
84
|
+
# @param symb1 [Object] first symbol of the digram
|
85
|
+
# @param symb2 [Object] second symbol of the digram
|
86
|
+
# @return [Array<Integer>] the list of indices where the digram occurs in rhs.
|
87
|
+
# @example
|
88
|
+
# # Given the production p : a b c a b a b d
|
89
|
+
# #Then ...
|
90
|
+
# p.positions_of(a, b) # => [0, 3, 5]
|
91
|
+
# # Caution: "overlapping" digrams shouldn't be counted
|
92
|
+
# # Given the production p : a a b a a a c d
|
93
|
+
# # Then ...
|
94
|
+
# p.positions_of(a, a) # => [0, 3]
|
95
|
+
def positions_of: (untyped symb1, untyped symb2) -> Array[Integer]
|
96
|
+
|
97
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
98
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
99
|
+
# s1 s2 by a reference to P.
|
100
|
+
# @param another [Production, ProductionRef] a production that
|
101
|
+
# consists exactly of one digram (= 2 symbols).
|
102
|
+
def reduce_step: ((Production | ProductionRef) another) -> untyped
|
103
|
+
|
104
|
+
# Replace every occurrence of 'another' production in self.rhs by
|
105
|
+
# the symbols in the rhs of 'another'.
|
106
|
+
# @param another [Production, ProductionRef] a production that
|
107
|
+
# consists exactly of one digram (= 2 symbols).
|
108
|
+
# @example Synopsis
|
109
|
+
# # Given the production p_A : a p_B b p_B c
|
110
|
+
# # And the production p_B : x y
|
111
|
+
# # Then...
|
112
|
+
# p_A.derive_step(p_B)
|
113
|
+
# Modifies p_A into: p_A -> a x y b x y c
|
114
|
+
def derive_step: ((Production | ProductionRef) another) -> untyped
|
115
|
+
|
116
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
117
|
+
# @param aVisitor [Sequitur::GrammarVisitor]
|
118
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# A production reference is a grammar symbol that may appear in the right-hand
|
3
|
+
# side of a production P1 and that refers to a production P2.
|
4
|
+
# Every time a production P2 appears in the right-hand side of
|
5
|
+
# production P1, this is implemented by inserting a production reference to P2
|
6
|
+
# in the appropriate position in the RHS of P1.
|
7
|
+
# In the literature, production references are also called non terminal
|
8
|
+
# symbols
|
9
|
+
# @example
|
10
|
+
# # Given a production rule...
|
11
|
+
# prod = Sequitur::Production.new
|
12
|
+
# puts prod.refcount # outputs 0
|
13
|
+
# # ... Build a reference to it
|
14
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
15
|
+
# # ... Production reference count is updated...
|
16
|
+
# puts prod.refcount # outputs 1
|
17
|
+
class ProductionRef
|
18
|
+
# @return [Sequitur::Production] Link to the production to reference.
|
19
|
+
attr_reader production: Sequitur::Production
|
20
|
+
|
21
|
+
# Constructor
|
22
|
+
# @param target [Production, ProductionRef]
|
23
|
+
# The production that is being referenced.
|
24
|
+
def initialize: ((Production | ProductionRef) target) -> void
|
25
|
+
|
26
|
+
# Copy constructor invoked by dup or clone methods.
|
27
|
+
# @param orig [ProductionRef]
|
28
|
+
# @example
|
29
|
+
# prod = Sequitur::Production.new
|
30
|
+
# ref = Sequitur::ProductionRef.new(prod)
|
31
|
+
# copy_ref = ref.dup
|
32
|
+
# puts prod.refcount # outputs 2
|
33
|
+
def initialize_copy: (ProductionRef orig) -> void
|
34
|
+
|
35
|
+
# Emit the text representation of a production reference.
|
36
|
+
# @return [String]
|
37
|
+
def to_s: () -> String
|
38
|
+
|
39
|
+
alias to_string to_s
|
40
|
+
|
41
|
+
# Equality testing.
|
42
|
+
# A production ref is equal to another one when it
|
43
|
+
# refers to the same production or when it is compared to
|
44
|
+
# the production it refers to.
|
45
|
+
# @param other [Production, ProductionRef]
|
46
|
+
# @return [TrueClass, FalseClass]
|
47
|
+
def ==: ((Production | ProductionRef) other) -> bool
|
48
|
+
|
49
|
+
# Produce a hash value.
|
50
|
+
# A reference has no identity on its own,
|
51
|
+
# the method returns the hash value of the
|
52
|
+
# referenced production
|
53
|
+
# @return [Integer] the hash value
|
54
|
+
def hash: () -> Integer
|
55
|
+
|
56
|
+
# Make this reference point to the given production.
|
57
|
+
# @param aProduction [Production, ProductionRef] the production
|
58
|
+
# to refer to
|
59
|
+
def bind_to: ((Production | ProductionRef) aProduction) -> (nil | untyped)
|
60
|
+
|
61
|
+
# Clear the reference to the target production.
|
62
|
+
def unbind: () -> nil
|
63
|
+
|
64
|
+
# Check that this object doesn't refer to any production.
|
65
|
+
# @return [TrueClass, FalseClass] true when this object doesn't
|
66
|
+
# point to a production.
|
67
|
+
def unbound?: () -> bool
|
68
|
+
|
69
|
+
# Part of the 'visitee' role in the Visitor design pattern.
|
70
|
+
# @param aVisitor [Sequitur::GrammarVisitor] the visitor
|
71
|
+
def accept: (Sequitur::GrammarVisitor aVisitor) -> untyped
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Module for classes implementing the Sequitur algorithm
|
3
|
+
# Specialization of the DynamicGrammar class.
|
4
|
+
# A Sequitur grammar is a context-free grammar that is entirely built
|
5
|
+
# from a sequence of input tokens through the Sequitur algorithm.
|
6
|
+
class SequiturGrammar < DynamicGrammar
|
7
|
+
# Build the grammar from an enumerator of tokens.
|
8
|
+
# @param anEnum [Enumerator] an enumerator that will iterate
|
9
|
+
# over the input tokens.
|
10
|
+
def initialize: (untyped anEnum) -> void
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
# Struct used for internal purposes
|
15
|
+
CollisionDiagnosis: untyped
|
16
|
+
|
17
|
+
# Assuming that a new input token was added to the start production,
|
18
|
+
# enforce the digram unicity and rule utility rules
|
19
|
+
# begin
|
20
|
+
# if a digram D occurs twice in the grammar then
|
21
|
+
# add a production P : D (if not already there)
|
22
|
+
# replace both Ds with P (reduction step).
|
23
|
+
# end
|
24
|
+
# if a production P : RHS is referenced only once then
|
25
|
+
# replace P by its RHS (derivation step)
|
26
|
+
# remove P from grammar
|
27
|
+
# end
|
28
|
+
# end until digram unicity and rule utility are met
|
29
|
+
def enforce_rules: () -> untyped
|
30
|
+
|
31
|
+
# Check whether a digram is used twice in the grammar.
|
32
|
+
# Return an empty Hash if each digram appears once.
|
33
|
+
# Otherwise return a Hash with a pair of the form: digram => [Pi, Pk]
|
34
|
+
# Where Pi, Pk are two productions where the digram occurs.
|
35
|
+
def detect_collision: () -> untyped
|
36
|
+
|
37
|
+
# When a collision diagnosis indicates that a given
|
38
|
+
# digram d occurs twice in the grammar
|
39
|
+
# Then create a new production that will have
|
40
|
+
# the symbols of d as its rhs members.
|
41
|
+
def restore_unicity: (untyped aDiagnosis) -> untyped
|
42
|
+
|
43
|
+
# Return a production that is used less than twice in the grammar.
|
44
|
+
def detect_useless_production: () -> untyped
|
45
|
+
|
46
|
+
# Given the passed production P is referenced only once.
|
47
|
+
# Then replace P by its RHS where it is referenced.
|
48
|
+
# And delete P
|
49
|
+
def restore_utility: (untyped prod_index) -> untyped
|
50
|
+
|
51
|
+
# Create a new production that will have the symbols from digram
|
52
|
+
# as its rhs members.
|
53
|
+
def build_production_for: (untyped aDigram) -> untyped
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Module for classes implementing the Sequitur algorithm
|
3
|
+
# Represents a sequence (concatenation) of grammar symbols
|
4
|
+
# as they appear in rhs of productions
|
5
|
+
class SymbolSequence
|
6
|
+
# The sequence of symbols itself
|
7
|
+
attr_reader symbols: Array[untyped]
|
8
|
+
|
9
|
+
# Create an empty sequence
|
10
|
+
def initialize: () -> void
|
11
|
+
|
12
|
+
# Copy constructor invoked by dup or clone methods.
|
13
|
+
# @param orig [SymbolSequence]
|
14
|
+
def initialize_copy: (untyped orig) -> untyped
|
15
|
+
|
16
|
+
# Clear the symbol sequence.
|
17
|
+
def clear: () -> untyped
|
18
|
+
|
19
|
+
# Tell whether the sequence is empty.
# @return [true, false] true only if the sequence has no symbol in it.
def empty?: () -> bool
|
22
|
+
|
23
|
+
# Count the number of elements in the sequence.
# @return [Integer] the number of elements
def size: () -> Integer
|
26
|
+
|
27
|
+
# Append a grammar symbol at the end of the sequence.
|
28
|
+
# @param aSymbol [Object] The symbol to append.
|
29
|
+
def <<: (untyped aSymbol) -> (nil | untyped)
|
30
|
+
|
31
|
+
# Retrieve the element from the sequence at given position.
|
32
|
+
# @param anIndex [Integer] A zero-based index of the element to access.
|
33
|
+
def []: (untyped anIndex) -> untyped
|
34
|
+
|
35
|
+
# Equality testing.
|
36
|
+
# @param other [SymbolSequence, Array] the other sequence
|
37
|
+
# to compare to.
|
38
|
+
# @return [true, false] true when an item from self equals the corresponding
|
39
|
+
# item from 'other'
|
40
|
+
def ==: (untyped other) -> untyped
|
41
|
+
|
42
|
+
# Select the references to production appearing in the rhs.
|
43
|
+
# @return [Array<ProductionRef>]
|
44
|
+
def references: () -> untyped
|
45
|
+
|
46
|
+
# Select the references of the given production appearing in the rhs.
|
47
|
+
# @param aProduction [Production]
|
48
|
+
# @return [Array<ProductionRef>]
|
49
|
+
def references_of: (untyped aProduction) -> untyped
|
50
|
+
|
51
|
+
# Emit a text representation of the symbol sequence.
|
52
|
+
# Text is of the form: space-separated sequence of symbols.
|
53
|
+
# @return [String]
|
54
|
+
def to_string: () -> untyped
|
55
|
+
|
56
|
+
# Insert at position the elements from another sequence.
|
57
|
+
# @param position [Integer] A zero-based index of the symbols to replace.
|
58
|
+
# @param another [SymbolSequence] A production with a two-elements rhs
|
59
|
+
# (a single digram).
|
60
|
+
def insert_at: (untyped position, untyped another) -> untyped
|
61
|
+
|
62
|
+
# Given that the production P passed as argument has exactly 2 symbols
|
63
|
+
# in its rhs s1 s2, substitute in the rhs of self all occurrences of
|
64
|
+
# s1 s2 by a reference to P.
|
65
|
+
# @param index [Integer] the position of a two symbol sequence to be replaced
|
66
|
+
# by the production
|
67
|
+
# @param aProduction [Production, ProductionRef] a production that
|
68
|
+
# consists exactly of one digram (= 2 symbols).
|
69
|
+
def reduce_step: (Integer index, (Production | ProductionRef) aProduction) -> untyped
|
70
|
+
|
71
|
+
# Remove the element at given position
|
72
|
+
# @param position [Integer] a zero-based index.
|
73
|
+
def delete_at: (untyped position) -> untyped
|
74
|
+
|
75
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
76
|
+
# @param aVisitor [GrammarVisitor]
|
77
|
+
def accept: (untyped aVisitor) -> untyped
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def invalidate_refs: () -> untyped
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Sequitur
|
2
|
+
# Build a Sequitur-generated grammar based on the sequence of input tokens.
|
3
|
+
#
|
4
|
+
# @param tokens [String, Enumerator] The sequence of input tokens.
|
5
|
+
# Can be a sequence of characters (i.e. a String) or an Enumerator.
|
6
|
+
# Tokens returned by enumerator should respond to the :hash message.
|
7
|
+
# @return [SequiturGrammar] a grammar that encodes the input.
|
8
|
+
def self.build_from: ((String | ::Enumerator[untyped]) tokens) -> SequiturGrammar
|
9
|
+
end
|