grammaphone 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/grammaphone.rb +40 -6
- data/lib/grammaphone/tokens.rb +33 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e243f173f31ceddb840bd5e1c166f2f5aae6574e79f44bcc8edab86f397e6b6e
|
4
|
+
data.tar.gz: 2c3a92d6ecfcccf67b8f3330bceae184df8a64dc4b1a0f3765085f69d57abf51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 867e6191b314ea4a1c1bda97c053fc910ddb44fcea1d8dc24248307676277298066d40390741a48f8337a457283cf84baf92d4a67e03c30e5d90cb7fdbb964e2
|
7
|
+
data.tar.gz: 5fac8bfa0ec133c8fdc1e94cb9d673dd11a902487de46bb941262e33bdc6cd4475e15f4fa03947bf3a2a039eed01cef5c832f29131652a7af022ec7c12fd87d0
|
data/lib/grammaphone.rb
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
require_relative "grammaphone/errors"
|
2
|
+
require_relative "grammaphone/tokens"
|
3
|
+
require_relative "grammaphone/rule"
|
4
|
+
|
1
5
|
# Grammaphone is a dynamically-definable parser pseudo-generator based on a
|
2
6
|
# BNF-like grammar.
|
3
7
|
#
|
@@ -60,18 +64,29 @@
|
|
60
64
|
#
|
61
65
|
# Note that to match a space, you need to use the pattern, since the splitting function
|
62
66
|
# for rules splits on the space character, regardless of where it is.
|
63
|
-
|
64
|
-
require_relative "grammaphone/errors"
|
65
|
-
require_relative "grammaphone/tokens"
|
66
|
-
require_relative "grammaphone/rule"
|
67
|
-
|
68
67
|
class Grammaphone
|
69
68
|
|
69
|
+
# Creates a TokenStream instance using `split_method` as the function to
|
70
|
+
# split `src` into tokens.
|
71
|
+
#
|
72
|
+
# `split_method` is expected to take a String and return an Array of Strings.
|
70
73
|
def self.tokenize(src, &split_method)
|
71
74
|
TokenStream.new(src, &split_method)
|
72
75
|
end
|
73
76
|
|
74
|
-
#
|
77
|
+
# Creates a new instance of Grammaphone.
|
78
|
+
#
|
79
|
+
# `rules` is a Hash containing the rules of the grammar, as defined above.
|
80
|
+
#
|
81
|
+
# `node_type` must be a class that responds to <<. By default, this is Array.
|
82
|
+
#
|
83
|
+
# `default_action` is the method called on the results of a rule being matched.
|
84
|
+
# This function is passed the results of the rule matching, which is an instance
|
85
|
+
# of `node_type`, and the name of the rule matched. By default, this is the
|
86
|
+
# identity function, returning the input node.
|
87
|
+
#
|
88
|
+
# The results of the action are included in the output instead of the input
|
89
|
+
# instance of `node_type`.
|
75
90
|
def initialize(rules = {}, node_type = Array, &default_action)
|
76
91
|
raise ArgumentError.new("cannot form parser from a #{rules.class}") unless rules.kind_of? Hash
|
77
92
|
raise ArgumentError.new("syntax tree type must respond to <<") unless node_type.method_defined?(:"<<")
|
@@ -82,6 +97,13 @@ class Grammaphone
|
|
82
97
|
end
|
83
98
|
end
|
84
99
|
|
100
|
+
# Adds a rule with a single rule to the grammar, using the associated action,
|
101
|
+
# replacing the existing rule if there is a conflict.
|
102
|
+
#
|
103
|
+
# `action` is the method called on the results of the rule being matched.
|
104
|
+
# This function is passed the results of the rule matching, which is an instance
|
105
|
+
# of `node_type`, and the name of the rule matched. By default, this is the
|
106
|
+
# identity function, returning the input node.
|
85
107
|
def add_rule(name, rule, &action)
|
86
108
|
m = @rules.find {|r| r.name == name}
|
87
109
|
action = @default_action if action.nil?
|
@@ -93,10 +115,20 @@ class Grammaphone
|
|
93
115
|
end
|
94
116
|
end
|
95
117
|
|
118
|
+
# Returns a Hash containing a representation of existing rules. This does
|
119
|
+
# not provide access to the underlying rules.
|
96
120
|
def rules
|
97
121
|
@rules.map{|r| [r.name, r.rule]}.to_h
|
98
122
|
end
|
99
123
|
|
124
|
+
# Runs the grammar on the given token stream. If `token_stream` is not a
|
125
|
+
# TokenStream instance, then a new TokenStream instance is created.
|
126
|
+
#
|
127
|
+
# The initial rule is the first rule added, either from the initial Hash or
|
128
|
+
# the first call to `add_rule`.
|
129
|
+
#
|
130
|
+
# If the ruleset is empty when `parse` is called, an EmptyRulesetError is
|
131
|
+
# raised.
|
100
132
|
def parse(token_stream)
|
101
133
|
token_stream = TokenStream.new(token_stream) unless token_stream.kind_of?(TokenStream)
|
102
134
|
raise EmptyRulesetError if @rules.size == 0
|
@@ -104,6 +136,8 @@ class Grammaphone
|
|
104
136
|
res
|
105
137
|
end
|
106
138
|
|
139
|
+
# Runs the specified rule. Useful for testing purposes.
|
140
|
+
#
|
107
141
|
# Not to be released in shipped version
|
108
142
|
def test(name, token_stream)
|
109
143
|
self.send(name, TokenStream.new(token_stream))
|
data/lib/grammaphone/tokens.rb
CHANGED
@@ -7,6 +7,17 @@ class Grammaphone
|
|
7
7
|
# This doesn't need to be here, but it could potentially be useful
|
8
8
|
include Enumerable
|
9
9
|
|
10
|
+
# Creates a new instance of TokenStream, using the data from `tokens`. If
|
11
|
+
# `tokens` is a String, it's split using `split_method`, which takes a
|
12
|
+
# String and returns an Array. if `split_method` isn't provided, then
|
13
|
+
# `String#split` is called on `tokens`, using the space character as the
|
14
|
+
# separator.
|
15
|
+
#
|
16
|
+
# If `tokens` is an Array of Strings, the Array is duplicated, and used
|
17
|
+
# directly.
|
18
|
+
#
|
19
|
+
# If `tokens` is not a String or Array, then `to_a` is called on `tokens`
|
20
|
+
# and the result is used as the token stream.
|
10
21
|
def initialize(tokens, &split_method)
|
11
22
|
case tokens
|
12
23
|
when String
|
@@ -16,6 +27,7 @@ class Grammaphone
|
|
16
27
|
@enum = split_method.call(tokens).to_a
|
17
28
|
end
|
18
29
|
when Array
|
30
|
+
raise TokenStreamError unless tokens.all?{|t| t.kind_of?(String)}
|
19
31
|
@enum = tokens.dup
|
20
32
|
else
|
21
33
|
raise TokenStreamError unless tokens.respond_to?(:to_a)
|
@@ -110,29 +122,50 @@ class Grammaphone
|
|
110
122
|
end
|
111
123
|
end
|
112
124
|
|
125
|
+
# Token contains methods that classify what kind of element type a specific
|
126
|
+
# rule pattern is.
|
113
127
|
module Token
|
128
|
+
# The prefix used to denote a literal element.
|
114
129
|
LITERAL_PREFIX = "\""
|
115
130
|
|
131
|
+
# Checks if an element expects a literal value. A literal element is
|
132
|
+
# denoted by being prefixed by the value of `LITERAL_PREFIX`.
|
116
133
|
def self.literal?(token)
|
117
134
|
token[0] == LITERAL_PREFIX
|
118
135
|
end
|
119
136
|
|
137
|
+
# Removes the denotative marks of a literal, and returns the resulting value.
|
120
138
|
def self.clean_literal(token)
|
121
139
|
token[1..]
|
122
140
|
end
|
123
141
|
|
142
|
+
# Returns whether the token is described by the element and that the
|
143
|
+
# element is a literal.
|
144
|
+
#
|
145
|
+
# Returns `false` if the token is `nil`, since it's impossible to match a
|
146
|
+
# literal `nil`. Note, `nil` differs from an empty token.
|
124
147
|
def self.matches_literal?(element, token)
|
125
148
|
!token.nil? && literal?(element) && token == clean_literal(element)
|
126
149
|
end
|
127
150
|
|
151
|
+
# Checks if an element expects a pattern value. A pattern element is
|
152
|
+
# denoted by being surrounded by forward slashes.
|
128
153
|
def self.pattern?(token)
|
129
154
|
token[0] == "/" && token[-1] == "/"
|
130
155
|
end
|
131
156
|
|
157
|
+
# Removes the denotative marks of a pattern, and returns a Regexp that
|
158
|
+
# matches the pattern exactly. That is, the pattern describes the
|
159
|
+
# whole token, and nothing less.
|
132
160
|
def self.clean_pattern(token)
|
133
161
|
/\A#{token[1...-1]}\Z/
|
134
162
|
end
|
135
163
|
|
164
|
+
# Returns whether the token is described by the element and that the
|
165
|
+
# element is a pattern.
|
166
|
+
#
|
167
|
+
# Returns `false` if the token is `nil`, and the pattern doesn't match
|
168
|
+
# the empty string.
|
136
169
|
def self.matches_pattern?(element, token)
|
137
170
|
pattern?(element) && (token =~ clean_pattern(element)) ||
|
138
171
|
token.nil? && "" =~ clean_pattern(element)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: grammaphone
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kellen Watt
|
@@ -10,7 +10,7 @@ bindir: bin
|
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: A dynamic parser written in Ruby that uses a BNF-
|
13
|
+
description: A dynamic RD parser written in Ruby that uses a BNF-adjacent grammar.
|
14
14
|
email: kbw6d9@mst.edu
|
15
15
|
executables: []
|
16
16
|
extensions: []
|