parsby 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile.lock +1 -1
- data/README.md +156 -51
- data/lib/parsby.rb +3 -1
- data/lib/parsby/combinators.rb +23 -3
- data/lib/parsby/version.rb +1 -1
- data/parsby.gemspec +12 -11
- metadata +10 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4338e72a81f41aaff91adddd6b35b3ab67813758
|
4
|
+
data.tar.gz: 73420f9b248109ab9ddffa85f62620776cbc3aa9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff1b11755d9f1713e144b24f0f7ad7cd3c2749918f11ec0b0c6b19c18aa62a0d8780d9aed500d4c607c14cc2fd2b5a436547b736b3e76b2926c0a784b189fb4f
|
7
|
+
data.tar.gz: 46fb3ff8817fe3c1f00dc8479f20b6cc779e534884a52c1ccc124feaa920c01c56a7d2d0f76f8a9697390e4af8c10d138df442de05e3e25869ce232c9fe4a1b8
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -5,14 +5,14 @@ Parser combinator library for Ruby, based on Haskell's Parsec.
|
|
5
5
|
- [Installation](#installation)
|
6
6
|
- [Examples](#examples)
|
7
7
|
- [Introduction](#introduction)
|
8
|
+
- [Some commonly used combinators](#some-commonly-used-combinators)
|
8
9
|
- [Defining combinators](#defining-combinators)
|
9
|
-
- [`Parsby.new`](#parsbynew)
|
10
10
|
- [Defining parsers as modules](#defining-parsers-as-modules)
|
11
11
|
- [`ExpectationFailed`](#expectationfailed)
|
12
12
|
- [Cleaning up the parse tree for the trace](#cleaning-up-the-parse-tree-for-the-trace)
|
13
|
-
- [`splicer.start` combinator](#splicerstart-combinator)
|
14
13
|
- [Recursive parsers with `lazy`](#recursive-parsers-with-lazy)
|
15
14
|
- [Parsing left-recursive languages with `reduce` combinator](#parsing-leftrecursive-languages-with-reduce-combinator)
|
15
|
+
- [`Parsby.new`](#parsbynew)
|
16
16
|
- [Parsing from a string, a file, a pipe, a socket, ...](#parsing-from-a-string-a-file-a-pipe-a-socket-)
|
17
17
|
- [Comparing with Haskell's Parsec](#comparing-with-haskells-parsec)
|
18
18
|
- [Development](#development)
|
@@ -60,12 +60,118 @@ between(lit("<"), lit(">"), decimal).parse "<100>"
|
|
60
60
|
#=> 100
|
61
61
|
```
|
62
62
|
|
63
|
-
|
64
|
-
|
63
|
+
## Some commonly used combinators
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
# Parse argument string literally
|
67
|
+
lit("foo").parse "foo"
|
68
|
+
#=> "foo"
|
69
|
+
|
70
|
+
# Case insensitive lit
|
71
|
+
ilit("Foo").parse "fOo"
|
72
|
+
#=> "fOo"
|
73
|
+
|
74
|
+
# Make any value into a parser that results in that value without
|
75
|
+
# consuming input.
|
76
|
+
pure("foo").parse ""
|
77
|
+
#=> "foo"
|
78
|
+
|
79
|
+
# Parse foo or bar
|
80
|
+
(lit("foo") | lit("bar")).parse "bar"
|
81
|
+
#=> "bar"
|
82
|
+
|
83
|
+
# Like `|`, parse one of foo or bar. `choice` is better when you have
|
84
|
+
# many choices to choose from. You can pass it any number of parsers or
|
85
|
+
# array of parsers.
|
86
|
+
choice(lit("foo"), lit("bar")).parse "bar"
|
87
|
+
#=> "bar"
|
88
|
+
|
89
|
+
# Parse with each argument in succession and group the results in an
|
90
|
+
# array.
|
91
|
+
group(lit("foo"), lit("bar")).parse "foobar"
|
92
|
+
#=> ["foo", "bar"]
|
93
|
+
|
94
|
+
# Parse foo and bar, returning bar.
|
95
|
+
(lit("foo") > lit("bar")).parse "foobar"
|
96
|
+
#=> "bar"
|
97
|
+
|
98
|
+
# Parse foo and bar, returning foo.
|
99
|
+
(lit("foo") < lit("bar")).parse "foobar"
|
100
|
+
#=> "foo"
|
101
|
+
|
102
|
+
# Make parser optional
|
103
|
+
group(optional(lit("foo")), lit("bar")).parse "bar"
|
104
|
+
#=> [nil, "bar"]
|
105
|
+
|
106
|
+
# Use parser zero or more times, grouping results in array. many_1 does
|
107
|
+
# the same, but requires parsing at least once.
|
108
|
+
many(lit("foo")).parse "foofoo"
|
109
|
+
#=> ["foo", "foo"]
|
110
|
+
|
111
|
+
# Parse many, but each separated by something. sep_by_1 requires at least
|
112
|
+
# one element to be parsed.
|
113
|
+
sep_by(lit(","), lit("foo")).parse "foo,foo"
|
114
|
+
#=> ["foo", "foo"]
|
115
|
+
|
116
|
+
# `whitespace` (alias `ws`) is zero or more whitespace characters.
|
117
|
+
# `whitespace_1` (alias `ws_1`) is one or more whitespace characters.
|
118
|
+
# `spaced` allows a parser to be surrounded by optional whitespace.
|
119
|
+
# `whitespace_1` is the base definition. If you extend it to e.g. add the
|
120
|
+
# parsing of comments, the other combinators will also recognize that
|
121
|
+
# change.
|
122
|
+
(whitespace > lit("foo")).parse " foo"
|
123
|
+
#=> "foo"
|
124
|
+
group(lit("foo"), ws_1 > lit("bar")).parse "foo bar"
|
125
|
+
#=> ["foo", "bar"]
|
126
|
+
spaced(lit("foo")).parse " foo "
|
127
|
+
#=> "foo"
|
128
|
+
|
129
|
+
# Transform the parse result according to the block.
|
130
|
+
lit("foo").fmap {|x| x.upcase }.parse "foo"
|
131
|
+
#=> "FOO"
|
132
|
+
|
133
|
+
# join(p) is the same as p.fmap {|xs| xs.join }
|
134
|
+
join(sep_by(lit(","), lit("foo") | lit("bar"))).parse "foo,bar"
|
135
|
+
#=> "foobar"
|
136
|
+
|
137
|
+
# Parse a character from the choices in a set of strings or ranges
|
138
|
+
char_in(" \t\r\n").parse "\t"
|
139
|
+
#=> "\t"
|
140
|
+
typical_identifier_characters = ['a'..'z', 'A'..'Z', 0..9, "_"]
|
141
|
+
join(many(char_in("!?", typical_identifier_characters))).parse "foo23? bar"
|
142
|
+
#=> "foo23?"
|
143
|
+
|
144
|
+
# Parse any one character
|
145
|
+
any_char.parse "foo"
|
146
|
+
#=> "f"
|
147
|
+
|
148
|
+
# Require end of input at end of parse.
|
149
|
+
(lit("foo") < eof).parse "foobar"
|
150
|
+
#=> Parsby::ExpectationFailed: line 1:
|
151
|
+
foobar
|
152
|
+
| * failure: eof
|
153
|
+
\-/ *| success: lit("foo")
|
154
|
+
\|
|
155
|
+
| * failure: (lit("foo") < eof)
|
156
|
+
|
157
|
+
# Parse only when other parser fails.
|
158
|
+
join(many(any_char.that_fails(whitespace_1))).parse "foo bar"
|
159
|
+
#=> "foo"
|
160
|
+
|
161
|
+
# single(p) is the same as p.fmap {|x| [x] }
|
162
|
+
single(lit("foo")).parse "foo"
|
163
|
+
#=> ["foo"]
|
164
|
+
|
165
|
+
# p1 + p2 is the same as group(p1, p2).fmap {|(r1, r2)| r1 + r2 }
|
166
|
+
(lit("foo") + (ws > lit("bar"))).parse "foo bar"
|
167
|
+
#=> "foobar"
|
168
|
+
(single(lit("foo")) + many(ws > lit("bar"))).parse "foo bar bar"
|
169
|
+
#=> ["foo", "bar", "bar"]
|
170
|
+
```
|
65
171
|
|
66
172
|
## Defining combinators
|
67
173
|
|
68
|
-
If you look at the examples in this source, you'll notice that all
|
174
|
+
If you look at the examples in this source, you'll notice that almost all
|
69
175
|
combinators are defined with `define_combinator`. Strictly speaking, it's
|
70
176
|
not necessary to use that to define combinators. You can do it with
|
71
177
|
variable assignment or `def` syntax. Nevertheless, `define_combinator` is
|
@@ -81,6 +187,9 @@ between(lit("<"), lit(">"), lit("foo")).label
|
|
81
187
|
#=> 'between(lit("<"), lit(">"), lit("foo"))'
|
82
188
|
```
|
83
189
|
|
190
|
+
Having labels that resemble the source code is helpful for [the error
|
191
|
+
messages](#expectationfailed).
|
192
|
+
|
84
193
|
If we use `def` instead of `define_combinator`, then the label would be
|
85
194
|
that of its definition. In the following case, it would be that assigned by
|
86
195
|
`<`.
|
@@ -91,7 +200,7 @@ def between(left, right, p)
|
|
91
200
|
end
|
92
201
|
|
93
202
|
between(lit("<"), lit(">"), lit("foo")).label
|
94
|
-
|
203
|
+
#=> '((lit("<") > lit("foo")) < lit(">"))'
|
95
204
|
```
|
96
205
|
|
97
206
|
If we're to wrap that parser in a new one, then the label would be simply
|
@@ -103,38 +212,9 @@ def between(left, right, p)
|
|
103
212
|
end
|
104
213
|
|
105
214
|
between(lit("<"), lit(">"), lit("foo")).label.to_s
|
106
|
-
|
107
|
-
```
|
108
|
-
|
109
|
-
## `Parsby.new`
|
110
|
-
|
111
|
-
Now, normally one ought to be able to define parsers using just
|
112
|
-
combinators, but there are times when one might need more control. For
|
113
|
-
those times, the most raw way to define a parser is using `Parsby.new`.
|
114
|
-
|
115
|
-
Here's `lit` as an example:
|
116
|
-
|
117
|
-
```ruby
|
118
|
-
define_combinator :lit, wrap: false do |e, case_sensitive: true|
|
119
|
-
Parsby.new do |c|
|
120
|
-
a = c.bio.read e.length
|
121
|
-
if case_sensitive ? a == e : a.to_s.downcase == e.downcase
|
122
|
-
a
|
123
|
-
else
|
124
|
-
raise ExpectationFailed.new c
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
215
|
+
#=> "unknown"
|
128
216
|
```
|
129
217
|
|
130
|
-
It takes a string argument for what it `e`xpects to parse, and returns what
|
131
|
-
was `a`ctually parsed if it matches the expectation.
|
132
|
-
|
133
|
-
The block parameter `c` is a `Parsby::Context`. `c.bio` holds a
|
134
|
-
`Parsby::BackedIO`. The `parse` method of `Parsby` objects accepts ideally
|
135
|
-
any `IO` (and `String`s, which it turns into `StringIO`) and then wraps
|
136
|
-
them with `BackedIO` to give the `IO` the ability to backtrack.
|
137
|
-
|
138
218
|
## Defining parsers as modules
|
139
219
|
|
140
220
|
The typical pattern I use is something like this:
|
@@ -173,7 +253,7 @@ FoobarParser.foo.parse "foo"
|
|
173
253
|
```
|
174
254
|
|
175
255
|
Being able to use subparsers directly is useful for when you want to e.g.
|
176
|
-
parse JSON array, instead of any JSON value.
|
256
|
+
parse a JSON array, instead of any JSON value.
|
177
257
|
|
178
258
|
Writing the parser as a module like that also makes it easy to make a new
|
179
259
|
parser based on it:
|
@@ -258,16 +338,9 @@ as so. There are at least 6 ancestors/descendant parsers between `list` and
|
|
258
338
|
`sexp`. It'd be very much pointless to show them all. They convey little
|
259
339
|
additional information and their labels are very verbose.
|
260
340
|
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
tree look a little cleaner.
|
265
|
-
|
266
|
-
The name comes from JS's `Array.prototype.splice`, to which you can give a
|
267
|
-
starting position, and a count specifying the end, and it'll remove the
|
268
|
-
specified elements from an Array. We use `splicer` likewise, only it works
|
269
|
-
on parse trees. To show an example, here's a simplified definition of
|
270
|
-
`choice`:
|
341
|
+
The reason they don't appear is that the `splicer.start` combinator
|
342
|
+
is used to make the tree look a little cleaner. To show an example of how
|
343
|
+
it works, here's a simplified definition of `choice`:
|
271
344
|
|
272
345
|
```ruby
|
273
346
|
define_combinator :choice do |*ps|
|
@@ -313,7 +386,8 @@ clearer. Let's use `splicer` to remove those:
|
|
313
386
|
end
|
314
387
|
```
|
315
388
|
|
316
|
-
|
389
|
+
This makes the `p` parsers appear as direct children of the `splicer.start`
|
390
|
+
parser in the trace. Let's fail it, again:
|
317
391
|
|
318
392
|
```
|
319
393
|
pry(main)> choice(lit("foo"), lit("bar"), lit("baz")).parse "qux"
|
@@ -327,9 +401,10 @@ Parsby::ExpectationFailed: line 1:
|
|
327
401
|
| * failure: choice(lit("foo"), lit("bar"), lit("baz"))
|
328
402
|
```
|
329
403
|
|
330
|
-
Now, the only issue left is that `define_combinator` wraps the
|
331
|
-
|
332
|
-
|
404
|
+
Now, the only issue left is that `define_combinator` wraps the resulting
|
405
|
+
parser in another parser. It does this so you can see the label assigned to
|
406
|
+
the combinator and to its definition separately. Let's disable that
|
407
|
+
wrapping by passing `wrap: false` to it:
|
333
408
|
|
334
409
|
```ruby
|
335
410
|
define_combinator :choice, wrap: false do |*ps|
|
@@ -343,6 +418,7 @@ false` to it:
|
|
343
418
|
end
|
344
419
|
```
|
345
420
|
|
421
|
+
That causes it to overwrite the label of the parser that the block returns.
|
346
422
|
Let's fail it, again:
|
347
423
|
|
348
424
|
```
|
@@ -542,6 +618,35 @@ returning the result of the last successful parse.
|
|
542
618
|
In effect, we're parsing left operands bottom-up and right operands
|
543
619
|
top-down.
|
544
620
|
|
621
|
+
## `Parsby.new`
|
622
|
+
|
623
|
+
Normally one ought to be able to define parsers using just combinators, but
|
624
|
+
there are times when one might need more control. For those times, the most
|
625
|
+
raw way to define a parser is using `Parsby.new`.
|
626
|
+
|
627
|
+
Here's `lit` as an example:
|
628
|
+
|
629
|
+
```ruby
|
630
|
+
define_combinator :lit, wrap: false do |e, case_sensitive: true|
|
631
|
+
Parsby.new do |c|
|
632
|
+
a = c.bio.read e.length
|
633
|
+
if case_sensitive ? a == e : a.to_s.downcase == e.downcase
|
634
|
+
a
|
635
|
+
else
|
636
|
+
raise ExpectationFailed.new c
|
637
|
+
end
|
638
|
+
end
|
639
|
+
end
|
640
|
+
```
|
641
|
+
|
642
|
+
It takes a string argument for what it `e`xpects to parse, and returns what
|
643
|
+
was `a`ctually parsed if it matches the expectation.
|
644
|
+
|
645
|
+
The block parameter `c` is a `Parsby::Context`. `c.bio` holds a
|
646
|
+
`Parsby::BackedIO`. The `parse` method of `Parsby` objects accepts ideally
|
647
|
+
any `IO` (and `String`s, which it turns into `StringIO`) and then wraps
|
648
|
+
them with `BackedIO` to give the `IO` the ability to backtrack.
|
649
|
+
|
545
650
|
## Parsing from a string, a file, a pipe, a socket, ...
|
546
651
|
|
547
652
|
Any `IO` ought to work (unit tests currently have only checked pipes,
|
data/lib/parsby.rb
CHANGED
@@ -657,7 +657,9 @@ class Parsby
|
|
657
657
|
|
658
658
|
# x < y runs parser x then y and returns x.
|
659
659
|
def <(p)
|
660
|
-
|
660
|
+
~splicer.start do |m|
|
661
|
+
m.end(self).then {|r| m.end(p).then { pure r } }
|
662
|
+
end % "(#{label} < #{p.label})"
|
661
663
|
end
|
662
664
|
|
663
665
|
# x > y runs parser x then y and returns y.
|
data/lib/parsby/combinators.rb
CHANGED
@@ -185,12 +185,32 @@ class Parsby
|
|
185
185
|
Parsby::Splicer
|
186
186
|
end
|
187
187
|
|
188
|
-
# Parses a single char from
|
189
|
-
|
188
|
+
# Parses a single char from the char options provided as string and
|
189
|
+
# range arguments optionally arbitrarily nested in arrays.
|
190
|
+
#
|
191
|
+
# join(many(char_in('a'..'z', 0..9))).parse "foo23 bar"
|
192
|
+
# #=> "foo23"
|
193
|
+
#
|
194
|
+
# char_options = ['a'..'z', "!@#$%^"]
|
195
|
+
# join(many(char_in(0..9, char_options))).parse "foo23!@ bar"
|
196
|
+
# #=> "foo23!@"
|
197
|
+
#
|
198
|
+
define_combinator :char_in do |*strings|
|
199
|
+
string = strings
|
200
|
+
.flatten
|
201
|
+
.map do |s|
|
202
|
+
if s.is_a?(Range)
|
203
|
+
s.to_a.join
|
204
|
+
else
|
205
|
+
s
|
206
|
+
end
|
207
|
+
end
|
208
|
+
.join
|
209
|
+
|
190
210
|
~splicer.start do
|
191
211
|
Parsby.new do |c|
|
192
212
|
char = any_char.parse c
|
193
|
-
unless
|
213
|
+
unless string.chars.include? char
|
194
214
|
raise ExpectationFailed.new c
|
195
215
|
end
|
196
216
|
char
|
data/lib/parsby/version.rb
CHANGED
data/parsby.gemspec
CHANGED
@@ -6,25 +6,26 @@ require "parsby/version"
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "parsby"
|
8
8
|
spec.version = Parsby::VERSION
|
9
|
+
spec.licenses = ["MIT"]
|
9
10
|
spec.authors = ["Jorge Luis Martinez Gomez"]
|
10
11
|
spec.email = ["jol@jol.dev"]
|
11
12
|
|
12
13
|
spec.summary = %q{Parser combinator library inspired by Haskell's Parsec}
|
13
14
|
#spec.description = %q{TODO: Write a longer description or delete this line.}
|
14
|
-
|
15
|
+
spec.homepage = "https://github.com/jolmg/parsby"
|
15
16
|
|
16
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
17
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
18
|
-
|
19
|
-
|
19
|
+
if spec.respond_to?(:metadata)
|
20
|
+
#spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
#spec.metadata["homepage_uri"] = spec.homepage
|
23
|
+
#spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/jolmg/parsby/blob/master/CHANGELOG.md"
|
25
|
+
else
|
26
|
+
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
|
+
"public gem pushes."
|
28
|
+
end
|
28
29
|
|
29
30
|
# Specify which files should be added to the gem when it is released.
|
30
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -38,5 +39,5 @@ Gem::Specification.new do |spec|
|
|
38
39
|
spec.add_development_dependency "bundler", "~> 1.17"
|
39
40
|
spec.add_development_dependency "rake", "~> 10.0"
|
40
41
|
spec.add_development_dependency "rspec", "~> 3.0"
|
41
|
-
spec.add_development_dependency "pry"
|
42
|
+
spec.add_development_dependency "pry", "~> 0"
|
42
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parsby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jorge Luis Martinez Gomez
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
description:
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- ".rspec"
|
78
78
|
- ".ruby-version"
|
79
79
|
- ".travis.yml"
|
80
|
+
- CHANGELOG.md
|
80
81
|
- Gemfile
|
81
82
|
- Gemfile.lock
|
82
83
|
- LICENSE
|
@@ -96,9 +97,11 @@ files:
|
|
96
97
|
- lib/parsby/example/lisp_parser.rb
|
97
98
|
- lib/parsby/version.rb
|
98
99
|
- parsby.gemspec
|
99
|
-
homepage:
|
100
|
-
licenses:
|
101
|
-
|
100
|
+
homepage: https://github.com/jolmg/parsby
|
101
|
+
licenses:
|
102
|
+
- MIT
|
103
|
+
metadata:
|
104
|
+
changelog_uri: https://github.com/jolmg/parsby/blob/master/CHANGELOG.md
|
102
105
|
post_install_message:
|
103
106
|
rdoc_options: []
|
104
107
|
require_paths:
|