parsby 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile.lock +1 -1
- data/README.md +156 -51
- data/lib/parsby.rb +3 -1
- data/lib/parsby/combinators.rb +23 -3
- data/lib/parsby/version.rb +1 -1
- data/parsby.gemspec +12 -11
- metadata +10 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4338e72a81f41aaff91adddd6b35b3ab67813758
|
4
|
+
data.tar.gz: 73420f9b248109ab9ddffa85f62620776cbc3aa9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff1b11755d9f1713e144b24f0f7ad7cd3c2749918f11ec0b0c6b19c18aa62a0d8780d9aed500d4c607c14cc2fd2b5a436547b736b3e76b2926c0a784b189fb4f
|
7
|
+
data.tar.gz: 46fb3ff8817fe3c1f00dc8479f20b6cc779e534884a52c1ccc124feaa920c01c56a7d2d0f76f8a9697390e4af8c10d138df442de05e3e25869ce232c9fe4a1b8
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -5,14 +5,14 @@ Parser combinator library for Ruby, based on Haskell's Parsec.
|
|
5
5
|
- [Installation](#installation)
|
6
6
|
- [Examples](#examples)
|
7
7
|
- [Introduction](#introduction)
|
8
|
+
- [Some commonly used combinators](#some-commonly-used-combinators)
|
8
9
|
- [Defining combinators](#defining-combinators)
|
9
|
-
- [`Parsby.new`](#parsbynew)
|
10
10
|
- [Defining parsers as modules](#defining-parsers-as-modules)
|
11
11
|
- [`ExpectationFailed`](#expectationfailed)
|
12
12
|
- [Cleaning up the parse tree for the trace](#cleaning-up-the-parse-tree-for-the-trace)
|
13
|
-
- [`splicer.start` combinator](#splicerstart-combinator)
|
14
13
|
- [Recursive parsers with `lazy`](#recursive-parsers-with-lazy)
|
15
14
|
- [Parsing left-recursive languages with `reduce` combinator](#parsing-leftrecursive-languages-with-reduce-combinator)
|
15
|
+
- [`Parsby.new`](#parsbynew)
|
16
16
|
- [Parsing from a string, a file, a pipe, a socket, ...](#parsing-from-a-string-a-file-a-pipe-a-socket-)
|
17
17
|
- [Comparing with Haskell's Parsec](#comparing-with-haskells-parsec)
|
18
18
|
- [Development](#development)
|
@@ -60,12 +60,118 @@ between(lit("<"), lit(">"), decimal).parse "<100>"
|
|
60
60
|
#=> 100
|
61
61
|
```
|
62
62
|
|
63
|
-
|
64
|
-
|
63
|
+
## Some commonly used combinators
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
# Parse argument string literally
|
67
|
+
lit("foo").parse "foo"
|
68
|
+
#=> "foo"
|
69
|
+
|
70
|
+
# Case insensitive lit
|
71
|
+
ilit("Foo").parse "fOo"
|
72
|
+
#=> "fOo"
|
73
|
+
|
74
|
+
# Make any value into a parser that results in that value without
|
75
|
+
# consuming input.
|
76
|
+
pure("foo").parse ""
|
77
|
+
#=> "foo"
|
78
|
+
|
79
|
+
# Parse foo or bar
|
80
|
+
(lit("foo") | lit("bar")).parse "bar"
|
81
|
+
#=> "bar"
|
82
|
+
|
83
|
+
# Like `|`, parse one of foo or bar. `choice` is better when you have
|
84
|
+
# many choices to choose from. You can pass it any number of parsers or
|
85
|
+
# array of parsers.
|
86
|
+
choice(lit("foo"), lit("bar")).parse "bar"
|
87
|
+
#=> "bar"
|
88
|
+
|
89
|
+
# Parse with each argument in succession and group the results in an
|
90
|
+
# array.
|
91
|
+
group(lit("foo"), lit("bar")).parse "foobar"
|
92
|
+
#=> ["foo", "bar"]
|
93
|
+
|
94
|
+
# Parse foo and bar, returning bar.
|
95
|
+
(lit("foo") > lit("bar")).parse "foobar"
|
96
|
+
#=> "bar"
|
97
|
+
|
98
|
+
# Parse foo and bar, returning foo.
|
99
|
+
(lit("foo") < lit("bar")).parse "foobar"
|
100
|
+
#=> "foo"
|
101
|
+
|
102
|
+
# Make parser optional
|
103
|
+
group(optional(lit("foo")), lit("bar")).parse "bar"
|
104
|
+
#=> [nil, "bar"]
|
105
|
+
|
106
|
+
# Use parser zero or more times, grouping results in an array. many_1 does
|
107
|
+
# the same, but requires parsing at least once.
|
108
|
+
many(lit("foo")).parse "foofoo"
|
109
|
+
#=> ["foo", "foo"]
|
110
|
+
|
111
|
+
# Parse many, but each separated by something. sep_by_1 requires at least
|
112
|
+
# one element to be parsed.
|
113
|
+
sep_by(lit(","), lit("foo")).parse "foo,foo"
|
114
|
+
#=> ["foo", "foo"]
|
115
|
+
|
116
|
+
# `whitespace` (alias `ws`) is zero or more whitespace characters.
|
117
|
+
# `whitespace_1` (alias `ws_1`) is one or more whitespace characters.
|
118
|
+
# `spaced` allows a parser to be surrounded by optional whitespace.
|
119
|
+
# `whitespace_1` is the base definition. If you extend it to e.g. add the
|
120
|
+
# parsing of comments, the other combinators will also recognize that
|
121
|
+
# change.
|
122
|
+
(whitespace > lit("foo")).parse " foo"
|
123
|
+
#=> "foo"
|
124
|
+
group(lit("foo"), ws_1 > lit("bar")).parse "foo bar"
|
125
|
+
#=> ["foo", "bar"]
|
126
|
+
spaced(lit("foo")).parse " foo "
|
127
|
+
#=> "foo"
|
128
|
+
|
129
|
+
# Transform the parse result according to the block.
|
130
|
+
lit("foo").fmap {|x| x.upcase }.parse "foo"
|
131
|
+
#=> "FOO"
|
132
|
+
|
133
|
+
# join(p) is the same as p.fmap {|xs| xs.join }
|
134
|
+
join(sep_by(lit(","), lit("foo") | lit("bar"))).parse "foo,bar"
|
135
|
+
#=> "foobar"
|
136
|
+
|
137
|
+
# Parse a character from the choices in a set of strings or ranges
|
138
|
+
char_in(" \t\r\n").parse "\t"
|
139
|
+
#=> "\t"
|
140
|
+
typical_identifier_characters = ['a'..'z', 'A'..'Z', 0..9, "_"]
|
141
|
+
join(many(char_in("!?", typical_identifier_characters))).parse "foo23? bar"
|
142
|
+
#=> "foo23?"
|
143
|
+
|
144
|
+
# Parse any one character
|
145
|
+
any_char.parse "foo"
|
146
|
+
#=> "f"
|
147
|
+
|
148
|
+
# Require end of input at end of parse.
|
149
|
+
(lit("foo") < eof).parse "foobar"
|
150
|
+
#=> Parsby::ExpectationFailed: line 1:
|
151
|
+
foobar
|
152
|
+
| * failure: eof
|
153
|
+
\-/ *| success: lit("foo")
|
154
|
+
\|
|
155
|
+
| * failure: (lit("foo") < eof)
|
156
|
+
|
157
|
+
# Parse only when other parser fails.
|
158
|
+
join(many(any_char.that_fails(whitespace_1))).parse "foo bar"
|
159
|
+
#=> "foo"
|
160
|
+
|
161
|
+
# single(p) is the same as p.fmap {|x| [x] }
|
162
|
+
single(lit("foo")).parse "foo"
|
163
|
+
#=> ["foo"]
|
164
|
+
|
165
|
+
# p1 + p2 is the same as group(p1, p2).fmap {|(r1, r2)| r1 + r2 }
|
166
|
+
(lit("foo") + (ws > lit("bar"))).parse "foo bar"
|
167
|
+
#=> "foobar"
|
168
|
+
(single(lit("foo")) + many(ws > lit("bar"))).parse "foo bar bar"
|
169
|
+
#=> ["foo", "bar", "bar"]
|
170
|
+
```
|
65
171
|
|
66
172
|
## Defining combinators
|
67
173
|
|
68
|
-
If you look at the examples in this source, you'll notice that all
|
174
|
+
If you look at the examples in this source, you'll notice that almost all
|
69
175
|
combinators are defined with `define_combinator`. Strictly speaking, it's
|
70
176
|
not necessary to use that to define combinators. You can do it with
|
71
177
|
variable assignment or `def` syntax. Nevertheless, `define_combinator` is
|
@@ -81,6 +187,9 @@ between(lit("<"), lit(">"), lit("foo")).label
|
|
81
187
|
#=> 'between(lit("<"), lit(">"), lit("foo"))'
|
82
188
|
```
|
83
189
|
|
190
|
+
Having labels that resemble the source code is helpful for [the error
|
191
|
+
messages](#expectationfailed).
|
192
|
+
|
84
193
|
If we use `def` instead of `define_combinator`, then the label would be
|
85
194
|
that of its definition. In the following case, it would be that assigned by
|
86
195
|
`<`.
|
@@ -91,7 +200,7 @@ def between(left, right, p)
|
|
91
200
|
end
|
92
201
|
|
93
202
|
between(lit("<"), lit(">"), lit("foo")).label
|
94
|
-
|
203
|
+
#=> '((lit("<") > lit("foo")) < lit(">"))'
|
95
204
|
```
|
96
205
|
|
97
206
|
If we're to wrap that parser in a new one, then the label would be simply
|
@@ -103,38 +212,9 @@ def between(left, right, p)
|
|
103
212
|
end
|
104
213
|
|
105
214
|
between(lit("<"), lit(">"), lit("foo")).label.to_s
|
106
|
-
|
107
|
-
```
|
108
|
-
|
109
|
-
## `Parsby.new`
|
110
|
-
|
111
|
-
Now, normally one ought to be able to define parsers using just
|
112
|
-
combinators, but there are times when one might need more control. For
|
113
|
-
those times, the most raw way to define a parser is using `Parsby.new`.
|
114
|
-
|
115
|
-
Here's `lit` as an example:
|
116
|
-
|
117
|
-
```ruby
|
118
|
-
define_combinator :lit, wrap: false do |e, case_sensitive: true|
|
119
|
-
Parsby.new do |c|
|
120
|
-
a = c.bio.read e.length
|
121
|
-
if case_sensitive ? a == e : a.to_s.downcase == e.downcase
|
122
|
-
a
|
123
|
-
else
|
124
|
-
raise ExpectationFailed.new c
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
215
|
+
#=> "unknown"
|
128
216
|
```
|
129
217
|
|
130
|
-
It takes a string argument for what it `e`xpects to parse, and returns what
|
131
|
-
was `a`ctually parsed if it matches the expectation.
|
132
|
-
|
133
|
-
The block parameter `c` is a `Parsby::Context`. `c.bio` holds a
|
134
|
-
`Parsby::BackedIO`. The `parse` method of `Parsby` objects accepts ideally
|
135
|
-
any `IO` (and `String`s, which it turns into `StringIO`) and then wraps
|
136
|
-
them with `BackedIO` to give the `IO` the ability to backtrack.
|
137
|
-
|
138
218
|
## Defining parsers as modules
|
139
219
|
|
140
220
|
The typical pattern I use is something like this:
|
@@ -173,7 +253,7 @@ FoobarParser.foo.parse "foo"
|
|
173
253
|
```
|
174
254
|
|
175
255
|
Being able to use subparsers directly is useful for when you want to e.g.
|
176
|
-
parse JSON array, instead of any JSON value.
|
256
|
+
parse a JSON array, instead of any JSON value.
|
177
257
|
|
178
258
|
Writing the parser as a module like that also makes it easy to make a new
|
179
259
|
parser based on it:
|
@@ -258,16 +338,9 @@ as so. There are at least 6 ancestors/descendant parsers between `list` and
|
|
258
338
|
`sexp`. It'd be very much pointless to show them all. They convey little
|
259
339
|
additional information and their labels are very verbose.
|
260
340
|
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
tree look a little cleaner.
|
265
|
-
|
266
|
-
The name comes from JS's `Array.prototype.splice`, to which you can give a
|
267
|
-
starting position, and a count specifying the end, and it'll remove the
|
268
|
-
specified elements from an Array. We use `splicer` likewise, only it works
|
269
|
-
on parse trees. To show an example, here's a simplified definition of
|
270
|
-
`choice`:
|
341
|
+
The reason they don't appear is that the `splicer.start` combinator
|
342
|
+
is used to make the tree look a little cleaner. To show an example of how
|
343
|
+
it works, here's a simplified definition of `choice`:
|
271
344
|
|
272
345
|
```ruby
|
273
346
|
define_combinator :choice do |*ps|
|
@@ -313,7 +386,8 @@ clearer. Let's use `splicer` to remove those:
|
|
313
386
|
end
|
314
387
|
```
|
315
388
|
|
316
|
-
|
389
|
+
This makes the `p` parsers appear as direct children of the `splicer.start`
|
390
|
+
parser in the trace. Let's fail it, again:
|
317
391
|
|
318
392
|
```
|
319
393
|
pry(main)> choice(lit("foo"), lit("bar"), lit("baz")).parse "qux"
|
@@ -327,9 +401,10 @@ Parsby::ExpectationFailed: line 1:
|
|
327
401
|
| * failure: choice(lit("foo"), lit("bar"), lit("baz"))
|
328
402
|
```
|
329
403
|
|
330
|
-
Now, the only issue left is that `define_combinator` wraps the
|
331
|
-
|
332
|
-
|
404
|
+
Now, the only issue left is that `define_combinator` wraps the resulting
|
405
|
+
parser in another parser. It does this so you can see the label assigned to
|
406
|
+
the combinator and to its definition separately. Let's disable that
|
407
|
+
wrapping by passing `wrap: false` to it:
|
333
408
|
|
334
409
|
```ruby
|
335
410
|
define_combinator :choice, wrap: false do |*ps|
|
@@ -343,6 +418,7 @@ false` to it:
|
|
343
418
|
end
|
344
419
|
```
|
345
420
|
|
421
|
+
That causes it to overwrite the label of the parser resulting from the block.
|
346
422
|
Let's fail it, again:
|
347
423
|
|
348
424
|
```
|
@@ -542,6 +618,35 @@ returning the result of the last successful parse.
|
|
542
618
|
In effect, we're parsing left operands bottom-up and right operands
|
543
619
|
top-down.
|
544
620
|
|
621
|
+
## `Parsby.new`
|
622
|
+
|
623
|
+
Normally one ought to be able to define parsers using just combinators, but
|
624
|
+
there are times when one might need more control. For those times, the most
|
625
|
+
raw way to define a parser is using `Parsby.new`.
|
626
|
+
|
627
|
+
Here's `lit` as an example:
|
628
|
+
|
629
|
+
```ruby
|
630
|
+
define_combinator :lit, wrap: false do |e, case_sensitive: true|
|
631
|
+
Parsby.new do |c|
|
632
|
+
a = c.bio.read e.length
|
633
|
+
if case_sensitive ? a == e : a.to_s.downcase == e.downcase
|
634
|
+
a
|
635
|
+
else
|
636
|
+
raise ExpectationFailed.new c
|
637
|
+
end
|
638
|
+
end
|
639
|
+
end
|
640
|
+
```
|
641
|
+
|
642
|
+
It takes a string argument for what it `e`xpects to parse, and returns what
|
643
|
+
was `a`ctually parsed if it matches the expectation.
|
644
|
+
|
645
|
+
The block parameter `c` is a `Parsby::Context`. `c.bio` holds a
|
646
|
+
`Parsby::BackedIO`. The `parse` method of `Parsby` objects accepts ideally
|
647
|
+
any `IO` (and `String`s, which it turns into `StringIO`) and then wraps
|
648
|
+
them with `BackedIO` to give the `IO` the ability to backtrack.
|
649
|
+
|
545
650
|
## Parsing from a string, a file, a pipe, a socket, ...
|
546
651
|
|
547
652
|
Any `IO` ought to work (unit tests currently have only checked pipes,
|
data/lib/parsby.rb
CHANGED
@@ -657,7 +657,9 @@ class Parsby
|
|
657
657
|
|
658
658
|
# x < y runs parser x then y and returns x.
|
659
659
|
def <(p)
|
660
|
-
|
660
|
+
~splicer.start do |m|
|
661
|
+
m.end(self).then {|r| m.end(p).then { pure r } }
|
662
|
+
end % "(#{label} < #{p.label})"
|
661
663
|
end
|
662
664
|
|
663
665
|
# x > y runs parser x then y and returns y.
|
data/lib/parsby/combinators.rb
CHANGED
@@ -185,12 +185,32 @@ class Parsby
|
|
185
185
|
Parsby::Splicer
|
186
186
|
end
|
187
187
|
|
188
|
-
# Parses a single char from
|
189
|
-
|
188
|
+
# Parses a single char from the char options provided as string and
|
189
|
+
# range arguments optionally arbitrarily nested in arrays.
|
190
|
+
#
|
191
|
+
# join(many(char_in('a'..'z', 0..9))).parse "foo23 bar"
|
192
|
+
# #=> "foo23"
|
193
|
+
#
|
194
|
+
# char_options = ['a'..'z', "!@#$%^"]
|
195
|
+
# join(many(char_in(0..9, char_options))).parse "foo23!@ bar"
|
196
|
+
# #=> "foo23!@"
|
197
|
+
#
|
198
|
+
define_combinator :char_in do |*strings|
|
199
|
+
string = strings
|
200
|
+
.flatten
|
201
|
+
.map do |s|
|
202
|
+
if s.is_a?(Range)
|
203
|
+
s.to_a.join
|
204
|
+
else
|
205
|
+
s
|
206
|
+
end
|
207
|
+
end
|
208
|
+
.join
|
209
|
+
|
190
210
|
~splicer.start do
|
191
211
|
Parsby.new do |c|
|
192
212
|
char = any_char.parse c
|
193
|
-
unless
|
213
|
+
unless string.chars.include? char
|
194
214
|
raise ExpectationFailed.new c
|
195
215
|
end
|
196
216
|
char
|
data/lib/parsby/version.rb
CHANGED
data/parsby.gemspec
CHANGED
@@ -6,25 +6,26 @@ require "parsby/version"
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "parsby"
|
8
8
|
spec.version = Parsby::VERSION
|
9
|
+
spec.licenses = ["MIT"]
|
9
10
|
spec.authors = ["Jorge Luis Martinez Gomez"]
|
10
11
|
spec.email = ["jol@jol.dev"]
|
11
12
|
|
12
13
|
spec.summary = %q{Parser combinator library inspired by Haskell's Parsec}
|
13
14
|
#spec.description = %q{TODO: Write a longer description or delete this line.}
|
14
|
-
|
15
|
+
spec.homepage = "https://github.com/jolmg/parsby"
|
15
16
|
|
16
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
17
18
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
18
|
-
|
19
|
-
|
19
|
+
if spec.respond_to?(:metadata)
|
20
|
+
#spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
#spec.metadata["homepage_uri"] = spec.homepage
|
23
|
+
#spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
|
24
|
+
spec.metadata["changelog_uri"] = "https://github.com/jolmg/parsby/blob/master/CHANGELOG.md"
|
25
|
+
else
|
26
|
+
raise "RubyGems 2.0 or newer is required to protect against " \
|
27
|
+
"public gem pushes."
|
28
|
+
end
|
28
29
|
|
29
30
|
# Specify which files should be added to the gem when it is released.
|
30
31
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -38,5 +39,5 @@ Gem::Specification.new do |spec|
|
|
38
39
|
spec.add_development_dependency "bundler", "~> 1.17"
|
39
40
|
spec.add_development_dependency "rake", "~> 10.0"
|
40
41
|
spec.add_development_dependency "rspec", "~> 3.0"
|
41
|
-
spec.add_development_dependency "pry"
|
42
|
+
spec.add_development_dependency "pry", "~> 0"
|
42
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parsby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jorge Luis Martinez Gomez
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
description:
|
@@ -77,6 +77,7 @@ files:
|
|
77
77
|
- ".rspec"
|
78
78
|
- ".ruby-version"
|
79
79
|
- ".travis.yml"
|
80
|
+
- CHANGELOG.md
|
80
81
|
- Gemfile
|
81
82
|
- Gemfile.lock
|
82
83
|
- LICENSE
|
@@ -96,9 +97,11 @@ files:
|
|
96
97
|
- lib/parsby/example/lisp_parser.rb
|
97
98
|
- lib/parsby/version.rb
|
98
99
|
- parsby.gemspec
|
99
|
-
homepage:
|
100
|
-
licenses:
|
101
|
-
|
100
|
+
homepage: https://github.com/jolmg/parsby
|
101
|
+
licenses:
|
102
|
+
- MIT
|
103
|
+
metadata:
|
104
|
+
changelog_uri: https://github.com/jolmg/parsby/blob/master/CHANGELOG.md
|
102
105
|
post_install_message:
|
103
106
|
rdoc_options: []
|
104
107
|
require_paths:
|