pratt_parser 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{CHANGELOG → CHANGELOG.md} +4 -0
- data/lib/pratt_parser.rb +67 -27
- data/pratt_parser.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26194f8a48f5f675a6f72f56f0e7759a8b65befa
|
4
|
+
data.tar.gz: d6c21b23737c64c889f78798b5bacd4c150aa6f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61f4baf7f9cacdd579dceaf19e861eca48f56a94256ad942f11d24cb0075ae08c428818cda0a53419f26b3a11d699952f7e619e67c88ea3cc1531551bfd30199
|
7
|
+
data.tar.gz: 08aaae2ea30912fbe9d52ae9860ebd870440d3772a4cda43bf8c0b64566a450872ca9ad83aad295229988637738144ab5279d80a77994e54e65c727af11ef45d
|
data/{CHANGELOG → CHANGELOG.md}
RENAMED
data/lib/pratt_parser.rb
CHANGED
@@ -1,40 +1,53 @@
|
|
1
|
-
# A Pratt parser. Similar to a recursive
|
1
|
+
# A Pratt parser. Similar to a recursive descent parser but instead of
|
2
2
|
# coding a function for each production, the syntax is coded in a set
|
3
3
|
# of token objects that are yielded by the lexer. New operators and
|
4
|
-
# statements can be slipped
|
4
|
+
# statements can be slipped into the language with the proper
|
5
5
|
# precedence by adding new token objects to the lexer without altering
|
6
6
|
# the code for existing tokens. Pretty cool.
|
7
7
|
#
|
8
|
-
# lexer is
|
9
|
-
#
|
10
|
-
# lbp
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
# lbp-1 for right.
|
8
|
+
# lexer must have an +#each+ method that yields token objects. A token
|
9
|
+
# has three methods:
|
10
|
+
# lbp::
|
11
|
+
# Returns the operator precedence. Higher numbers bind more tightly.
|
12
|
+
# nud(parser)::
|
13
|
+
# Called when the token is the first token in an expression,
|
14
|
+
# including a recursive call to +expression+ (i.e., subexpression).
|
15
|
+
# For example, +nud+ would be called for a unary operator, a literal,
|
16
|
+
# or for the "if" in the construct "if <cond> then <expr>". It is
|
17
|
+
# the token's responsibility to call +parser.expression+,
|
18
|
+
# +parser.expect+, and/or +parser.if?+ to handle the remainder of the
|
19
|
+
# (sub)expression, if any.
|
20
|
+
# led(parser, left)::
|
21
|
+
# Called when the token is preceded by a subexpression, passed in
|
22
|
+
# as +left+. The token may be postfix or infix. It is the token's
|
23
|
+
# responsibility to call +parser.expression+, +parser.expect+,
|
24
|
+
# and/or +parser.if?+ to handle the remainder of the expression, if
|
25
|
+
# any, and combine it with +left+.
|
27
26
|
#
|
28
|
-
#
|
27
|
+
# Only +lbp+ is mandatory. +nud+ and +led+ will be called only when
|
28
|
+
# necessary, if ever. For example, +nud+ will never be called for a
|
29
|
+
# strictly infix token. If the token appears at the start of a
|
30
|
+
# (sub)expression then an exception that isn't at all appropriate
|
31
|
+
# to the abstraction will be thrown.
|
32
|
+
#
|
33
|
+
# +nud+ and +led+ can call +parser.expression+(+rbp+) to recursively
|
34
|
+
# parse the right subexpression. +rbp+ should be the token's +lbp+ for
|
35
|
+
# left-associativity, +lbp-1+ for right.
|
36
|
+
#
|
37
|
+
# "PrattParser.new(lexer).eval" will return the result of the parse.
|
29
38
|
#
|
30
39
|
# Syntax errors aren't handled at the moment and will cause ridiculous
|
31
|
-
# exceptions to be raised such as NoMethodError
|
32
|
-
|
33
|
-
#
|
34
|
-
# http://
|
35
|
-
# http://
|
40
|
+
# exceptions to be raised such as +NoMethodError+.
|
41
|
+
#
|
42
|
+
# Further reading:
|
43
|
+
# * http://javascript.crockford.com/tdop/tdop.html
|
44
|
+
# * http://effbot.org/zone/simple-top-down-parsing.htm
|
45
|
+
# * http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
|
36
46
|
|
37
47
|
class PrattParser
|
48
|
+
# Creates a new +PrattParser+. +lexer+ is an +Enumerable+, or something
|
49
|
+
# with an +#each+ method.
|
50
|
+
#
|
38
51
|
def initialize(lexer)
|
39
52
|
@lexer = Enumerator.new do |y|
|
40
53
|
lexer.each do |token|
|
@@ -46,11 +59,18 @@ class PrattParser
|
|
46
59
|
@token = nil
|
47
60
|
end
|
48
61
|
|
62
|
+
# Runs the tokens through the parse engine and returns the result,
|
63
|
+
# or throws some exception on parse error.
|
64
|
+
#
|
49
65
|
def eval
|
50
66
|
@token = @lexer.next
|
51
67
|
expression(0)
|
52
68
|
end
|
53
69
|
|
70
|
+
# For use by token +#nud+ and +#led+ methods to parse subexpressions with
|
71
|
+
# binding power less than +rbp+. Whatever that means. Don't worry,
|
72
|
+
# it just does the Right Thing.
|
73
|
+
#
|
54
74
|
def expression(rbp)
|
55
75
|
t = @token
|
56
76
|
@token = @lexer.next
|
@@ -63,6 +83,17 @@ class PrattParser
|
|
63
83
|
left
|
64
84
|
end
|
65
85
|
|
86
|
+
# Checks whether the lookahead token is of the
|
87
|
+
# +expected_token_class+ and raises an exception if it isn't.
|
88
|
+
# Alternatively a block may be given; the block is passed the
|
89
|
+
# lookahead token and should raise an exception if it's not an
|
90
|
+
# expected token. In either case if no exception is raised then the
|
91
|
+
# lookahead token is consumed.
|
92
|
+
#
|
93
|
+
# +expect+ can be used to match the
|
94
|
+
# right parenthesis in a parenthesized expression, the colon in a
|
95
|
+
# "cond ? then : else" expression, etc.
|
96
|
+
#
|
66
97
|
def expect(expected_token_class = nil, &block)
|
67
98
|
block ||= lambda do |token|
|
68
99
|
if token.class != expected_token_class
|
@@ -73,6 +104,15 @@ class PrattParser
|
|
73
104
|
@token = @lexer.next
|
74
105
|
end
|
75
106
|
|
107
|
+
# Checks whether the lookahead token is of the
|
108
|
+
# +token_class+. If it is, consumes the lookahead token
|
109
|
+
# and returns truthy, else just returns falsy.
|
110
|
+
# Alternatively a block can be given which is passed the token
|
111
|
+
# and should return truthy or falsy.
|
112
|
+
#
|
113
|
+
# +if?+ can be used for optional tokens such as the +else+
|
114
|
+
# clause in "if cond then val1 [else val2] end".
|
115
|
+
#
|
76
116
|
def if?(token_class, &block)
|
77
117
|
block ||= lambda do |token|
|
78
118
|
token.class == token_class
|
data/pratt_parser.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.test_files = `git ls-files -- spec/*`.split("\n")
|
9
9
|
gem.name = "pratt_parser"
|
10
10
|
gem.require_paths = ["lib"]
|
11
|
-
gem.version = "0.1.
|
11
|
+
gem.version = "0.1.1"
|
12
12
|
gem.license = "MIT"
|
13
13
|
# Needs Enumerator which was added in 1.9.
|
14
14
|
gem.required_ruby_version = ">= 1.9"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pratt_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom May
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A Pratt parser. Create token objects to define your language. Create
|
14
14
|
a lexer to return tokens. Call the parser to grok the language.
|
@@ -20,7 +20,7 @@ extra_rdoc_files: []
|
|
20
20
|
files:
|
21
21
|
- ".gitignore"
|
22
22
|
- ".travis.yml"
|
23
|
-
- CHANGELOG
|
23
|
+
- CHANGELOG.md
|
24
24
|
- Gemfile
|
25
25
|
- Gemfile.lock
|
26
26
|
- LICENSE.md
|