d-parse 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +104 -0
- data/Guardfile +3 -0
- data/LICENSE +19 -0
- data/NEWS.md +0 -0
- data/README.md +137 -0
- data/Rakefile +14 -0
- data/d-parse.gemspec +26 -0
- data/lib/d-parse.rb +10 -0
- data/lib/d-parse/dsl.rb +71 -0
- data/lib/d-parse/failure.rb +46 -0
- data/lib/d-parse/parser.rb +102 -0
- data/lib/d-parse/parsers.rb +26 -0
- data/lib/d-parse/parsers/combinators/alt.rb +32 -0
- data/lib/d-parse/parsers/combinators/repeat.rb +42 -0
- data/lib/d-parse/parsers/combinators/seq.rb +49 -0
- data/lib/d-parse/parsers/highlevel/char_in.rb +13 -0
- data/lib/d-parse/parsers/highlevel/intersperse.rb +18 -0
- data/lib/d-parse/parsers/highlevel/json.rb +237 -0
- data/lib/d-parse/parsers/highlevel/opt.rb +16 -0
- data/lib/d-parse/parsers/highlevel/string.rb +13 -0
- data/lib/d-parse/parsers/highlevel/whitespace_char.rb +15 -0
- data/lib/d-parse/parsers/modifiers/capturing.rb +13 -0
- data/lib/d-parse/parsers/modifiers/describe.rb +28 -0
- data/lib/d-parse/parsers/modifiers/ignore.rb +17 -0
- data/lib/d-parse/parsers/modifiers/lazy.rb +18 -0
- data/lib/d-parse/parsers/modifiers/map.rb +24 -0
- data/lib/d-parse/parsers/primitives/any.rb +22 -0
- data/lib/d-parse/parsers/primitives/bind.rb +25 -0
- data/lib/d-parse/parsers/primitives/char.rb +27 -0
- data/lib/d-parse/parsers/primitives/char_not.rb +27 -0
- data/lib/d-parse/parsers/primitives/char_not_in.rb +30 -0
- data/lib/d-parse/parsers/primitives/eof.rb +21 -0
- data/lib/d-parse/parsers/primitives/except.rb +33 -0
- data/lib/d-parse/parsers/primitives/fail.rb +17 -0
- data/lib/d-parse/parsers/primitives/succeed.rb +13 -0
- data/lib/d-parse/position.rb +31 -0
- data/lib/d-parse/success.rb +35 -0
- data/lib/d-parse/version.rb +3 -0
- data/samples/parse-bind +25 -0
- data/samples/parse-csv +19 -0
- data/samples/parse-errortest +45 -0
- data/samples/parse-fun +61 -0
- data/samples/parse-json +18 -0
- data/samples/parse-readme +27 -0
- data/spec/d-parse/failure_spec.rb +36 -0
- data/spec/d-parse/parser_spec.rb +77 -0
- data/spec/d-parse/parsers/alt_spec.rb +48 -0
- data/spec/d-parse/parsers/any_spec.rb +15 -0
- data/spec/d-parse/parsers/bind_spec.rb +31 -0
- data/spec/d-parse/parsers/capture_spec.rb +11 -0
- data/spec/d-parse/parsers/char_in_spec.rb +22 -0
- data/spec/d-parse/parsers/char_not_in_spec.rb +23 -0
- data/spec/d-parse/parsers/char_not_spec.rb +16 -0
- data/spec/d-parse/parsers/char_spec.rb +22 -0
- data/spec/d-parse/parsers/describe_spec.rb +22 -0
- data/spec/d-parse/parsers/end_of_input_spec.rb +20 -0
- data/spec/d-parse/parsers/except_spec.rb +20 -0
- data/spec/d-parse/parsers/fail_spec.rb +12 -0
- data/spec/d-parse/parsers/intersperse_spec.rb +18 -0
- data/spec/d-parse/parsers/json_spec.rb +69 -0
- data/spec/d-parse/parsers/lazy_spec.rb +16 -0
- data/spec/d-parse/parsers/map_spec.rb +54 -0
- data/spec/d-parse/parsers/optional_spec.rb +16 -0
- data/spec/d-parse/parsers/or_spec.rb +26 -0
- data/spec/d-parse/parsers/repeat_spec.rb +40 -0
- data/spec/d-parse/parsers/sequence_spec.rb +52 -0
- data/spec/d-parse/parsers/string_spec.rb +19 -0
- data/spec/d-parse/parsers/succeed_spec.rb +12 -0
- data/spec/d-parse/parsers/whitespace_char_spec.rb +14 -0
- data/spec/spec_helper.rb +97 -0
- metadata +140 -0
@@ -0,0 +1,102 @@
|
|
1
|
+
module DParse
  # Base class of every parser. Subclasses are expected to override #read and
  # #inspect; both raise NotImplementedError here. The remaining public methods
  # are conveniences that wrap the receiver in another parser.
  class Parser
    # Runs the parser on +input+, starting at a fresh DParse::Position.
    # Returns whatever #read returns.
    def apply(input)
      # FIXME: convert input to chars first
      read(input, DParse::Position.new)
    end

    # Returns the value of +success?+ on the result of applying the parser to
    # +input+.
    def match?(input)
      apply(input).success?
    end

    # Reads from +input+ at position +pos+. Must be overridden by subclasses.
    def read(_input, _pos)
      raise NotImplementedError
    end

    # Placeholder description of what this parser expects.
    def expectation_message
      '?'
    end

    # Textual representation of the parser. Must be overridden by subclasses.
    def inspect
      raise NotImplementedError
    end

    # Same as #inspect.
    def to_s
      inspect
    end

    # Returns a parser whose data is element 0 of this parser's data.
    def first
      map { |d| d[0] }
    end

    # Returns a parser whose data is element 1 of this parser's data.
    def second
      map { |d| d[1] }
    end

    # Returns a parser that keeps only the data elements at odd indices.
    def select_odd
      map { |d| d.select.with_index { |_, i| i.odd? } }
    end

    # Returns a parser that keeps only the data elements at even indices.
    def select_even
      map { |d| d.select.with_index { |_, i| i.even? } }
    end

    # Returns a parser that joins the elements of array data with `+`;
    # non-array data is passed through. Note that an empty array therefore
    # becomes nil, because reduce(:+) has nothing to combine.
    def flatten
      map { |d| d.is_a?(Array) ? d.reduce(:+) : d }
    end

    # Returns a parser whose data has nil elements removed.
    def compact
      map { |d, _, _| d.compact }
    end

    # Returns a DParse::Parsers::Map around this parser and the given block.
    def map(&block)
      DParse::Parsers::Map.new(self, &block)
    end

    # Returns a DParse::Parsers::Ignore around this parser.
    def ignore
      DParse::Parsers::Ignore.new(self)
    end

    # Returns a DParse::Parsers::Bind around this parser and the given block.
    def bind(&block)
      DParse::Parsers::Bind.new(self, &block)
    end

    # Returns a DParse::Parsers::Capturing around this parser.
    def capture
      DParse::Parsers::Capturing.new(self)
    end

    private

    # Renders +char+ for use in a human-readable message: nil becomes
    # 'end of input', LF and CR get descriptive names, and everything else is
    # wrapped in quotes.
    def display(char)
      case char
      when nil
        'end of input'
      when "\n"
        'line break (LF)'
      when "\r"
        'line break (CR)'
      else
        # Use double quotes only when the character itself is a single quote.
        quote_char = char == "'" ? '"' : "'"

        display_char =
          case char
          when '\\', '"', "'"
            # Shown verbatim; #inspect would add escaping backslashes.
            char
          else
            # Strip only the outermost quotes that #inspect adds. \A and \z
            # anchor to the whole string, whereas ^ and $ would also match
            # at line boundaries inside the text.
            char.inspect.gsub(/\A"|"\z/, '')
          end

        "#{quote_char}#{display_char}#{quote_char}"
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative 'parsers/primitives/any'
|
2
|
+
require_relative 'parsers/primitives/bind'
|
3
|
+
require_relative 'parsers/primitives/char'
|
4
|
+
require_relative 'parsers/primitives/char_not'
|
5
|
+
require_relative 'parsers/primitives/char_not_in'
|
6
|
+
require_relative 'parsers/primitives/eof'
|
7
|
+
require_relative 'parsers/primitives/fail'
|
8
|
+
require_relative 'parsers/primitives/except'
|
9
|
+
require_relative 'parsers/primitives/succeed'
|
10
|
+
|
11
|
+
require_relative 'parsers/combinators/alt'
|
12
|
+
require_relative 'parsers/combinators/seq'
|
13
|
+
require_relative 'parsers/combinators/repeat'
|
14
|
+
|
15
|
+
require_relative 'parsers/modifiers/capturing'
|
16
|
+
require_relative 'parsers/modifiers/lazy'
|
17
|
+
require_relative 'parsers/modifiers/map'
|
18
|
+
require_relative 'parsers/modifiers/ignore'
|
19
|
+
require_relative 'parsers/modifiers/describe'
|
20
|
+
|
21
|
+
require_relative 'parsers/highlevel/char_in'
|
22
|
+
require_relative 'parsers/highlevel/intersperse'
|
23
|
+
require_relative 'parsers/highlevel/opt'
|
24
|
+
require_relative 'parsers/highlevel/json'
|
25
|
+
require_relative 'parsers/highlevel/string'
|
26
|
+
require_relative 'parsers/highlevel/whitespace_char'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module DParse
  module Parsers
    # Ordered choice: tries each alternative at the same position and returns
    # the first success. If no alternative succeeds, the failure with the
    # highest position index is returned.
    class Alt < DParse::Parser
      def initialize(*parsers)
        # FIXME: ensure >0 parsers are provided
        @parsers = parsers
      end

      def read(input, pos)
        # Starting point that any real failure is expected to outrank.
        furthest = DParse::Failure.new(input, DParse::Position::FAR_BEHIND)

        @parsers.each do |parser|
          attempt = parser.read(input, pos)

          case attempt
          when DParse::Success
            # Keep the furthest failure seen so far attached to the success.
            return attempt.with_best_failure(furthest)
          when DParse::Failure
            # On a tie the earlier failure is kept.
            furthest = attempt if attempt.pos.index > furthest.pos.index
          else
            # A reader that answers with neither result type ends the search.
            return nil
          end
        end

        furthest
      end

      def inspect
        described = @parsers.map(&:inspect)
        "alt(#{described.join(',')})"
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module DParse
  module Parsers
    # Applies the wrapped parser over and over, collecting the data of every
    # success into an array. Stops at the first failure or at the first
    # result that did not advance the position, and always returns a Success.
    class Repeat < DParse::Parser
      def initialize(parser)
        @parser = parser
      end

      def read(input, pos)
        collected = Success.new(input, pos, data: [])
        furthest_failure = nil

        loop do
          attempt = @parser.read(input, collected.pos)
          furthest_failure = find_best_failure(furthest_failure, attempt)

          # A result at the same index means no progress; looping on it would
          # never terminate.
          stalled = collected.pos.index == attempt.pos.index
          break if stalled || !attempt.is_a?(Success)

          collected = attempt.map { |d| collected.data + [d] }
        end

        collected.with_best_failure(furthest_failure)
      end

      def inspect
        "repeat(#{@parser})"
      end

      private

      # Returns the Failure with the highest position index among +results+,
      # or nil when none of them is a Failure.
      def find_best_failure(*results)
        failures = results.select { |r| r.is_a?(DParse::Failure) }
        failures.max_by { |r| r.pos.index }
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module DParse
  module Parsers
    # Runs the given parsers one after another, each starting where the
    # previous one stopped, and collects their data into an array.
    class Seq < DParse::Parser
      attr_reader :parsers

      def initialize(*parsers)
        @parsers = parsers
      end

      def read(input, pos)
        outcome = Success.new(input, pos, data: [])

        @parsers.each do |parser|
          # Once the chain has stopped succeeding, later parsers are skipped.
          break unless outcome.is_a?(Success)

          previous = outcome
          step = parser.read(input, previous.pos).map { |d| previous.data + [d] }
          outcome = with_best_failure(step, previous)
        end

        outcome
      end

      def inspect
        described = @parsers.map(&:inspect)
        "seq(#{described.join(',')})"
      end

      private

      # Combines the latest result with the one before it so that the failure
      # with the highest position is preserved: a Success gets that failure
      # attached, a Failure is replaced by it.
      def with_best_failure(latest, earlier)
        candidates = [latest, earlier]
        candidates << latest.best_failure if latest.is_a?(Success)
        candidates << earlier.best_failure if earlier.is_a?(Success)

        failures = candidates.select { |r| r.is_a?(DParse::Failure) }
        furthest = failures.max_by { |r| r.pos.index }

        case latest
        when Success then latest.with_best_failure(furthest)
        when Failure then furthest
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module DParse
  module Parsers
    # Factory only: CharIn.new does not create a CharIn instance. It returns
    # an Alt over one Char parser per element of +chars+.
    class CharIn < DParse::Parser
      def self.new(chars)
        alternatives = chars.map { |c| DParse::Parsers::Char.new(c) }
        DParse::Parsers::Alt.new(*alternatives)
      end

      # Direct instantiation is rejected; .new above never reaches this.
      def initialize(*)
        raise ArgumentError, "#{self.class} is not supposed to be initialized"
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DParse
  module Parsers
    # Factory only: Intersperse.new(a, b) returns a parser for one +a+
    # followed by any number of (+b+, +a+) pairs, built from Seq and Repeat.
    class Intersperse < DParse::Parser
      def self.new(a, b)
        separated_item = DParse::Parsers::Seq.new(b, a)

        # Flattening zero repetitions yields nil, so fall back to [].
        rest = DParse::Parsers::Repeat.new(separated_item).flatten.map { |d| d || [] }

        # Prepend the leading item's data to the flattened rest.
        DParse::Parsers::Seq.new(a, rest).map { |d| [d[0]] + d[1] }
      end

      # Direct instantiation is rejected; .new above never reaches this.
      def initialize(*)
        raise ArgumentError, "#{self.class} is not supposed to be initialized"
      end
    end
  end
end
|
@@ -0,0 +1,237 @@
|
|
1
|
+
module DParse
  module Parsers
    # Factory only: JSON.new does not create a JSON instance. It assembles a
    # JSON parser from the DParse::DSL combinators and returns it.
    class JSON < DParse::Parser
      # Character following a backslash in a JSON string, mapped to the
      # character it stands for. \uXXXX escapes are handled separately.
      SIMPLE_ESCAPES = {
        '"' => '"',
        '\\' => '\\',
        '/' => '/',
        'b' => "\b",
        'f' => "\f",
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
      }.freeze

      # Builds and returns the parser. The returned parser is the one for JSON
      # objects, so the top-level value has to be an object.
      def self.new
        extend DParse::DSL

        # Declared up front so the lazy blocks below can refer to it; the
        # real parser is assigned near the end.
        json_value = nil

        whitespace =
          repeat(
            alt(
              char(' '),
              char("\t"),
              char("\r"),
              char("\n"),
            ),
          )

        # String

        json_digit_hex =
          alt(
            char_in('0'..'9'),
            char_in('a'..'f'),
            char_in('A'..'F'),
          )

        json_string =
          seq(
            char('"').ignore,
            repeat(
              alt(
                char_not_in(%w(" \\ )).capture,
                seq(
                  char('\\').ignore,
                  alt(
                    char_in(%w(" \\ / b f n r t)).capture,
                    seq(
                      char('u').capture,
                      seq(
                        json_digit_hex,
                        json_digit_hex,
                        json_digit_hex,
                        json_digit_hex,
                      ).capture,
                    ),
                  ),
                ).compact,
              ),
            ),
            char('"').ignore,
          ).compact.first.map do |d, _, _|
            new_chars =
              d.map do |char|
                case char
                when ::String
                  # Ordinary character, taken as is.
                  char
                when ::Array
                  # Escape sequence: either a single escape character or a
                  # ['u', 'XXXX'] pair.
                  escape = char[0]

                  SIMPLE_ESCAPES.fetch(escape) do
                    if escape.is_a?(::Array) && escape[0] == 'u'
                      escape[1].to_i(16).chr(Encoding::UTF_8)
                    else
                      raise "Unexpected escape sequence #{escape.inspect}"
                    end
                  end
                else
                  raise "??? #{char.inspect} (#{char.class})"
                end
              end

            new_chars.join('')
          end

        # Array

        # NOTE(review): intersperse needs at least one item, so an empty
        # array `[]` (and likewise an empty object `{}`) looks like it is
        # rejected -- confirm whether that is intended.
        json_elements =
          intersperse(
            lazy { json_value },
            seq(
              whitespace,
              char(','),
              whitespace,
            ).ignore,
          ).compact

        json_array =
          seq(
            char('[').ignore,
            whitespace.ignore,
            json_elements,
            whitespace.ignore,
            char(']').ignore,
          ).compact.first

        # Misc

        json_true =
          string('true').map { true }

        json_false =
          string('false').map { false }

        json_null =
          string('null').map { nil }

        # Number

        json_digit =
          char_in('0'..'9')

        json_number =
          seq(
            opt(char('-')).capture,
            alt(
              char('0'),
              seq(
                char_in('1'..'9'),
                repeat(json_digit),
              ),
            ).capture,
            opt(
              seq(
                char('.').ignore,
                repeat(json_digit).capture,
              ).compact,
            ),
            opt(
              seq(
                alt(char('e'), char('E')),
                alt(char('+'), char('-'), succeed).capture,
                repeat(json_digit).capture,
              ).compact,
            ),
          ).map do |d, _, _|
            sign_char = d[0]
            digits_before_dot = d[1]
            digits_after_dot = d[2]
            sci_data = d[3]

            base =
              if digits_after_dot
                [sign_char, digits_before_dot, '.', digits_after_dot].join('').to_f
              else
                [sign_char, digits_before_dot].join('').to_i(10)
              end

            if sci_data
              exponent_sign = sci_data[0]
              scale = 10**sci_data[1].to_i(10)

              case exponent_sign
              when '+', ''
                base * scale
              when '-'
                # A negative exponent divides by the power of ten; it must
                # not negate the value.
                base.fdiv(scale)
              end
            else
              base
            end
          end

        # Object

        json_pair =
          seq(
            json_string,
            whitespace.ignore,
            char(':').ignore,
            whitespace.ignore,
            lazy { json_value },
          ).compact

        json_pairs =
          intersperse(
            json_pair,
            seq(
              whitespace,
              char(','),
              whitespace,
            ).ignore,
          ).compact.map { |d| Hash[d] }

        json_object =
          seq(
            char('{').ignore,
            whitespace.ignore,
            json_pairs,
            whitespace.ignore,
            char('}').ignore,
          ).compact.first

        # Value

        json_value =
          alt(
            json_string,
            json_number,
            json_object,
            json_array,
            json_true,
            json_false,
            json_null,
          )

        # All

        json_object
      end

      # Direct instantiation is rejected; .new above never reaches this.
      def initialize(*)
        raise ArgumentError, "#{self.class} is not supposed to be initialized"
      end
    end
  end
end
|