edn_turbo 0.5.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/CHANGELOG.md +15 -0
- data/Dockerfile +34 -0
- data/LICENSE +1 -1
- data/README.md +8 -22
- data/Rakefile +22 -19
- data/bin/build_docker_image.sh +11 -0
- data/bin/console.sh +5 -0
- data/docker-compose.yml +10 -0
- data/ext/edn_turbo/edn_parser.cc +336 -314
- data/ext/edn_turbo/edn_parser.rl +63 -41
- data/ext/edn_turbo/extconf.rb +24 -1
- data/ext/edn_turbo/main.cc +189 -166
- data/ext/edn_turbo/parser.h +104 -76
- data/ext/edn_turbo/parser_def.cc +204 -182
- data/ext/edn_turbo/util.cc +241 -219
- data/ext/edn_turbo/util.h +48 -26
- data/ext/edn_turbo/util_unicode.cc +41 -19
- data/ext/edn_turbo/util_unicode.h +29 -7
- data/lib/edn_turbo.rb +22 -0
- data/lib/edn_turbo/edn_parser.rb +22 -0
- data/lib/edn_turbo/version.rb +23 -3
- data/spec/edn_turbo/edn_parser_spec.rb +384 -0
- data/spec/spec_helper.rb +96 -0
- metadata +42 -11
- data/test/test_output_diff.rb +0 -408
data/ext/edn_turbo/parser.h
CHANGED
@@ -1,86 +1,114 @@
|
|
1
|
+
// The MIT License (MIT)
|
2
|
+
|
3
|
+
// Copyright (c) 2015-2019 Ed Porras
|
4
|
+
|
5
|
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
// of this software and associated documentation files (the "Software"), to deal
|
7
|
+
// in the Software without restriction, including without limitation the rights
|
8
|
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
// copies of the Software, and to permit persons to whom the Software is
|
10
|
+
// furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
// The above copyright notice and this permission notice shall be included in
|
13
|
+
// all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
// THE SOFTWARE.
|
22
|
+
|
1
23
|
#pragma once
|
2
24
|
|
3
25
|
#include <string>
|
4
26
|
#include <vector>
|
5
27
|
#include <stack>
|
6
28
|
|
7
|
-
|
8
29
|
namespace edn
|
9
30
|
{
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
31
|
+
//
|
32
|
+
// C-extension EDN Parser class representation
|
33
|
+
class Parser
|
34
|
+
{
|
35
|
+
public:
|
36
|
+
Parser() :
|
37
|
+
p(nullptr), pe(nullptr), eof(nullptr),
|
38
|
+
core_io(nullptr), read_io(Qnil),
|
39
|
+
io_buffer(nullptr), io_buffer_len(0),
|
40
|
+
line_number(1) {
|
41
|
+
new_meta_list();
|
42
|
+
}
|
43
|
+
~Parser();
|
44
|
+
|
45
|
+
// prohibit
|
46
|
+
Parser(const Parser&) = delete;
|
47
|
+
Parser(const Parser&&) = delete;
|
48
|
+
Parser& operator=(const Parser&) = delete;
|
49
|
+
Parser& operator=(const Parser&&) = delete;
|
50
|
+
|
51
|
+
// change input source
|
52
|
+
void set_source(const char* src, std::size_t len);
|
53
|
+
void set_source(FILE* fp);
|
54
|
+
void set_source(VALUE string_io);
|
55
|
+
|
56
|
+
bool is_eof() const { return (p == eof); }
|
57
|
+
|
58
|
+
// parses an entire stream
|
59
|
+
VALUE parse(const char* s, std::size_t len);
|
60
|
+
|
61
|
+
// returns the next element in the current stream
|
62
|
+
VALUE next();
|
63
|
+
|
64
|
+
private:
|
65
|
+
// ragel needs these
|
66
|
+
const char* p;
|
67
|
+
const char* pe;
|
68
|
+
const char* eof;
|
69
|
+
FILE* core_io; // for IO streams
|
70
|
+
VALUE read_io; // for non-core IO that responds to read()
|
71
|
+
char* io_buffer;
|
72
|
+
uint32_t io_buffer_len;
|
73
|
+
std::size_t line_number;
|
74
|
+
std::vector<VALUE> discard;
|
75
|
+
std::stack<std::vector<VALUE>* > metadata;
|
76
|
+
|
77
|
+
void reset_state();
|
78
|
+
void fill_buf();
|
79
|
+
|
80
|
+
const char* parse_value (const char *p, const char *pe, VALUE& v);
|
81
|
+
const char* parse_string (const char *p, const char *pe, VALUE& v);
|
82
|
+
const char* parse_keyword (const char *p, const char *pe, VALUE& v);
|
83
|
+
const char* parse_decimal (const char *p, const char *pe, VALUE& v);
|
84
|
+
const char* parse_integer (const char *p, const char *pe, VALUE& v);
|
85
|
+
const char* parse_operator(const char *p, const char *pe, VALUE& v);
|
86
|
+
const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
|
87
|
+
const char* parse_symbol (const char *p, const char *pe, VALUE& v);
|
88
|
+
const char* parse_vector (const char *p, const char *pe, VALUE& v);
|
89
|
+
const char* parse_list (const char *p, const char *pe, VALUE& v);
|
90
|
+
const char* parse_map (const char *p, const char *pe, VALUE& v);
|
91
|
+
const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
|
92
|
+
const char* parse_set (const char *p, const char *pe, VALUE& v);
|
93
|
+
const char* parse_discard (const char *p, const char *pe);
|
94
|
+
const char* parse_tagged (const char *p, const char *pe, VALUE& v);
|
95
|
+
const char* parse_meta (const char *p, const char *pe);
|
96
|
+
|
97
|
+
enum eTokenState { TOKEN_OK, TOKEN_ERROR, TOKEN_IS_DISCARD, TOKEN_IS_META };
|
98
|
+
|
99
|
+
eTokenState parse_next(VALUE& value);
|
100
|
+
|
101
|
+
// metadata
|
102
|
+
VALUE ruby_meta();
|
103
|
+
void new_meta_list() { metadata.push( new std::vector<VALUE>() ); }
|
104
|
+
void del_top_meta_list() { delete metadata.top(); metadata.pop(); }
|
105
|
+
void append_to_meta(VALUE m) { metadata.top()->push_back(m); }
|
106
|
+
bool meta_empty() const { return metadata.top()->empty(); }
|
107
|
+
std::size_t meta_size() const { return metadata.top()->size(); }
|
108
|
+
|
109
|
+
void error(const std::string& f, const std::string& err, char c) const;
|
110
|
+
void error(const std::string& f, char err_c) const { error(f, "", err_c); }
|
111
|
+
void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
|
112
|
+
}; // Parser
|
85
113
|
|
86
114
|
} // namespace
|
data/ext/edn_turbo/parser_def.cc
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
// The MIT License (MIT)
|
2
|
+
|
3
|
+
// Copyright (c) 2015-2019 Ed Porras
|
4
|
+
|
5
|
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
// of this software and associated documentation files (the "Software"), to deal
|
7
|
+
// in the Software without restriction, including without limitation the rights
|
8
|
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
// copies of the Software, and to permit persons to whom the Software is
|
10
|
+
// furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
// The above copyright notice and this permission notice shall be included in
|
13
|
+
// all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
// THE SOFTWARE.
|
22
|
+
|
1
23
|
#include <iostream>
|
2
24
|
#include <string>
|
3
25
|
#include <stack>
|
@@ -9,189 +31,189 @@
|
|
9
31
|
|
10
32
|
namespace edn
|
11
33
|
{
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
34
|
+
//
|
35
|
+
// parser destructor
|
36
|
+
Parser::~Parser()
{
    // reset_state() unwinds every metadata level except the first one,
    // which is then released explicitly
    reset_state();
    del_top_meta_list();

    // release the IO buffer; free() accepts a null pointer, so no
    // guard is required
    free(io_buffer);
}
|
23
45
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
}
|
116
|
-
str_buf += c;
|
117
|
-
}
|
118
|
-
|
119
|
-
} else if (read_io != Qnil) {
|
120
|
-
// StringIO, etc. Call read() from ruby side
|
121
|
-
VALUE v = edn::util::ruby_io_read(read_io);
|
122
|
-
if (TYPE(v) == T_STRING) {
|
123
|
-
str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
|
124
|
-
}
|
125
|
-
}
|
126
|
-
|
127
|
-
// set the buffer to read from
|
128
|
-
if (str_buf.length() > 0) {
|
129
|
-
// first time when io_buffer is NULL, pe & p = 0
|
130
|
-
uintmax_t new_length = (pe - p) + str_buf.length();
|
131
|
-
if (new_length > (((uintmax_t) 1 << 32) - 1)) {
|
132
|
-
// icu -> 32-bit. TODO: handle
|
133
|
-
rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
|
134
|
-
}
|
135
|
-
char* start = NULL;
|
136
|
-
|
137
|
-
// allocate or extend storage needed
|
138
|
-
if (!io_buffer) {
|
139
|
-
io_buffer = reinterpret_cast<char*>(malloc(new_length));
|
140
|
-
start = io_buffer;
|
141
|
-
} else if (io_buffer_len < new_length) {
|
142
|
-
// resize the buffer
|
143
|
-
io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
|
144
|
-
}
|
145
|
-
|
146
|
-
if (!start) {
|
147
|
-
// appending to the buffer but move the data not yet
|
148
|
-
// parsed first to the front
|
149
|
-
memmove(io_buffer, p, pe - p);
|
150
|
-
start = io_buffer + (pe - p);
|
46
|
+
// =================================================================
|
47
|
+
// for token-by-token parsing. If a discard or metadata is parsed,
|
48
|
+
// attempt to get the following value
|
49
|
+
//
|
50
|
+
VALUE Parser::next()
{
    // when reading from an IO source, pull in more data first
    const bool reading_from_io = core_io || (read_io != Qnil);
    if (reading_from_io) {
        fill_buf();
    }

    // keep fetching until a real value shows up; discard and metadata
    // tokens are skipped so the value that follows them is returned
    while (!is_eof())
    {
        VALUE parsed = EDN_EOF_CONST;

        switch (parse_next(parsed))
        {
          case TOKEN_OK:
              // valid token
              return parsed;

          case TOKEN_ERROR:
              // give up and report end-of-input
              return EDN_EOF_CONST;

          default:
              // TOKEN_IS_DISCARD / TOKEN_IS_META: try the next one
              break;
        }
    }

    // ran out of input without finding a value
    return EDN_EOF_CONST;
}
|
78
|
+
|
79
|
+
// reset parsing state
|
80
|
+
//
|
81
|
+
void Parser::reset_state()
|
82
|
+
{
|
83
|
+
line_number = 1;
|
84
|
+
discard.clear();
|
85
|
+
|
86
|
+
// remove any remaining levels except for the first
|
87
|
+
while (metadata.size() > 1) {
|
88
|
+
del_top_meta_list();
|
89
|
+
}
|
90
|
+
// but clear any metadata on the first
|
91
|
+
metadata.top()->clear();
|
92
|
+
|
93
|
+
// clean up
|
94
|
+
core_io = nullptr;
|
95
|
+
read_io = Qnil;
|
96
|
+
p = pe = eof = nullptr;
|
97
|
+
}
|
98
|
+
|
99
|
+
//
|
100
|
+
// set a new source
|
101
|
+
void Parser::set_source(const char* src, std::size_t len)
|
102
|
+
{
|
103
|
+
reset_state();
|
104
|
+
// set ragel state
|
105
|
+
p = src;
|
106
|
+
pe = src + len;
|
107
|
+
eof = pe;
|
108
|
+
}
|
109
|
+
|
110
|
+
void Parser::set_source(FILE* fp)
|
111
|
+
{
|
112
|
+
reset_state();
|
113
|
+
core_io = fp;
|
114
|
+
}
|
115
|
+
|
116
|
+
void Parser::set_source(VALUE str_io)
{
    // drop all state tied to the previous input, then remember the
    // ruby IO-like object; fill_buf() reads from it later
    reset_state();
    this->read_io = str_io;
}
|
121
|
+
|
122
|
+
//
|
123
|
+
// for IO sources, read and fill a buffer
|
124
|
+
void Parser::fill_buf()
|
125
|
+
{
|
126
|
+
std::string str_buf;
|
127
|
+
|
128
|
+
// read as much data available
|
129
|
+
if (core_io) {
|
130
|
+
// ruby core IO types
|
131
|
+
char c;
|
132
|
+
while (1)
|
133
|
+
{
|
134
|
+
c = fgetc(core_io);
|
135
|
+
if (c == EOF) {
|
136
|
+
break;
|
151
137
|
}
|
138
|
+
str_buf += c;
|
139
|
+
}
|
140
|
+
|
141
|
+
} else if (read_io != Qnil) {
|
142
|
+
// StringIO, etc. Call read() from ruby side
|
143
|
+
VALUE v = edn::util::ruby_io_read(read_io);
|
144
|
+
if (TYPE(v) == T_STRING) {
|
145
|
+
str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
// set the buffer to read from
|
150
|
+
if (str_buf.length() > 0) {
|
151
|
+
// first time when io_buffer is null, pe & p = 0
|
152
|
+
uintmax_t new_length = (pe - p) + str_buf.length();
|
153
|
+
if (new_length > (((uintmax_t) 1 << 32) - 1)) {
|
154
|
+
// icu -> 32-bit. TODO: handle
|
155
|
+
rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
|
156
|
+
}
|
157
|
+
char* start = nullptr;
|
158
|
+
|
159
|
+
// allocate or extend storage needed
|
160
|
+
if (!io_buffer) {
|
161
|
+
io_buffer = reinterpret_cast<char*>(malloc(new_length));
|
162
|
+
start = io_buffer;
|
163
|
+
} else if (io_buffer_len < new_length) {
|
164
|
+
// resize the buffer
|
165
|
+
io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
|
166
|
+
}
|
167
|
+
|
168
|
+
if (!start) {
|
169
|
+
// appending to the buffer but move the data not yet
|
170
|
+
// parsed first to the front
|
171
|
+
memmove(io_buffer, p, pe - p);
|
172
|
+
start = io_buffer + (pe - p);
|
173
|
+
}
|
152
174
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
175
|
+
// and copy
|
176
|
+
memcpy(start, str_buf.c_str(), str_buf.length());
|
177
|
+
io_buffer_len = static_cast<uint32_t>(new_length);
|
178
|
+
|
179
|
+
// set ragel state
|
180
|
+
p = io_buffer;
|
181
|
+
pe = p + new_length;
|
182
|
+
eof = pe;
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
// =================================================================
|
187
|
+
// METADATA
|
188
|
+
//
|
189
|
+
// returns an array of metadata value(s) saved in reverse order
|
190
|
+
// (right to left) - the ruby side will interpret this
|
191
|
+
VALUE Parser::ruby_meta()
{
    VALUE result = rb_ary_new();

    // drain the top-most list from its back, so the ruby array ends
    // up holding the entries in reverse (right to left) order
    std::vector<VALUE>& current = *metadata.top();
    for (std::size_t remaining = current.size(); remaining > 0; --remaining) {
        rb_ary_push(result, current.back());
        current.pop_back();
    }

    return result;
}
|
203
|
+
|
204
|
+
|
205
|
+
// =================================================================
|
206
|
+
//
|
207
|
+
// error reporting
|
208
|
+
void Parser::error(const std::string& func, const std::string& err, char c) const
|
209
|
+
{
|
210
|
+
std::cerr << "Parse error "
|
211
|
+
// "from " << func << "() "
|
212
|
+
;
|
213
|
+
if (err.length() > 0)
|
214
|
+
std::cerr << "(" << err << ") ";
|
215
|
+
if (c != '\0')
|
216
|
+
std::cerr << "at '" << c << "' ";
|
217
|
+
std::cerr << "on line " << line_number << std::endl;
|
218
|
+
}
|
197
219
|
}
|