edn_turbo 0.5.7 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,86 +1,115 @@
1
+ // The MIT License (MIT)
2
+
3
+ // Copyright (c) 2015-2021 Ed Porras
4
+
5
+ // Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ // of this software and associated documentation files (the "Software"), to deal
7
+ // in the Software without restriction, including without limitation the rights
8
+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ // copies of the Software, and to permit persons to whom the Software is
10
+ // furnished to do so, subject to the following conditions:
11
+
12
+ // The above copyright notice and this permission notice shall be included in
13
+ // all copies or substantial portions of the Software.
14
+
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE.
22
+
1
23
  #pragma once
2
24
 
3
25
  #include <string>
4
26
  #include <vector>
5
27
  #include <stack>
6
28
 
7
-
8
29
  namespace edn
9
30
  {
10
- //
11
- // C-extension EDN Parser class representation
12
- class Parser
13
- {
14
- public:
15
- Parser() : p(NULL), pe(NULL), eof(NULL),
16
- core_io(NULL), read_io(Qnil),
17
- io_buffer(NULL), io_buffer_len(0),
18
- line_number(1) {
19
- new_meta_list();
20
- }
21
- ~Parser();
22
-
23
- // change input source
24
- void set_source(const char* src, std::size_t len);
25
- void set_source(FILE* fp);
26
- void set_source(VALUE string_io);
27
-
28
- bool is_eof() const { return (p == eof); }
29
-
30
- // parses an entire stream
31
- VALUE parse(const char* s, std::size_t len);
32
-
33
- // returns the next element in the current stream
34
- VALUE next();
35
-
36
- private:
37
- // ragel needs these
38
- const char* p;
39
- const char* pe;
40
- const char* eof;
41
- FILE* core_io; // for IO streams
42
- VALUE read_io; // for non-core IO that responds to read()
43
- char* io_buffer;
44
- uint32_t io_buffer_len;
45
- std::size_t line_number;
46
- std::vector<VALUE> discard;
47
- std::stack<std::vector<VALUE>* > metadata;
48
-
49
- void reset_state();
50
- void fill_buf();
51
-
52
- const char* parse_value (const char *p, const char *pe, VALUE& v);
53
- const char* parse_string (const char *p, const char *pe, VALUE& v);
54
- const char* parse_keyword (const char *p, const char *pe, VALUE& v);
55
- const char* parse_decimal (const char *p, const char *pe, VALUE& v);
56
- const char* parse_integer (const char *p, const char *pe, VALUE& v);
57
- const char* parse_operator(const char *p, const char *pe, VALUE& v);
58
- const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
59
- const char* parse_symbol (const char *p, const char *pe, VALUE& v);
60
- const char* parse_vector (const char *p, const char *pe, VALUE& v);
61
- const char* parse_list (const char *p, const char *pe, VALUE& v);
62
- const char* parse_map (const char *p, const char *pe, VALUE& v);
63
- const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
64
- const char* parse_set (const char *p, const char *pe, VALUE& v);
65
- const char* parse_discard (const char *p, const char *pe);
66
- const char* parse_tagged (const char *p, const char *pe, VALUE& v);
67
- const char* parse_meta (const char *p, const char *pe);
68
-
69
- enum eTokenState { TOKEN_OK, TOKEN_ERROR, TOKEN_IS_DISCARD, TOKEN_IS_META };
70
-
71
- eTokenState parse_next(VALUE& value);
72
-
73
- // metadata
74
- VALUE ruby_meta();
75
- void new_meta_list() { metadata.push( new std::vector<VALUE>() ); }
76
- void del_top_meta_list() { delete metadata.top(); metadata.pop(); }
77
- void append_to_meta(VALUE m) { metadata.top()->push_back(m); }
78
- bool meta_empty() const { return metadata.top()->empty(); }
79
- std::size_t meta_size() const { return metadata.top()->size(); }
80
-
81
- void error(const std::string& f, const std::string& err, char c) const;
82
- void error(const std::string& f, char err_c) const { error(f, "", err_c); }
83
- void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
84
- }; // Parser
31
+ //
32
+ // C-extension EDN Parser class representation
33
+ class Parser
34
+ {
35
+ public:
36
+ Parser() :
37
+ p(nullptr), pe(nullptr), eof(nullptr),
38
+ core_io(nullptr), read_io(Qnil),
39
+ io_buffer(nullptr), io_buffer_len(0),
40
+ line_number(1) {
41
+ new_meta_list();
42
+ }
43
+ ~Parser();
44
+
45
+ // prohibit
46
+ Parser(const Parser&) = delete;
47
+ Parser(const Parser&&) = delete;
48
+ Parser& operator=(const Parser&) = delete;
49
+ Parser& operator=(const Parser&&) = delete;
50
+
51
+ // change input source
52
+ void set_source(const char* src, std::size_t len);
53
+ void set_source(FILE* fp);
54
+ void set_source(VALUE string_io);
55
+
56
+ bool is_eof() const { return (p == eof); }
57
+
58
+ // parses an entire stream
59
+ VALUE parse(const char* s, std::size_t len);
60
+
61
+ // returns the next element in the current stream
62
+ VALUE next();
63
+
64
+ private:
65
+ // ragel needs these
66
+ const char* p;
67
+ const char* pe;
68
+ const char* eof;
69
+ FILE* core_io; // for IO streams
70
+ VALUE read_io; // for non-core IO that responds to read()
71
+ char* io_buffer;
72
+ uint32_t io_buffer_len;
73
+ std::size_t line_number;
74
+ std::vector<VALUE> discard;
75
+ std::stack<std::vector<VALUE>* > metadata;
76
+
77
+ void reset_state();
78
+ void fill_buf();
79
+
80
+ const char* parse_value (const char *p, const char *pe, VALUE& v);
81
+ const char* parse_string (const char *p, const char *pe, VALUE& v);
82
+ const char* parse_keyword (const char *p, const char *pe, VALUE& v);
83
+ const char* parse_decimal (const char *p, const char *pe, VALUE& v);
84
+ const char* parse_integer (const char *p, const char *pe, VALUE& v);
85
+ const char* parse_ratio (const char *p, const char *pe, VALUE& v);
86
+ const char* parse_operator(const char *p, const char *pe, VALUE& v);
87
+ const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
88
+ const char* parse_symbol (const char *p, const char *pe, VALUE& v);
89
+ const char* parse_vector (const char *p, const char *pe, VALUE& v);
90
+ const char* parse_list (const char *p, const char *pe, VALUE& v);
91
+ const char* parse_map (const char *p, const char *pe, VALUE& v);
92
+ const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
93
+ const char* parse_set (const char *p, const char *pe, VALUE& v);
94
+ const char* parse_discard (const char *p, const char *pe);
95
+ const char* parse_tagged (const char *p, const char *pe, VALUE& v);
96
+ const char* parse_meta (const char *p, const char *pe);
97
+
98
+ enum eTokenState { TOKEN_OK, TOKEN_ERROR, TOKEN_IS_DISCARD, TOKEN_IS_META };
99
+
100
+ eTokenState parse_next(VALUE& value);
101
+
102
+ // metadata
103
+ VALUE ruby_meta();
104
+ void new_meta_list() { metadata.push( new std::vector<VALUE>() ); }
105
+ void del_top_meta_list() { delete metadata.top(); metadata.pop(); }
106
+ void append_to_meta(VALUE m) { metadata.top()->push_back(m); }
107
+ bool meta_empty() const { return metadata.top()->empty(); }
108
+ std::size_t meta_size() const { return metadata.top()->size(); }
109
+
110
+ void error(const std::string& f, const std::string& err, char c) const;
111
+ void error(const std::string& f, char err_c) const { error(f, "", err_c); }
112
+ void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
113
+ }; // Parser
85
114
 
86
115
  } // namespace
@@ -1,3 +1,25 @@
1
+ // The MIT License (MIT)
2
+
3
+ // Copyright (c) 2015-2021 Ed Porras
4
+
5
+ // Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ // of this software and associated documentation files (the "Software"), to deal
7
+ // in the Software without restriction, including without limitation the rights
8
+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ // copies of the Software, and to permit persons to whom the Software is
10
+ // furnished to do so, subject to the following conditions:
11
+
12
+ // The above copyright notice and this permission notice shall be included in
13
+ // all copies or substantial portions of the Software.
14
+
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE.
22
+
1
23
  #include <iostream>
2
24
  #include <string>
3
25
  #include <stack>
@@ -9,189 +31,189 @@
9
31
 
10
32
  namespace edn
11
33
  {
12
- //
13
- // parser destructor
14
- Parser::~Parser()
15
- {
16
- reset_state();
17
- del_top_meta_list();
18
-
19
- if (io_buffer) {
20
- free(reinterpret_cast<void*>(io_buffer));
21
- }
22
- }
34
+ //
35
+ // parser destructor
36
+ Parser::~Parser()
37
+ {
38
+ reset_state();
39
+ del_top_meta_list();
40
+
41
+ if (io_buffer) {
42
+ free(reinterpret_cast<void*>(io_buffer));
43
+ }
44
+ }
23
45
 
24
- // =================================================================
25
- // for token-by-token parsing. If a discard or metadata is parsed,
26
- // attempt to get the following value
27
- //
28
- VALUE Parser::next()
29
- {
30
- VALUE token = EDN_EOF_CONST;
31
-
32
- // buffer if reading from an IO
33
- if (core_io || (read_io != Qnil)) {
34
- fill_buf();
35
- }
36
-
37
- while (!is_eof())
38
- {
39
- // fetch a token. If it's metadata or discard
40
- VALUE v = EDN_EOF_CONST;
41
- eTokenState state = parse_next(v);
42
-
43
- if (state == TOKEN_OK) {
44
- // valid token
45
- token = v;
46
- break;
47
- }
48
- else if (state == TOKEN_ERROR) {
49
- token = EDN_EOF_CONST;
50
- break;
51
- }
52
- }
53
-
54
- return token;
55
- }
56
-
57
- // reset parsing state
58
- //
59
- void Parser::reset_state()
60
- {
61
- line_number = 1;
62
- discard.clear();
63
-
64
- // remove any remaining levels except for the first
65
- while (metadata.size() > 1) {
66
- del_top_meta_list();
67
- }
68
- // but clear any metadata on the first
69
- metadata.top()->clear();
70
-
71
- // clean up
72
- core_io = NULL;
73
- read_io = Qnil;
74
- p = pe = eof = NULL;
75
- }
76
-
77
- //
78
- // set a new source
79
- void Parser::set_source(const char* src, std::size_t len)
80
- {
81
- reset_state();
82
- // set ragel state
83
- p = src;
84
- pe = src + len;
85
- eof = pe;
86
- }
87
-
88
- void Parser::set_source(FILE* fp)
89
- {
90
- reset_state();
91
- core_io = fp;
92
- }
93
-
94
- void Parser::set_source(VALUE str_io)
95
- {
96
- reset_state();
97
- read_io = str_io;
98
- }
99
-
100
- //
101
- // for IO sources, read and fill a buffer
102
- void Parser::fill_buf()
103
- {
104
- std::string str_buf;
105
-
106
- // read as much data available
107
- if (core_io) {
108
- // ruby core IO types
109
- char c;
110
- while (1)
111
- {
112
- c = fgetc(core_io);
113
- if (c == EOF) {
114
- break;
115
- }
116
- str_buf += c;
117
- }
118
-
119
- } else if (read_io != Qnil) {
120
- // StringIO, etc. Call read() from ruby side
121
- VALUE v = edn::util::ruby_io_read(read_io);
122
- if (TYPE(v) == T_STRING) {
123
- str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
124
- }
125
- }
126
-
127
- // set the buffer to read from
128
- if (str_buf.length() > 0) {
129
- // first time when io_buffer is NULL, pe & p = 0
130
- uintmax_t new_length = (pe - p) + str_buf.length();
131
- if (new_length > (((uintmax_t) 1 << 32) - 1)) {
132
- // icu -> 32-bit. TODO: handle
133
- rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
134
- }
135
- char* start = NULL;
136
-
137
- // allocate or extend storage needed
138
- if (!io_buffer) {
139
- io_buffer = reinterpret_cast<char*>(malloc(new_length));
140
- start = io_buffer;
141
- } else if (io_buffer_len < new_length) {
142
- // resize the buffer
143
- io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
144
- }
145
-
146
- if (!start) {
147
- // appending to the buffer but move the data not yet
148
- // parsed first to the front
149
- memmove(io_buffer, p, pe - p);
150
- start = io_buffer + (pe - p);
46
+ // =================================================================
47
+ // for token-by-token parsing. If a discard or metadata is parsed,
48
+ // attempt to get the following value
49
+ //
50
+ VALUE Parser::next()
51
+ {
52
+ VALUE token = EDN_EOF_CONST;
53
+
54
+ // buffer if reading from an IO
55
+ if (core_io || (read_io != Qnil)) {
56
+ fill_buf();
57
+ }
58
+
59
+ while (!is_eof())
60
+ {
61
+ // fetch a token. If it's metadata or discard
62
+ VALUE v = EDN_EOF_CONST;
63
+ eTokenState state = parse_next(v);
64
+
65
+ if (state == TOKEN_OK) {
66
+ // valid token
67
+ token = v;
68
+ break;
69
+ }
70
+ else if (state == TOKEN_ERROR) {
71
+ token = EDN_EOF_CONST;
72
+ break;
73
+ }
74
+ }
75
+
76
+ return token;
77
+ }
78
+
79
+ // reset parsing state
80
+ //
81
+ void Parser::reset_state()
82
+ {
83
+ line_number = 1;
84
+ discard.clear();
85
+
86
+ // remove any remaining levels except for the first
87
+ while (metadata.size() > 1) {
88
+ del_top_meta_list();
89
+ }
90
+ // but clear any metadata on the first
91
+ metadata.top()->clear();
92
+
93
+ // clean up
94
+ core_io = nullptr;
95
+ read_io = Qnil;
96
+ p = pe = eof = nullptr;
97
+ }
98
+
99
+ //
100
+ // set a new source
101
+ void Parser::set_source(const char* src, std::size_t len)
102
+ {
103
+ reset_state();
104
+ // set ragel state
105
+ p = src;
106
+ pe = src + len;
107
+ eof = pe;
108
+ }
109
+
110
+ void Parser::set_source(FILE* fp)
111
+ {
112
+ reset_state();
113
+ core_io = fp;
114
+ }
115
+
116
+ void Parser::set_source(VALUE str_io)
117
+ {
118
+ reset_state();
119
+ read_io = str_io;
120
+ }
121
+
122
+ //
123
+ // for IO sources, read and fill a buffer
124
+ void Parser::fill_buf()
125
+ {
126
+ std::string str_buf;
127
+
128
+ // read as much data available
129
+ if (core_io) {
130
+ // ruby core IO types
131
+ char c;
132
+ while (1)
133
+ {
134
+ c = fgetc(core_io);
135
+ if (c == EOF) {
136
+ break;
151
137
  }
138
+ str_buf += c;
139
+ }
140
+
141
+ } else if (read_io != Qnil) {
142
+ // StringIO, etc. Call read() from ruby side
143
+ VALUE v = edn::util::ruby_io_read(read_io);
144
+ if (TYPE(v) == T_STRING) {
145
+ str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
146
+ }
147
+ }
148
+
149
+ // set the buffer to read from
150
+ if (str_buf.length() > 0) {
151
+ // first time when io_buffer is null, pe & p = 0
152
+ uintmax_t new_length = (pe - p) + str_buf.length();
153
+ if (new_length > (((uintmax_t) 1 << 32) - 1)) {
154
+ // icu -> 32-bit. TODO: handle
155
+ rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
156
+ }
157
+ char* start = nullptr;
158
+
159
+ // allocate or extend storage needed
160
+ if (!io_buffer) {
161
+ io_buffer = reinterpret_cast<char*>(malloc(new_length));
162
+ start = io_buffer;
163
+ } else if (io_buffer_len < new_length) {
164
+ // resize the buffer
165
+ io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
166
+ }
167
+
168
+ if (!start) {
169
+ // appending to the buffer but move the data not yet
170
+ // parsed first to the front
171
+ memmove(io_buffer, p, pe - p);
172
+ start = io_buffer + (pe - p);
173
+ }
152
174
 
153
- // and copy
154
- memcpy(start, str_buf.c_str(), str_buf.length());
155
- io_buffer_len = (uint32_t) new_length;
156
-
157
- // set ragel state
158
- p = io_buffer;
159
- pe = p + new_length;
160
- eof = pe;
161
- }
162
- }
163
-
164
- // =================================================================
165
- // METADATA
166
- //
167
- // returns an array of metadata value(s) saved in reverse order
168
- // (right to left) - the ruby side will interpret this
169
- VALUE Parser::ruby_meta()
170
- {
171
- VALUE m_ary = rb_ary_new();
172
-
173
- // pop from the back of the top-most list
174
- while (!metadata.top()->empty()) {
175
- rb_ary_push(m_ary, metadata.top()->back());
176
- metadata.top()->pop_back();
177
- }
178
-
179
- return m_ary;
180
- }
181
-
182
-
183
- // =================================================================
184
- //
185
- // error reporting
186
- void Parser::error(const std::string& func, const std::string& err, char c) const
187
- {
188
- std::cerr << "Parse error "
189
- // "from " << func << "() "
190
- ;
191
- if (err.length() > 0)
192
- std::cerr << "(" << err << ") ";
193
- if (c != '\0')
194
- std::cerr << "at '" << c << "' ";
195
- std::cerr << "on line " << line_number << std::endl;
196
- }
175
+ // and copy
176
+ memcpy(start, str_buf.c_str(), str_buf.length());
177
+ io_buffer_len = static_cast<uint32_t>(new_length);
178
+
179
+ // set ragel state
180
+ p = io_buffer;
181
+ pe = p + new_length;
182
+ eof = pe;
183
+ }
184
+ }
185
+
186
+ // =================================================================
187
+ // METADATA
188
+ //
189
+ // returns an array of metadata value(s) saved in reverse order
190
+ // (right to left) - the ruby side will interpret this
191
+ VALUE Parser::ruby_meta()
192
+ {
193
+ VALUE m_ary = rb_ary_new();
194
+
195
+ // pop from the back of the top-most list
196
+ while (!metadata.top()->empty()) {
197
+ rb_ary_push(m_ary, metadata.top()->back());
198
+ metadata.top()->pop_back();
199
+ }
200
+
201
+ return m_ary;
202
+ }
203
+
204
+
205
+ // =================================================================
206
+ //
207
+ // error reporting
208
+ void Parser::error(const std::string& /*func*/, const std::string& err, char c) const
209
+ {
210
+ std::cerr << "Parse error "
211
+ // "from " << func << "() "
212
+ ;
213
+ if (err.length() > 0)
214
+ std::cerr << "(" << err << ") ";
215
+ if (c != '\0')
216
+ std::cerr << "at '" << c << "' ";
217
+ std::cerr << "on line " << line_number << std::endl;
218
+ }
197
219
  }