edn_turbo 0.5.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,86 +1,114 @@
1
+ // The MIT License (MIT)
2
+
3
+ // Copyright (c) 2015-2019 Ed Porras
4
+
5
+ // Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ // of this software and associated documentation files (the "Software"), to deal
7
+ // in the Software without restriction, including without limitation the rights
8
+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ // copies of the Software, and to permit persons to whom the Software is
10
+ // furnished to do so, subject to the following conditions:
11
+
12
+ // The above copyright notice and this permission notice shall be included in
13
+ // all copies or substantial portions of the Software.
14
+
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE.
22
+
1
23
  #pragma once
2
24
 
3
25
  #include <string>
4
26
  #include <vector>
5
27
  #include <stack>
6
28
 
7
-
8
29
  namespace edn
9
30
  {
10
- //
11
- // C-extension EDN Parser class representation
12
- class Parser
13
- {
14
- public:
15
- Parser() : p(NULL), pe(NULL), eof(NULL),
16
- core_io(NULL), read_io(Qnil),
17
- io_buffer(NULL), io_buffer_len(0),
18
- line_number(1) {
19
- new_meta_list();
20
- }
21
- ~Parser();
22
-
23
- // change input source
24
- void set_source(const char* src, std::size_t len);
25
- void set_source(FILE* fp);
26
- void set_source(VALUE string_io);
27
-
28
- bool is_eof() const { return (p == eof); }
29
-
30
- // parses an entire stream
31
- VALUE parse(const char* s, std::size_t len);
32
-
33
- // returns the next element in the current stream
34
- VALUE next();
35
-
36
- private:
37
- // ragel needs these
38
- const char* p;
39
- const char* pe;
40
- const char* eof;
41
- FILE* core_io; // for IO streams
42
- VALUE read_io; // for non-core IO that responds to read()
43
- char* io_buffer;
44
- uint32_t io_buffer_len;
45
- std::size_t line_number;
46
- std::vector<VALUE> discard;
47
- std::stack<std::vector<VALUE>* > metadata;
48
-
49
- void reset_state();
50
- void fill_buf();
51
-
52
- const char* parse_value (const char *p, const char *pe, VALUE& v);
53
- const char* parse_string (const char *p, const char *pe, VALUE& v);
54
- const char* parse_keyword (const char *p, const char *pe, VALUE& v);
55
- const char* parse_decimal (const char *p, const char *pe, VALUE& v);
56
- const char* parse_integer (const char *p, const char *pe, VALUE& v);
57
- const char* parse_operator(const char *p, const char *pe, VALUE& v);
58
- const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
59
- const char* parse_symbol (const char *p, const char *pe, VALUE& v);
60
- const char* parse_vector (const char *p, const char *pe, VALUE& v);
61
- const char* parse_list (const char *p, const char *pe, VALUE& v);
62
- const char* parse_map (const char *p, const char *pe, VALUE& v);
63
- const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
64
- const char* parse_set (const char *p, const char *pe, VALUE& v);
65
- const char* parse_discard (const char *p, const char *pe);
66
- const char* parse_tagged (const char *p, const char *pe, VALUE& v);
67
- const char* parse_meta (const char *p, const char *pe);
68
-
69
- enum eTokenState { TOKEN_OK, TOKEN_ERROR, TOKEN_IS_DISCARD, TOKEN_IS_META };
70
-
71
- eTokenState parse_next(VALUE& value);
72
-
73
- // metadata
74
- VALUE ruby_meta();
75
- void new_meta_list() { metadata.push( new std::vector<VALUE>() ); }
76
- void del_top_meta_list() { delete metadata.top(); metadata.pop(); }
77
- void append_to_meta(VALUE m) { metadata.top()->push_back(m); }
78
- bool meta_empty() const { return metadata.top()->empty(); }
79
- std::size_t meta_size() const { return metadata.top()->size(); }
80
-
81
- void error(const std::string& f, const std::string& err, char c) const;
82
- void error(const std::string& f, char err_c) const { error(f, "", err_c); }
83
- void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
84
- }; // Parser
31
+ //
32
+ // C-extension EDN Parser class representation
33
+ class Parser
34
+ {
35
+ public:
36
+ Parser() :
37
+ p(nullptr), pe(nullptr), eof(nullptr),
38
+ core_io(nullptr), read_io(Qnil),
39
+ io_buffer(nullptr), io_buffer_len(0),
40
+ line_number(1) {
41
+ new_meta_list();
42
+ }
43
+ ~Parser();
44
+
45
+ // prohibit
46
+ Parser(const Parser&) = delete;
47
+ Parser(const Parser&&) = delete;
48
+ Parser& operator=(const Parser&) = delete;
49
+ Parser& operator=(const Parser&&) = delete;
50
+
51
+ // change input source
52
+ void set_source(const char* src, std::size_t len);
53
+ void set_source(FILE* fp);
54
+ void set_source(VALUE string_io);
55
+
56
+ bool is_eof() const { return (p == eof); }
57
+
58
+ // parses an entire stream
59
+ VALUE parse(const char* s, std::size_t len);
60
+
61
+ // returns the next element in the current stream
62
+ VALUE next();
63
+
64
+ private:
65
+ // ragel needs these
66
+ const char* p;
67
+ const char* pe;
68
+ const char* eof;
69
+ FILE* core_io; // for IO streams
70
+ VALUE read_io; // for non-core IO that responds to read()
71
+ char* io_buffer;
72
+ uint32_t io_buffer_len;
73
+ std::size_t line_number;
74
+ std::vector<VALUE> discard;
75
+ std::stack<std::vector<VALUE>* > metadata;
76
+
77
+ void reset_state();
78
+ void fill_buf();
79
+
80
+ const char* parse_value (const char *p, const char *pe, VALUE& v);
81
+ const char* parse_string (const char *p, const char *pe, VALUE& v);
82
+ const char* parse_keyword (const char *p, const char *pe, VALUE& v);
83
+ const char* parse_decimal (const char *p, const char *pe, VALUE& v);
84
+ const char* parse_integer (const char *p, const char *pe, VALUE& v);
85
+ const char* parse_operator(const char *p, const char *pe, VALUE& v);
86
+ const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
87
+ const char* parse_symbol (const char *p, const char *pe, VALUE& v);
88
+ const char* parse_vector (const char *p, const char *pe, VALUE& v);
89
+ const char* parse_list (const char *p, const char *pe, VALUE& v);
90
+ const char* parse_map (const char *p, const char *pe, VALUE& v);
91
+ const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
92
+ const char* parse_set (const char *p, const char *pe, VALUE& v);
93
+ const char* parse_discard (const char *p, const char *pe);
94
+ const char* parse_tagged (const char *p, const char *pe, VALUE& v);
95
+ const char* parse_meta (const char *p, const char *pe);
96
+
97
+ enum eTokenState { TOKEN_OK, TOKEN_ERROR, TOKEN_IS_DISCARD, TOKEN_IS_META };
98
+
99
+ eTokenState parse_next(VALUE& value);
100
+
101
+ // metadata
102
+ VALUE ruby_meta();
103
+ void new_meta_list() { metadata.push( new std::vector<VALUE>() ); }
104
+ void del_top_meta_list() { delete metadata.top(); metadata.pop(); }
105
+ void append_to_meta(VALUE m) { metadata.top()->push_back(m); }
106
+ bool meta_empty() const { return metadata.top()->empty(); }
107
+ std::size_t meta_size() const { return metadata.top()->size(); }
108
+
109
+ void error(const std::string& f, const std::string& err, char c) const;
110
+ void error(const std::string& f, char err_c) const { error(f, "", err_c); }
111
+ void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
112
+ }; // Parser
85
113
 
86
114
  } // namespace
@@ -1,3 +1,25 @@
1
+ // The MIT License (MIT)
2
+
3
+ // Copyright (c) 2015-2019 Ed Porras
4
+
5
+ // Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ // of this software and associated documentation files (the "Software"), to deal
7
+ // in the Software without restriction, including without limitation the rights
8
+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ // copies of the Software, and to permit persons to whom the Software is
10
+ // furnished to do so, subject to the following conditions:
11
+
12
+ // The above copyright notice and this permission notice shall be included in
13
+ // all copies or substantial portions of the Software.
14
+
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE.
22
+
1
23
  #include <iostream>
2
24
  #include <string>
3
25
  #include <stack>
@@ -9,189 +31,189 @@
9
31
 
10
32
  namespace edn
11
33
  {
12
- //
13
- // parser destructor
14
- Parser::~Parser()
15
- {
16
- reset_state();
17
- del_top_meta_list();
18
-
19
- if (io_buffer) {
20
- free(reinterpret_cast<void*>(io_buffer));
21
- }
22
- }
34
+ //
35
+ // parser destructor
36
+ Parser::~Parser()
37
+ {
38
+ reset_state();
39
+ del_top_meta_list();
40
+
41
+ if (io_buffer) {
42
+ free(reinterpret_cast<void*>(io_buffer));
43
+ }
44
+ }
23
45
 
24
- // =================================================================
25
- // for token-by-token parsing. If a discard or metadata is parsed,
26
- // attempt to get the following value
27
- //
28
- VALUE Parser::next()
29
- {
30
- VALUE token = EDN_EOF_CONST;
31
-
32
- // buffer if reading from an IO
33
- if (core_io || (read_io != Qnil)) {
34
- fill_buf();
35
- }
36
-
37
- while (!is_eof())
38
- {
39
- // fetch a token. If it's metadata or discard
40
- VALUE v = EDN_EOF_CONST;
41
- eTokenState state = parse_next(v);
42
-
43
- if (state == TOKEN_OK) {
44
- // valid token
45
- token = v;
46
- break;
47
- }
48
- else if (state == TOKEN_ERROR) {
49
- token = EDN_EOF_CONST;
50
- break;
51
- }
52
- }
53
-
54
- return token;
55
- }
56
-
57
- // reset parsing state
58
- //
59
- void Parser::reset_state()
60
- {
61
- line_number = 1;
62
- discard.clear();
63
-
64
- // remove any remaining levels except for the first
65
- while (metadata.size() > 1) {
66
- del_top_meta_list();
67
- }
68
- // but clear any metadata on the first
69
- metadata.top()->clear();
70
-
71
- // clean up
72
- core_io = NULL;
73
- read_io = Qnil;
74
- p = pe = eof = NULL;
75
- }
76
-
77
- //
78
- // set a new source
79
- void Parser::set_source(const char* src, std::size_t len)
80
- {
81
- reset_state();
82
- // set ragel state
83
- p = src;
84
- pe = src + len;
85
- eof = pe;
86
- }
87
-
88
- void Parser::set_source(FILE* fp)
89
- {
90
- reset_state();
91
- core_io = fp;
92
- }
93
-
94
- void Parser::set_source(VALUE str_io)
95
- {
96
- reset_state();
97
- read_io = str_io;
98
- }
99
-
100
- //
101
- // for IO sources, read and fill a buffer
102
- void Parser::fill_buf()
103
- {
104
- std::string str_buf;
105
-
106
- // read as much data available
107
- if (core_io) {
108
- // ruby core IO types
109
- char c;
110
- while (1)
111
- {
112
- c = fgetc(core_io);
113
- if (c == EOF) {
114
- break;
115
- }
116
- str_buf += c;
117
- }
118
-
119
- } else if (read_io != Qnil) {
120
- // StringIO, etc. Call read() from ruby side
121
- VALUE v = edn::util::ruby_io_read(read_io);
122
- if (TYPE(v) == T_STRING) {
123
- str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
124
- }
125
- }
126
-
127
- // set the buffer to read from
128
- if (str_buf.length() > 0) {
129
- // first time when io_buffer is NULL, pe & p = 0
130
- uintmax_t new_length = (pe - p) + str_buf.length();
131
- if (new_length > (((uintmax_t) 1 << 32) - 1)) {
132
- // icu -> 32-bit. TODO: handle
133
- rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
134
- }
135
- char* start = NULL;
136
-
137
- // allocate or extend storage needed
138
- if (!io_buffer) {
139
- io_buffer = reinterpret_cast<char*>(malloc(new_length));
140
- start = io_buffer;
141
- } else if (io_buffer_len < new_length) {
142
- // resize the buffer
143
- io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
144
- }
145
-
146
- if (!start) {
147
- // appending to the buffer but move the data not yet
148
- // parsed first to the front
149
- memmove(io_buffer, p, pe - p);
150
- start = io_buffer + (pe - p);
46
+ // =================================================================
47
+ // for token-by-token parsing. If a discard or metadata is parsed,
48
+ // attempt to get the following value
49
+ //
50
+ VALUE Parser::next()
51
+ {
52
+ VALUE token = EDN_EOF_CONST;
53
+
54
+ // buffer if reading from an IO
55
+ if (core_io || (read_io != Qnil)) {
56
+ fill_buf();
57
+ }
58
+
59
+ while (!is_eof())
60
+ {
61
+ // fetch a token. If it's metadata or discard
62
+ VALUE v = EDN_EOF_CONST;
63
+ eTokenState state = parse_next(v);
64
+
65
+ if (state == TOKEN_OK) {
66
+ // valid token
67
+ token = v;
68
+ break;
69
+ }
70
+ else if (state == TOKEN_ERROR) {
71
+ token = EDN_EOF_CONST;
72
+ break;
73
+ }
74
+ }
75
+
76
+ return token;
77
+ }
78
+
79
+ // reset parsing state
80
+ //
81
+ void Parser::reset_state()
82
+ {
83
+ line_number = 1;
84
+ discard.clear();
85
+
86
+ // remove any remaining levels except for the first
87
+ while (metadata.size() > 1) {
88
+ del_top_meta_list();
89
+ }
90
+ // but clear any metadata on the first
91
+ metadata.top()->clear();
92
+
93
+ // clean up
94
+ core_io = nullptr;
95
+ read_io = Qnil;
96
+ p = pe = eof = nullptr;
97
+ }
98
+
99
+ //
100
+ // set a new source
101
+ void Parser::set_source(const char* src, std::size_t len)
102
+ {
103
+ reset_state();
104
+ // set ragel state
105
+ p = src;
106
+ pe = src + len;
107
+ eof = pe;
108
+ }
109
+
110
+ void Parser::set_source(FILE* fp)
111
+ {
112
+ reset_state();
113
+ core_io = fp;
114
+ }
115
+
116
+ void Parser::set_source(VALUE str_io)
117
+ {
118
+ reset_state();
119
+ read_io = str_io;
120
+ }
121
+
122
+ //
123
+ // for IO sources, read and fill a buffer
124
+ void Parser::fill_buf()
125
+ {
126
+ std::string str_buf;
127
+
128
+ // read as much data available
129
+ if (core_io) {
130
+ // ruby core IO types
131
+ char c;
132
+ while (1)
133
+ {
134
+ c = fgetc(core_io);
135
+ if (c == EOF) {
136
+ break;
151
137
  }
138
+ str_buf += c;
139
+ }
140
+
141
+ } else if (read_io != Qnil) {
142
+ // StringIO, etc. Call read() from ruby side
143
+ VALUE v = edn::util::ruby_io_read(read_io);
144
+ if (TYPE(v) == T_STRING) {
145
+ str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
146
+ }
147
+ }
148
+
149
+ // set the buffer to read from
150
+ if (str_buf.length() > 0) {
151
+ // first time when io_buffer is null, pe & p = 0
152
+ uintmax_t new_length = (pe - p) + str_buf.length();
153
+ if (new_length > (((uintmax_t) 1 << 32) - 1)) {
154
+ // icu -> 32-bit. TODO: handle
155
+ rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
156
+ }
157
+ char* start = nullptr;
158
+
159
+ // allocate or extend storage needed
160
+ if (!io_buffer) {
161
+ io_buffer = reinterpret_cast<char*>(malloc(new_length));
162
+ start = io_buffer;
163
+ } else if (io_buffer_len < new_length) {
164
+ // resize the buffer
165
+ io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
166
+ }
167
+
168
+ if (!start) {
169
+ // appending to the buffer but move the data not yet
170
+ // parsed first to the front
171
+ memmove(io_buffer, p, pe - p);
172
+ start = io_buffer + (pe - p);
173
+ }
152
174
 
153
- // and copy
154
- memcpy(start, str_buf.c_str(), str_buf.length());
155
- io_buffer_len = (uint32_t) new_length;
156
-
157
- // set ragel state
158
- p = io_buffer;
159
- pe = p + new_length;
160
- eof = pe;
161
- }
162
- }
163
-
164
- // =================================================================
165
- // METADATA
166
- //
167
- // returns an array of metadata value(s) saved in reverse order
168
- // (right to left) - the ruby side will interpret this
169
- VALUE Parser::ruby_meta()
170
- {
171
- VALUE m_ary = rb_ary_new();
172
-
173
- // pop from the back of the top-most list
174
- while (!metadata.top()->empty()) {
175
- rb_ary_push(m_ary, metadata.top()->back());
176
- metadata.top()->pop_back();
177
- }
178
-
179
- return m_ary;
180
- }
181
-
182
-
183
- // =================================================================
184
- //
185
- // error reporting
186
- void Parser::error(const std::string& func, const std::string& err, char c) const
187
- {
188
- std::cerr << "Parse error "
189
- // "from " << func << "() "
190
- ;
191
- if (err.length() > 0)
192
- std::cerr << "(" << err << ") ";
193
- if (c != '\0')
194
- std::cerr << "at '" << c << "' ";
195
- std::cerr << "on line " << line_number << std::endl;
196
- }
175
+ // and copy
176
+ memcpy(start, str_buf.c_str(), str_buf.length());
177
+ io_buffer_len = static_cast<uint32_t>(new_length);
178
+
179
+ // set ragel state
180
+ p = io_buffer;
181
+ pe = p + new_length;
182
+ eof = pe;
183
+ }
184
+ }
185
+
186
+ // =================================================================
187
+ // METADATA
188
+ //
189
+ // returns an array of metadata value(s) saved in reverse order
190
+ // (right to left) - the ruby side will interpret this
191
+ VALUE Parser::ruby_meta()
192
+ {
193
+ VALUE m_ary = rb_ary_new();
194
+
195
+ // pop from the back of the top-most list
196
+ while (!metadata.top()->empty()) {
197
+ rb_ary_push(m_ary, metadata.top()->back());
198
+ metadata.top()->pop_back();
199
+ }
200
+
201
+ return m_ary;
202
+ }
203
+
204
+
205
+ // =================================================================
206
+ //
207
+ // error reporting
208
+ void Parser::error(const std::string& func, const std::string& err, char c) const
209
+ {
210
+ std::cerr << "Parse error "
211
+ // "from " << func << "() "
212
+ ;
213
+ if (err.length() > 0)
214
+ std::cerr << "(" << err << ") ";
215
+ if (c != '\0')
216
+ std::cerr << "at '" << c << "' ";
217
+ std::cerr << "on line " << line_number << std::endl;
218
+ }
197
219
  }