edn_turbo 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,424 +0,0 @@
1
- #include <iostream>
2
- #include <string>
3
- #include <stack>
4
- #include <vector>
5
- #include <limits>
6
- #include <exception>
7
-
8
- #include <cstring>
9
- #include <stdexcept>
10
-
11
- #include <ruby/ruby.h>
12
- #include <ruby/encoding.h>
13
-
14
- #include "edn_parser.h"
15
- #include "edn_parser_util.h"
16
-
17
- namespace edn
18
- {
19
//
// Returns how many characters the largest value of T occupies when it
// is printed in fixed (non-scientific) notation. The argument is only
// there so that T can be deduced; its value is never looked at.
template <typename T>
static std::size_t get_max_chars(T)
{
    std::stringstream formatted;
    // same effect as streaming std::fixed
    formatted.setf(std::ios_base::fixed, std::ios_base::floatfield);
    formatted << std::numeric_limits<T>::max();

    const std::string text = formatted.str();
    return text.size();
}
28
-
29
- static const std::size_t LL_max_chars = get_max_chars<>((long) 1);
30
- static const std::size_t LD_max_chars = get_max_chars<>((double) 1);
31
-
32
-
33
- // parser destructor
34
- //
35
- Parser::~Parser()
36
- {
37
- reset_state();
38
- del_top_meta_list();
39
-
40
- if (io_buffer) {
41
- free(reinterpret_cast<void*>(io_buffer));
42
- }
43
- }
44
-
45
- // =================================================================
46
- // for token-by-token parsing. If a discard or metadata is parsed,
47
- // attempt to get the following value
48
- //
49
- VALUE Parser::next()
50
- {
51
- VALUE token = EDNT_EOF_CONST;
52
-
53
- // buffer if reading from an IO
54
- if (core_io || (read_io != Qnil)) {
55
- fill_buf();
56
- }
57
-
58
- while (!is_eof())
59
- {
60
- // fetch a token. If it's metadata or discard
61
- VALUE v = EDNT_EOF_CONST;
62
- eTokenState state = parse_next(v);
63
-
64
- if (state == TOKEN_OK) {
65
- // valid token
66
- token = v;
67
- break;
68
- }
69
- else if (state == TOKEN_ERROR) {
70
- token = EDNT_EOF_CONST;
71
- break;
72
- }
73
- }
74
-
75
- return token;
76
- }
77
-
78
- // reset parsing state
79
- //
80
- void Parser::reset_state()
81
- {
82
- line_number = 1;
83
- discard.clear();
84
-
85
- // remove any remaining levels except for the first
86
- while (metadata.size() > 1) {
87
- del_top_meta_list();
88
- }
89
- // but clear any metadata on the first
90
- metadata.top()->clear();
91
-
92
- // clean up
93
- core_io = NULL;
94
- read_io = Qnil;
95
- p = pe = eof = NULL;
96
- }
97
-
98
- //
99
- // set a new source
100
- void Parser::set_source(const char* src, std::size_t len)
101
- {
102
- reset_state();
103
- // set ragel state
104
- p = src;
105
- pe = src + len;
106
- eof = pe;
107
- }
108
-
109
- void Parser::set_source(FILE* fp)
110
- {
111
- reset_state();
112
- core_io = fp;
113
- }
114
-
115
- void Parser::set_source(VALUE str_io)
116
- {
117
- reset_state();
118
- read_io = str_io;
119
- }
120
-
121
- //
122
- // for IO sources, read and fill a buffer
123
- void Parser::fill_buf()
124
- {
125
- std::string str_buf;
126
-
127
- // read as much data available
128
- if (core_io) {
129
- // ruby core IO types
130
- char c;
131
- while (1)
132
- {
133
- c = fgetc(core_io);
134
- if (c == EOF) {
135
- break;
136
- }
137
- str_buf += c;
138
- }
139
-
140
- } else if (read_io != Qnil) {
141
- // StringIO, etc. Call read() from ruby side
142
- VALUE v = ruby_io_read(read_io);
143
- if (TYPE(v) == T_STRING) {
144
- str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
145
- }
146
- }
147
-
148
- // set the buffer to read from
149
- if (str_buf.length() > 0) {
150
- // first time when io_buffer is NULL, pe & p = 0
151
- uintmax_t new_length = (pe - p) + str_buf.length();
152
- if (new_length > (((uintmax_t) 1 << 32) - 1)) {
153
- // icu -> 32-bit. TODO: handle
154
- rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
155
- }
156
- char* start = NULL;
157
-
158
- // allocate or extend storage needed
159
- if (!io_buffer) {
160
- io_buffer = reinterpret_cast<char*>(malloc(new_length));
161
- start = io_buffer;
162
- } else if (io_buffer_len < new_length) {
163
- // resize the buffer
164
- io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
165
- }
166
-
167
- if (!start) {
168
- // appending to the buffer but move the data not yet
169
- // parsed first to the front
170
- memmove(io_buffer, p, pe - p);
171
- start = io_buffer + (pe - p);
172
- }
173
-
174
- // and copy
175
- memcpy(start, str_buf.c_str(), str_buf.length());
176
- io_buffer_len = (uint32_t) new_length;
177
-
178
- // set ragel state
179
- p = io_buffer;
180
- pe = p + new_length;
181
- eof = pe;
182
- }
183
- }
184
-
185
-
186
- // =================================================================
187
- // work-around for idiotic rb_protect convention in order to avoid
188
- // using ruby/rice
189
- //
190
- typedef VALUE (edn_rb_f_type)( VALUE arg );
191
-
192
- // we're using at most 2 args
193
- struct prot_args {
194
- prot_args(VALUE r, ID m) :
195
- receiver(r), method(m), count(0) {
196
- }
197
- prot_args(VALUE r, ID m, VALUE arg) :
198
- receiver(r), method(m), count(1) {
199
- args[0] = arg;
200
- }
201
- prot_args(VALUE r, ID m, VALUE arg1, VALUE arg2) :
202
- receiver(r), method(m), count(2) {
203
- args[0] = arg1;
204
- args[1] = arg2;
205
- }
206
-
207
- VALUE call() const {
208
- return ((count == 0) ?
209
- rb_funcall( receiver, method, 0 ) :
210
- rb_funcall2( receiver, method, count, args ));
211
- }
212
-
213
- private:
214
- VALUE receiver;
215
- ID method;
216
- int count;
217
- VALUE args[2];
218
- };
219
-
220
- // this allows us to wrap with rb_protect()
221
- static inline VALUE edn_wrap_funcall2( VALUE arg )
222
- {
223
- const prot_args* a = reinterpret_cast<const prot_args*>(arg);
224
- if (a)
225
- return a->call();
226
- return Qnil;
227
- }
228
-
229
- static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args )
230
- {
231
- int error;
232
- VALUE s = rb_protect( func, args, &error );
233
- if (error) Parser::throw_error(error);
234
- return s;
235
- }
236
-
237
- static inline VALUE edn_prot_rb_new_str(const char* str) {
238
- int error;
239
- VALUE s = rb_protect( reinterpret_cast<VALUE (*)(VALUE)>(rb_str_new_cstr),
240
- reinterpret_cast<VALUE>(str), &error );
241
- if (error) Parser::throw_error(error);
242
- return s;
243
- }
244
-
245
- static inline VALUE edn_rb_enc_associate_utf8(VALUE str)
246
- {
247
- return rb_enc_associate(str, rb_utf8_encoding() );
248
- }
249
-
250
- // =================================================================
251
- // utils
252
-
253
- //
254
- // convert to int.. if string rep has more digits than long can
255
- // hold, call into ruby to get a big num
256
- VALUE Parser::integer_to_ruby(const char* str, std::size_t len)
257
- {
258
- if (str[len-1] == 'M' || len >= LL_max_chars)
259
- {
260
- std::string buf(str, len);
261
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
262
- prot_args args(vs, EDNT_STRING_TO_I_METHOD);
263
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
264
- }
265
-
266
- return LONG2NUM(buftotype<long>(str, len));
267
- }
268
-
269
- //
270
- // as above.. TODO: check exponential..
271
- VALUE Parser::float_to_ruby(const char* str, std::size_t len)
272
- {
273
- if (str[len-1] == 'M' || len >= LD_max_chars)
274
- {
275
- std::string buf(str, len);
276
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
277
-
278
- if (str[len-1] == 'M') {
279
- return Parser::make_edn_type(EDNT_MAKE_BIG_DECIMAL_METHOD, vs);
280
- }
281
-
282
- prot_args args(vs, EDNT_STRING_TO_F_METHOD);
283
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
284
- }
285
-
286
- return rb_float_new(buftotype<double>(str, len));
287
- }
288
-
289
-
290
- //
291
- // read from a StringIO - expensive!!!
292
- //
293
- VALUE Parser::ruby_io_read(VALUE io)
294
- {
295
- prot_args args(io, EDNT_READ_METHOD);
296
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
297
- }
298
-
299
- //
300
- // copies the string data, unescaping any present values that need to be replaced
301
- //
302
- bool Parser::parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8,
303
- bool encode)
304
- {
305
- if (p_end > p_start) {
306
- std::string buf;
307
-
308
- if (encode) {
309
- if (!util::to_utf8(p_start, (uint32_t) (p_end - p_start), buf))
310
- return false;
311
- }
312
- else {
313
- buf.append(p_start, p_end - p_start);
314
- }
315
-
316
- // utf-8 encode
317
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
318
- int error;
319
- v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
320
- if (error) Parser::throw_error(error);
321
- return true;
322
- } else if (p_end == p_start) {
323
- v_utf8 = rb_str_new("", 0);
324
- return true;
325
- }
326
-
327
- return false;
328
- }
329
-
330
- //
331
- // handles things like \c, \newline
332
- //
333
- bool Parser::parse_escaped_char(const char *p, const char *pe, VALUE& v)
334
- {
335
- std::string buf;
336
- std::size_t len = pe - p;
337
- buf.append(p, len);
338
-
339
- if (len > 1) {
340
- if (buf == "newline") buf = '\n';
341
- else if (buf == "tab") buf = '\t';
342
- else if (buf == "return") buf = '\r';
343
- else if (buf == "space") buf = ' ';
344
- else if (buf == "formfeed") buf = '\f';
345
- else if (buf == "backspace") buf = '\b';
346
- // TODO: is this supported?
347
- else if (buf == "verticaltab") buf = '\v';
348
- else return false;
349
- }
350
-
351
- v = edn_prot_rb_new_str( buf.c_str() );
352
- return true;
353
- }
354
-
355
-
356
- //
357
- // get a set representation from the ruby side. See edn_turbo.rb
358
- VALUE Parser::make_edn_type(ID method, VALUE sym)
359
- {
360
- VALUE edn_module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
361
- prot_args args(edn_module, method, sym);
362
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
363
- }
364
-
365
- VALUE Parser::make_edn_type(ID method, VALUE name, VALUE data)
366
- {
367
- VALUE module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
368
- return make_edn_type(module, method, name, data);
369
- }
370
-
371
- VALUE Parser::make_edn_type(VALUE module, ID method, VALUE name, VALUE data)
372
- {
373
- prot_args args(module, method, name, data);
374
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
375
- }
376
-
377
-
378
- // =================================================================
379
- // METADATA
380
- //
381
- // returns an array of metadata value(s) saved in reverse order
382
- // (right to left) - the ruby side will interpret this
383
- VALUE Parser::ruby_meta()
384
- {
385
- VALUE m_ary = rb_ary_new();
386
-
387
- // pop from the back of the top-most list
388
- while (!metadata.top()->empty()) {
389
- rb_ary_push(m_ary, metadata.top()->back());
390
- metadata.top()->pop_back();
391
- }
392
-
393
- return m_ary;
394
- }
395
-
396
-
397
- // =================================================================
398
- //
399
- // error reporting
400
- void Parser::throw_error(int error)
401
- {
402
- if (error == 0)
403
- return;
404
-
405
- VALUE err = rb_errinfo();
406
- VALUE klass = rb_class_path(CLASS_OF(err));
407
- VALUE message = rb_obj_as_string(err);
408
- std::stringstream msg;
409
- msg << RSTRING_PTR(klass) << " exception: " << RSTRING_PTR(message);
410
- throw std::runtime_error(msg.str());
411
- }
412
-
413
- void Parser::error(const std::string& func, const std::string& err, char c) const
414
- {
415
- std::cerr << "Parse error "
416
- // "from " << func << "() "
417
- ;
418
- if (err.length() > 0)
419
- std::cerr << "(" << err << ") ";
420
- if (c != '\0')
421
- std::cerr << "at '" << c << "' ";
422
- std::cerr << "on line " << line_number << std::endl;
423
- }
424
- }
@@ -1,11 +0,0 @@
1
- #pragma once
2
-
3
- #include <string>
4
-
5
namespace edn
{
    namespace util
    {
        // Converts the len bytes at s to UTF-8, unescaping any values
        // that need to be replaced, and appends the outcome to rslt.
        // Returns false when the input cannot be decoded, true
        // otherwise.
        bool to_utf8(const char *s, uint32_t len, std::string& rslt);
    }
}
@@ -1,33 +0,0 @@
1
- #include <string>
2
-
3
- //
4
- // needed to define this in its own file because icu and ruby have
5
- // differing definitions for Uchar and the compiler complains
6
- //
7
- #include <unicode/utypes.h>
8
- #include <unicode/ustring.h>
9
- #include <unicode/ucnv.h>
10
- #include <unicode/unistr.h>
11
-
12
- #include "edn_parser_util.h"
13
-
14
- namespace edn
15
- {
16
- namespace util
17
- {
18
- //
19
- // unescapes any values that need to be replaced, saves it to utf8
20
- //
21
- bool to_utf8(const char *s, uint32_t len, std::string& rslt)
22
- {
23
- icu::UnicodeString ustr(s, len);
24
-
25
- if (ustr.isBogus()) {
26
- return false;
27
- }
28
-
29
- ustr.unescape().toUTF8String(rslt);
30
- return true;
31
- }
32
- }
33
- }