edn_turbo 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,424 +0,0 @@
1
- #include <iostream>
2
- #include <string>
3
- #include <stack>
4
- #include <vector>
5
- #include <limits>
6
- #include <exception>
7
-
8
- #include <cstring>
9
- #include <stdexcept>
10
-
11
- #include <ruby/ruby.h>
12
- #include <ruby/encoding.h>
13
-
14
- #include "edn_parser.h"
15
- #include "edn_parser_util.h"
16
-
17
- namespace edn
18
- {
19
- //
20
- // used to determine max number of chars in string value of a type
21
// Reports how many characters the largest value of type T occupies when it
// is streamed in fixed (non-scientific) notation. The argument is only there
// so that T can be deduced; its value is never read.
template <typename T>
static std::size_t get_max_chars(T)
{
    std::stringstream formatted;
    formatted << std::fixed;
    formatted << std::numeric_limits<T>::max();

    const std::string text = formatted.str();
    return text.length();
}

// Character-count thresholds for long and double, computed once at start-up.
static const std::size_t LL_max_chars = get_max_chars<long>(1L);
static const std::size_t LD_max_chars = get_max_chars<double>(1.0);
31
-
32
-
33
- // parser destructor
34
- //
35
- Parser::~Parser()
36
- {
37
- reset_state();
38
- del_top_meta_list();
39
-
40
- if (io_buffer) {
41
- free(reinterpret_cast<void*>(io_buffer));
42
- }
43
- }
44
-
45
- // =================================================================
46
- // for token-by-token parsing. If a discard or metadata is parsed,
47
- // attempt to get the following value
48
- //
49
- VALUE Parser::next()
50
- {
51
- VALUE token = EDNT_EOF_CONST;
52
-
53
- // buffer if reading from an IO
54
- if (core_io || (read_io != Qnil)) {
55
- fill_buf();
56
- }
57
-
58
- while (!is_eof())
59
- {
60
- // fetch a token. If it's metadata or discard
61
- VALUE v = EDNT_EOF_CONST;
62
- eTokenState state = parse_next(v);
63
-
64
- if (state == TOKEN_OK) {
65
- // valid token
66
- token = v;
67
- break;
68
- }
69
- else if (state == TOKEN_ERROR) {
70
- token = EDNT_EOF_CONST;
71
- break;
72
- }
73
- }
74
-
75
- return token;
76
- }
77
-
78
- // reset parsing state
79
- //
80
- void Parser::reset_state()
81
- {
82
- line_number = 1;
83
- discard.clear();
84
-
85
- // remove any remaining levels except for the first
86
- while (metadata.size() > 1) {
87
- del_top_meta_list();
88
- }
89
- // but clear any metadata on the first
90
- metadata.top()->clear();
91
-
92
- // clean up
93
- core_io = NULL;
94
- read_io = Qnil;
95
- p = pe = eof = NULL;
96
- }
97
-
98
- //
99
- // set a new source
100
- void Parser::set_source(const char* src, std::size_t len)
101
- {
102
- reset_state();
103
- // set ragel state
104
- p = src;
105
- pe = src + len;
106
- eof = pe;
107
- }
108
-
109
- void Parser::set_source(FILE* fp)
110
- {
111
- reset_state();
112
- core_io = fp;
113
- }
114
-
115
- void Parser::set_source(VALUE str_io)
116
- {
117
- reset_state();
118
- read_io = str_io;
119
- }
120
-
121
- //
122
- // for IO sources, read and fill a buffer
123
- void Parser::fill_buf()
124
- {
125
- std::string str_buf;
126
-
127
- // read as much data available
128
- if (core_io) {
129
- // ruby core IO types
130
- char c;
131
- while (1)
132
- {
133
- c = fgetc(core_io);
134
- if (c == EOF) {
135
- break;
136
- }
137
- str_buf += c;
138
- }
139
-
140
- } else if (read_io != Qnil) {
141
- // StringIO, etc. Call read() from ruby side
142
- VALUE v = ruby_io_read(read_io);
143
- if (TYPE(v) == T_STRING) {
144
- str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
145
- }
146
- }
147
-
148
- // set the buffer to read from
149
- if (str_buf.length() > 0) {
150
- // first time when io_buffer is NULL, pe & p = 0
151
- uintmax_t new_length = (pe - p) + str_buf.length();
152
- if (new_length > (((uintmax_t) 1 << 32) - 1)) {
153
- // icu -> 32-bit. TODO: handle
154
- rb_raise(rb_eRuntimeError, "Unsupported string buffer length");
155
- }
156
- char* start = NULL;
157
-
158
- // allocate or extend storage needed
159
- if (!io_buffer) {
160
- io_buffer = reinterpret_cast<char*>(malloc(new_length));
161
- start = io_buffer;
162
- } else if (io_buffer_len < new_length) {
163
- // resize the buffer
164
- io_buffer = reinterpret_cast<char*>(realloc(reinterpret_cast<void*>(io_buffer), new_length));
165
- }
166
-
167
- if (!start) {
168
- // appending to the buffer but move the data not yet
169
- // parsed first to the front
170
- memmove(io_buffer, p, pe - p);
171
- start = io_buffer + (pe - p);
172
- }
173
-
174
- // and copy
175
- memcpy(start, str_buf.c_str(), str_buf.length());
176
- io_buffer_len = (uint32_t) new_length;
177
-
178
- // set ragel state
179
- p = io_buffer;
180
- pe = p + new_length;
181
- eof = pe;
182
- }
183
- }
184
-
185
-
186
- // =================================================================
187
- // work-around for idiotic rb_protect convention in order to avoid
188
- // using ruby/rice
189
- //
190
- typedef VALUE (edn_rb_f_type)( VALUE arg );
191
-
192
- // we're using at most 2 args
193
- struct prot_args {
194
- prot_args(VALUE r, ID m) :
195
- receiver(r), method(m), count(0) {
196
- }
197
- prot_args(VALUE r, ID m, VALUE arg) :
198
- receiver(r), method(m), count(1) {
199
- args[0] = arg;
200
- }
201
- prot_args(VALUE r, ID m, VALUE arg1, VALUE arg2) :
202
- receiver(r), method(m), count(2) {
203
- args[0] = arg1;
204
- args[1] = arg2;
205
- }
206
-
207
- VALUE call() const {
208
- return ((count == 0) ?
209
- rb_funcall( receiver, method, 0 ) :
210
- rb_funcall2( receiver, method, count, args ));
211
- }
212
-
213
- private:
214
- VALUE receiver;
215
- ID method;
216
- int count;
217
- VALUE args[2];
218
- };
219
-
220
- // this allows us to wrap with rb_protect()
221
- static inline VALUE edn_wrap_funcall2( VALUE arg )
222
- {
223
- const prot_args* a = reinterpret_cast<const prot_args*>(arg);
224
- if (a)
225
- return a->call();
226
- return Qnil;
227
- }
228
-
229
- static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args )
230
- {
231
- int error;
232
- VALUE s = rb_protect( func, args, &error );
233
- if (error) Parser::throw_error(error);
234
- return s;
235
- }
236
-
237
- static inline VALUE edn_prot_rb_new_str(const char* str) {
238
- int error;
239
- VALUE s = rb_protect( reinterpret_cast<VALUE (*)(VALUE)>(rb_str_new_cstr),
240
- reinterpret_cast<VALUE>(str), &error );
241
- if (error) Parser::throw_error(error);
242
- return s;
243
- }
244
-
245
- static inline VALUE edn_rb_enc_associate_utf8(VALUE str)
246
- {
247
- return rb_enc_associate(str, rb_utf8_encoding() );
248
- }
249
-
250
- // =================================================================
251
- // utils
252
-
253
- //
254
- // convert to int.. if string rep has more digits than long can
255
- // hold, call into ruby to get a big num
256
- VALUE Parser::integer_to_ruby(const char* str, std::size_t len)
257
- {
258
- if (str[len-1] == 'M' || len >= LL_max_chars)
259
- {
260
- std::string buf(str, len);
261
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
262
- prot_args args(vs, EDNT_STRING_TO_I_METHOD);
263
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
264
- }
265
-
266
- return LONG2NUM(buftotype<long>(str, len));
267
- }
268
-
269
- //
270
- // as above.. TODO: check exponential..
271
- VALUE Parser::float_to_ruby(const char* str, std::size_t len)
272
- {
273
- if (str[len-1] == 'M' || len >= LD_max_chars)
274
- {
275
- std::string buf(str, len);
276
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
277
-
278
- if (str[len-1] == 'M') {
279
- return Parser::make_edn_type(EDNT_MAKE_BIG_DECIMAL_METHOD, vs);
280
- }
281
-
282
- prot_args args(vs, EDNT_STRING_TO_F_METHOD);
283
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
284
- }
285
-
286
- return rb_float_new(buftotype<double>(str, len));
287
- }
288
-
289
-
290
- //
291
- // read from a StringIO - expensive!!!
292
- //
293
- VALUE Parser::ruby_io_read(VALUE io)
294
- {
295
- prot_args args(io, EDNT_READ_METHOD);
296
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
297
- }
298
-
299
- //
300
- // copies the string data, unescaping any present values that need to be replaced
301
- //
302
- bool Parser::parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8,
303
- bool encode)
304
- {
305
- if (p_end > p_start) {
306
- std::string buf;
307
-
308
- if (encode) {
309
- if (!util::to_utf8(p_start, (uint32_t) (p_end - p_start), buf))
310
- return false;
311
- }
312
- else {
313
- buf.append(p_start, p_end - p_start);
314
- }
315
-
316
- // utf-8 encode
317
- VALUE vs = edn_prot_rb_new_str(buf.c_str());
318
- int error;
319
- v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
320
- if (error) Parser::throw_error(error);
321
- return true;
322
- } else if (p_end == p_start) {
323
- v_utf8 = rb_str_new("", 0);
324
- return true;
325
- }
326
-
327
- return false;
328
- }
329
-
330
- //
331
- // handles things like \c, \newline
332
- //
333
- bool Parser::parse_escaped_char(const char *p, const char *pe, VALUE& v)
334
- {
335
- std::string buf;
336
- std::size_t len = pe - p;
337
- buf.append(p, len);
338
-
339
- if (len > 1) {
340
- if (buf == "newline") buf = '\n';
341
- else if (buf == "tab") buf = '\t';
342
- else if (buf == "return") buf = '\r';
343
- else if (buf == "space") buf = ' ';
344
- else if (buf == "formfeed") buf = '\f';
345
- else if (buf == "backspace") buf = '\b';
346
- // TODO: is this supported?
347
- else if (buf == "verticaltab") buf = '\v';
348
- else return false;
349
- }
350
-
351
- v = edn_prot_rb_new_str( buf.c_str() );
352
- return true;
353
- }
354
-
355
-
356
- //
357
- // get a set representation from the ruby side. See edn_turbo.rb
358
- VALUE Parser::make_edn_type(ID method, VALUE sym)
359
- {
360
- VALUE edn_module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
361
- prot_args args(edn_module, method, sym);
362
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
363
- }
364
-
365
- VALUE Parser::make_edn_type(ID method, VALUE name, VALUE data)
366
- {
367
- VALUE module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
368
- return make_edn_type(module, method, name, data);
369
- }
370
-
371
- VALUE Parser::make_edn_type(VALUE module, ID method, VALUE name, VALUE data)
372
- {
373
- prot_args args(module, method, name, data);
374
- return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
375
- }
376
-
377
-
378
- // =================================================================
379
- // METADATA
380
- //
381
- // returns an array of metadata value(s) saved in reverse order
382
- // (right to left) - the ruby side will interpret this
383
- VALUE Parser::ruby_meta()
384
- {
385
- VALUE m_ary = rb_ary_new();
386
-
387
- // pop from the back of the top-most list
388
- while (!metadata.top()->empty()) {
389
- rb_ary_push(m_ary, metadata.top()->back());
390
- metadata.top()->pop_back();
391
- }
392
-
393
- return m_ary;
394
- }
395
-
396
-
397
- // =================================================================
398
- //
399
- // error reporting
400
- void Parser::throw_error(int error)
401
- {
402
- if (error == 0)
403
- return;
404
-
405
- VALUE err = rb_errinfo();
406
- VALUE klass = rb_class_path(CLASS_OF(err));
407
- VALUE message = rb_obj_as_string(err);
408
- std::stringstream msg;
409
- msg << RSTRING_PTR(klass) << " exception: " << RSTRING_PTR(message);
410
- throw std::runtime_error(msg.str());
411
- }
412
-
413
- void Parser::error(const std::string& func, const std::string& err, char c) const
414
- {
415
- std::cerr << "Parse error "
416
- // "from " << func << "() "
417
- ;
418
- if (err.length() > 0)
419
- std::cerr << "(" << err << ") ";
420
- if (c != '\0')
421
- std::cerr << "at '" << c << "' ";
422
- std::cerr << "on line " << line_number << std::endl;
423
- }
424
- }
@@ -1,11 +0,0 @@
1
- #pragma once
2
-
3
- #include <string>
4
-
5
namespace edn
{
    namespace util
    {
        // Interprets the `len` bytes starting at `s`, replaces any escape
        // sequences they contain and appends the result, encoded as UTF-8,
        // to `rslt`. Returns false when the input could not be turned into
        // a usable Unicode string, true otherwise.
        bool to_utf8(const char *s, uint32_t len, std::string& rslt);
    }
}
@@ -1,33 +0,0 @@
1
- #include <string>
2
-
3
- //
4
- // needed to define this in its own file because icu and ruby have
5
- // differing definitions for Uchar and the compiler complains
6
- //
7
- #include <unicode/utypes.h>
8
- #include <unicode/ustring.h>
9
- #include <unicode/ucnv.h>
10
- #include <unicode/unistr.h>
11
-
12
- #include "edn_parser_util.h"
13
-
14
- namespace edn
15
- {
16
- namespace util
17
- {
18
- //
19
- // unescapes any values that need to be replaced, saves it to utf8
20
- //
21
- bool to_utf8(const char *s, uint32_t len, std::string& rslt)
22
- {
23
- icu::UnicodeString ustr(s, len);
24
-
25
- if (ustr.isBogus()) {
26
- return false;
27
- }
28
-
29
- ustr.unescape().toUTF8String(rslt);
30
- return true;
31
- }
32
- }
33
- }