edn_turbo 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,10 @@
1
- #ifndef EDN_RAGEL_PARSER_H
2
- #define EDN_RAGEL_PARSER_H
1
+ #pragma once
3
2
 
4
3
  #include <string>
5
4
  #include <sstream>
6
5
  #include <vector>
7
6
  #include <stack>
8
7
 
9
- #include <ruby/ruby.h>
10
-
11
8
 
12
9
  namespace edn
13
10
  {
@@ -22,6 +19,7 @@ namespace edn
22
19
 
23
20
  extern VALUE EDNT_STRING_TO_I_METHOD;
24
21
  extern VALUE EDNT_STRING_TO_F_METHOD;
22
+ extern VALUE EDNT_READ_METHOD;
25
23
 
26
24
  extern VALUE EDNT_EOF_CONST;
27
25
 
@@ -31,13 +29,18 @@ namespace edn
31
29
  class Parser
32
30
  {
33
31
  public:
34
- Parser() : p(NULL), pe(NULL), eof(NULL), line_number(1) {
32
+ Parser() : p(NULL), pe(NULL), eof(NULL),
33
+ core_io(NULL), read_io(Qnil),
34
+ io_buffer(NULL), io_buffer_len(0),
35
+ line_number(1) {
35
36
  new_meta_list();
36
37
  }
37
- ~Parser() { reset_state(); del_top_meta_list(); }
38
+ ~Parser();
38
39
 
39
40
  // change input source
40
41
  void set_source(const char* src, std::size_t len);
42
+ void set_source(FILE* fp);
43
+ void set_source(VALUE string_io);
41
44
 
42
45
  bool is_eof() const { return (p == eof); }
43
46
 
@@ -54,11 +57,16 @@ namespace edn
54
57
  const char* p;
55
58
  const char* pe;
56
59
  const char* eof;
60
+ FILE* core_io; // for IO streams
61
+ VALUE read_io; // for non-core IO that responds to read()
62
+ char* io_buffer;
63
+ uintmax_t io_buffer_len;
57
64
  std::size_t line_number;
58
65
  std::vector<VALUE> discard;
59
66
  std::stack<std::vector<VALUE>* > metadata;
60
67
 
61
68
  void reset_state();
69
+ void fill_buf();
62
70
 
63
71
  const char* parse_value (const char *p, const char *pe, VALUE& v);
64
72
  const char* parse_string (const char *p, const char *pe, VALUE& v);
@@ -81,12 +89,10 @@ namespace edn
81
89
 
82
90
  eTokenState parse_next(VALUE& value);
83
91
 
84
- // defined in edn_parser_unicode.cc
85
- static bool to_utf8(const char *s, std::size_t len, std::string& rslt);
86
-
87
92
  // defined in edn_parser_util.cc
88
93
  static VALUE integer_to_ruby(const char* str, std::size_t len);
89
94
  static VALUE float_to_ruby (const char* str, std::size_t len);
95
+ static VALUE ruby_io_read(VALUE io);
90
96
 
91
97
  static bool parse_byte_stream (const char *p, const char *pe, VALUE& rslt, bool encode);
92
98
  static bool parse_escaped_char(const char *p, const char *pe, VALUE& rslt);
@@ -117,8 +123,6 @@ namespace edn
117
123
  void error(const std::string& f, const std::string& err, char c) const;
118
124
  void error(const std::string& f, char err_c) const { error(f, "", err_c); }
119
125
  void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
120
- }; // Engine
126
+ }; // Parser
121
127
 
122
128
  } // namespace
123
-
124
- #endif
@@ -2,9 +2,10 @@
2
2
  #include <string>
3
3
  #include <vector>
4
4
  #include <exception>
5
-
6
5
  #include <cstring>
7
6
 
7
+ #include <ruby/ruby.h>
8
+
8
9
  #include "edn_parser.h"
9
10
 
10
11
  //
@@ -222,7 +223,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
222
223
  ((0xc2..0xf5) |
223
224
  '\\'[\"\\/bfnrt] |
224
225
  '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
225
- '\\'^([\"\\/bfnrtu]))* %parse_chars
226
+ '\\'^([\"\\/bfnrtu]))* %parse_chars
226
227
  ) :>> string_delim @err(close_err) @exit;
227
228
  }%%
228
229
 
@@ -1189,11 +1190,9 @@ edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
1189
1190
  return state;
1190
1191
  }
1191
1192
 
1192
-
1193
1193
  /*
1194
- * Local variables:
1195
- * mode: c
1196
- * c-file-style: ruby
1197
- * indent-tabs-mode: nil
1198
- * End:
1199
- */
1194
+ * Local variables:
1195
+ * mode: c
1196
+ * indent-tabs-mode: nil
1197
+ * End:
1198
+ */
@@ -12,6 +12,7 @@
12
12
  #include <ruby/encoding.h>
13
13
 
14
14
  #include "edn_parser.h"
15
+ #include "edn_parser_util.h"
15
16
 
16
17
  namespace edn
17
18
  {
@@ -29,6 +30,18 @@ namespace edn
29
30
  static const std::size_t LD_max_chars = get_max_chars<>((double) 1);
30
31
 
31
32
 
33
+ // parser destructor
34
+ //
35
+ Parser::~Parser()
36
+ {
37
+ reset_state();
38
+ del_top_meta_list();
39
+
40
+ if (io_buffer) {
41
+ free(reinterpret_cast<void*>(io_buffer));
42
+ }
43
+ }
44
+
32
45
  // =================================================================
33
46
  // for token-by-token parsing. If a discard or metadata is parsed,
34
47
  // attempt to get the following value
@@ -37,6 +50,11 @@ namespace edn
37
50
  {
38
51
  VALUE token = EDNT_EOF_CONST;
39
52
 
53
+ // buffer if reading from an IO
54
+ if (core_io || (read_io != Qnil)) {
55
+ fill_buf();
56
+ }
57
+
40
58
  while (!is_eof())
41
59
  {
42
60
  // fetch a token. If it's metadata or discard
@@ -70,6 +88,11 @@ namespace edn
70
88
  }
71
89
  // but clear any metadata on the first
72
90
  metadata.top()->clear();
91
+
92
+ // clean up
93
+ core_io = NULL;
94
+ read_io = Qnil;
95
+ p = pe = eof = NULL;
73
96
  }
74
97
 
75
98
  //
@@ -83,6 +106,78 @@ namespace edn
83
106
  eof = pe;
84
107
  }
85
108
 
109
+ void Parser::set_source(FILE* fp)
110
+ {
111
+ reset_state();
112
+ core_io = fp;
113
+ }
114
+
115
+ void Parser::set_source(VALUE str_io)
116
+ {
117
+ reset_state();
118
+ read_io = str_io;
119
+ }
120
+
121
+ //
122
+ // for IO sources, read and fill a buffer
123
+ void Parser::fill_buf()
124
+ {
125
+ std::string str_buf;
126
+
127
+ // read as much data available
128
+ if (core_io) {
129
+ // ruby core IO types
130
+ char c;
131
+ while (1)
132
+ {
133
+ c = fgetc(core_io);
134
+ if (c == EOF) {
135
+ break;
136
+ }
137
+ str_buf += c;
138
+ }
139
+
140
+ } else if (read_io != Qnil) {
141
+ // StringIO, etc. Call read() from ruby side
142
+ VALUE v = ruby_io_read(read_io);
143
+ if (TYPE(v) == T_STRING) {
144
+ str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
145
+ }
146
+ }
147
+
148
+ // set the buffer to read from
149
+ if (str_buf.length() > 0) {
150
+ // first time when io_buffer is NULL, pe & p = 0
151
+ uintmax_t new_length = ((uintmax_t) (pe - p)) + str_buf.length();
152
+ char* start = NULL;
153
+
154
+ // allocate or extend storage needed
155
+ if (!io_buffer) {
156
+ io_buffer = reinterpret_cast<char*>(malloc(new_length));
157
+ start = io_buffer;
158
+ } else if (io_buffer_len < new_length) {
159
+ // resize the buffer
160
+ realloc(reinterpret_cast<void*>(io_buffer), new_length);
161
+ }
162
+
163
+ if (!start) {
164
+ // appending to the buffer but move the data not yet
165
+ // parsed first to the front
166
+ memmove(io_buffer, p, pe - p);
167
+ start = io_buffer + (pe - p);
168
+ }
169
+
170
+ // and copy
171
+ memcpy(start, str_buf.c_str(), str_buf.length());
172
+ io_buffer_len = new_length;
173
+
174
+ // set ragel state
175
+ p = io_buffer;
176
+ pe = p + new_length;
177
+ eof = pe;
178
+ }
179
+ }
180
+
86
181
 
87
182
  // =================================================================
88
183
  // work-around for idiotic rb_protect convention in order to avoid
@@ -188,6 +283,15 @@ namespace edn
188
283
  }
189
284
 
190
285
 
286
+ //
287
+ // read from a StringIO - expensive!!!
288
+ //
289
+ VALUE Parser::ruby_io_read(VALUE io)
290
+ {
291
+ prot_args args(io, EDNT_READ_METHOD);
292
+ return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
293
+ }
294
+
191
295
  //
192
296
  // copies the string data, unescaping any present values that need to be replaced
193
297
  //
@@ -198,7 +302,7 @@ namespace edn
198
302
  std::string buf;
199
303
 
200
304
  if (encode) {
201
- if (!to_utf8(p_start, p_end - p_start, buf))
305
+ if (!util::to_utf8(p_start, p_end - p_start, buf))
202
306
  return false;
203
307
  }
204
308
  else {
@@ -0,0 +1,11 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+
5
namespace edn
{
    namespace util
    {
        // Unescapes the `len` bytes starting at `s` and stores the
        // UTF-8 result in `rslt`. Returns false when the input could
        // not be turned into a usable string.
        bool to_utf8(const char* s, std::size_t len, std::string& rslt);
    } // namespace util
} // namespace edn
@@ -0,0 +1,32 @@
1
+ #include <string>
2
+
3
+ //
4
+ // needed to define this in its own file because icu and ruby have
5
+ // differing definitions for Uchar and the compiler complains
6
+ //
7
+ #include <unicode/utypes.h>
8
+ #include <unicode/ustring.h>
9
+ #include <unicode/ucnv.h>
10
+
11
+ #include "edn_parser_util.h"
12
+
13
+ namespace edn
14
+ {
15
+ namespace util
16
+ {
17
+ //
18
+ // unescapes any values that need to be replaced, saves it to utf8
19
+ //
20
+ bool to_utf8(const char *s, std::size_t len, std::string& rslt)
21
+ {
22
+ icu::UnicodeString ustr(s, len);
23
+
24
+ if (ustr.isBogus()) {
25
+ return false;
26
+ }
27
+
28
+ ustr.unescape().toUTF8String(rslt);
29
+ return true;
30
+ }
31
+ }
32
+ }
@@ -1,12 +1,13 @@
1
1
  #include <signal.h>
2
2
  #include <iostream>
3
3
  #include <clocale>
4
-
5
4
  #include <cstring>
6
5
 
6
+ #include <ruby/ruby.h>
7
+ #include <ruby/io.h>
8
+
7
9
  #include "edn_parser.h"
8
10
 
9
- #include <ruby/ruby.h>
10
11
 
11
12
 
12
13
  namespace edn {
@@ -25,6 +26,7 @@ namespace edn {
25
26
 
26
27
  VALUE EDNT_STRING_TO_I_METHOD = Qnil;
27
28
  VALUE EDNT_STRING_TO_F_METHOD = Qnil;
29
+ VALUE EDNT_READ_METHOD = Qnil;
28
30
 
29
31
  // returned when EOF - defined as a constant in EDN module
30
32
  VALUE EDNT_EOF_CONST = Qnil;
@@ -49,7 +51,7 @@ namespace edn {
49
51
  Data_Get_Struct( self, edn::Parser, p );
50
52
  return p;
51
53
  }
52
-
54
+ static VALUE set_source(VALUE self, VALUE data);
53
55
 
54
56
  //
55
57
  // Called by the constructor - sets the source if passed.
@@ -57,11 +59,8 @@ namespace edn {
57
59
  {
58
60
  Parser* p = get_parser(self);
59
61
 
60
- if (argc > 0)
61
- {
62
- const char* stream = StringValueCStr(argv[0]);
63
- if (stream)
64
- p->set_source( stream, std::strlen(stream) );
62
+ if (argc > 0) {
63
+ set_source( self, argv[0] );
65
64
  }
66
65
  return self;
67
66
  }
@@ -72,9 +71,49 @@ namespace edn {
72
71
  {
73
72
  Parser* p = get_parser(self);
74
73
 
75
- const char* stream = StringValueCStr(data);
76
- if (stream)
77
- p->set_source( stream, std::strlen(stream) );
74
+ switch (TYPE(data))
75
+ {
76
+ case T_STRING:
77
+ {
78
+ const char* stream = StringValueCStr(data);
79
+ if (stream) {
80
+ p->set_source( stream, std::strlen(stream) );
81
+ }
82
+ break;
83
+ }
84
+ case T_FILE:
85
+ {
86
+ // extract the stream pointer
87
+ rb_io_t* fptr = RFILE(data)->fptr;
88
+ if (!fptr) {
89
+ rb_raise(rb_eRuntimeError, "Ruby IO - fptr is NULL");
90
+ }
91
+
92
+ rb_io_check_char_readable(fptr);
93
+
94
+ FILE* fp = rb_io_stdio_file(fptr);
95
+ if (!fp) {
96
+ rb_raise(rb_eRuntimeError, "Ruby IO - fptr->fp is NULL");
97
+ }
98
+
99
+ p->set_source(fp);
100
+ break;
101
+ }
102
+ case T_DATA:
103
+ {
104
+ // StringIO or some other IO not part of the ruby core -
105
+ // this is very inefficient as it'll require read()
106
+ // calls from the ruby side (involves a lot of data
107
+ // wrapping, etc)
108
+ if (rb_respond_to(data, EDNT_READ_METHOD)) {
109
+ p->set_source(data);
110
+ break;
111
+ }
112
+ }
113
+ default:
114
+ rb_raise(rb_eRuntimeError, "set_source expected String, core IO, or IO that responds to read()");
115
+ break;
116
+ }
78
117
 
79
118
  return self;
80
119
  }
@@ -90,9 +129,13 @@ namespace edn {
90
129
  // parses an entire stream
91
130
  static VALUE read(VALUE self, VALUE data)
92
131
  {
132
+ if (TYPE(data) != T_STRING) {
133
+ rb_raise(rb_eTypeError, "Expected String data");
134
+ }
93
135
  const char* stream = StringValueCStr(data);
94
- if (stream)
136
+ if (stream) {
95
137
  return get_parser(self)->parse(stream, std::strlen(stream) );
138
+ }
96
139
  return Qnil;
97
140
  }
98
141
 
@@ -125,8 +168,7 @@ void Init_edn_turbo(void)
125
168
 
126
169
  // pass things back as utf-8
127
170
  if (!setlocale( LC_ALL, "" )) {
128
- std::cerr << "Error setting locale" << std::endl;
129
- return;
171
+ rb_raise(rb_eRuntimeError, "Extension init error calling setlocale() - It appears your system's locale is not configured correctly.\n");
130
172
  }
131
173
 
132
174
  edn::rb_mEDNT = rb_define_module("EDNT");
@@ -152,6 +194,7 @@ void Init_edn_turbo(void)
152
194
 
153
195
  edn::EDNT_STRING_TO_I_METHOD = rb_intern("to_i");
154
196
  edn::EDNT_STRING_TO_F_METHOD = rb_intern("to_f");
197
+ edn::EDNT_READ_METHOD = rb_intern("read");
155
198
 
156
199
  // so we can return EOF directly
157
200
  VALUE edn_module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
@@ -1,4 +1,4 @@
1
1
  module EDNT
2
- VERSION = '0.4.1'
3
- RELEASE_DATE = %q{2016-07-21}
2
+ VERSION = '0.5.0'
3
+ RELEASE_DATE = '2016-11-18'
4
4
  end