pg 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +3 -0
- data.tar.gz.sig +0 -0
- data/.gemtest +0 -0
- data/BSDL +22 -0
- data/ChangeLog +6595 -0
- data/Contributors.rdoc +46 -0
- data/History.rdoc +492 -0
- data/LICENSE +56 -0
- data/Manifest.txt +72 -0
- data/POSTGRES +23 -0
- data/README-OS_X.rdoc +68 -0
- data/README-Windows.rdoc +56 -0
- data/README.ja.rdoc +14 -0
- data/README.rdoc +178 -0
- data/Rakefile +215 -0
- data/Rakefile.cross +298 -0
- data/ext/errorcodes.def +968 -0
- data/ext/errorcodes.rb +45 -0
- data/ext/errorcodes.txt +478 -0
- data/ext/extconf.rb +94 -0
- data/ext/gvl_wrappers.c +17 -0
- data/ext/gvl_wrappers.h +241 -0
- data/ext/pg.c +640 -0
- data/ext/pg.h +365 -0
- data/ext/pg_binary_decoder.c +229 -0
- data/ext/pg_binary_encoder.c +162 -0
- data/ext/pg_coder.c +549 -0
- data/ext/pg_connection.c +4252 -0
- data/ext/pg_copy_coder.c +596 -0
- data/ext/pg_errors.c +95 -0
- data/ext/pg_result.c +1501 -0
- data/ext/pg_text_decoder.c +981 -0
- data/ext/pg_text_encoder.c +682 -0
- data/ext/pg_tuple.c +541 -0
- data/ext/pg_type_map.c +166 -0
- data/ext/pg_type_map_all_strings.c +116 -0
- data/ext/pg_type_map_by_class.c +239 -0
- data/ext/pg_type_map_by_column.c +312 -0
- data/ext/pg_type_map_by_mri_type.c +284 -0
- data/ext/pg_type_map_by_oid.c +355 -0
- data/ext/pg_type_map_in_ruby.c +299 -0
- data/ext/util.c +149 -0
- data/ext/util.h +65 -0
- data/ext/vc/pg.sln +26 -0
- data/ext/vc/pg_18/pg.vcproj +216 -0
- data/ext/vc/pg_19/pg_19.vcproj +209 -0
- data/lib/pg.rb +74 -0
- data/lib/pg/basic_type_mapping.rb +459 -0
- data/lib/pg/binary_decoder.rb +22 -0
- data/lib/pg/coder.rb +83 -0
- data/lib/pg/connection.rb +291 -0
- data/lib/pg/constants.rb +11 -0
- data/lib/pg/exceptions.rb +11 -0
- data/lib/pg/result.rb +31 -0
- data/lib/pg/text_decoder.rb +47 -0
- data/lib/pg/text_encoder.rb +69 -0
- data/lib/pg/tuple.rb +30 -0
- data/lib/pg/type_map_by_column.rb +15 -0
- data/spec/data/expected_trace.out +26 -0
- data/spec/data/random_binary_data +0 -0
- data/spec/helpers.rb +380 -0
- data/spec/pg/basic_type_mapping_spec.rb +508 -0
- data/spec/pg/connection_spec.rb +1872 -0
- data/spec/pg/connection_sync_spec.rb +41 -0
- data/spec/pg/result_spec.rb +491 -0
- data/spec/pg/tuple_spec.rb +280 -0
- data/spec/pg/type_map_by_class_spec.rb +138 -0
- data/spec/pg/type_map_by_column_spec.rb +222 -0
- data/spec/pg/type_map_by_mri_type_spec.rb +136 -0
- data/spec/pg/type_map_by_oid_spec.rb +149 -0
- data/spec/pg/type_map_in_ruby_spec.rb +164 -0
- data/spec/pg/type_map_spec.rb +22 -0
- data/spec/pg/type_spec.rb +949 -0
- data/spec/pg_spec.rb +50 -0
- metadata +322 -0
- metadata.gz.sig +0 -0
data/ext/pg_copy_coder.c
ADDED
@@ -0,0 +1,596 @@
|
|
1
|
+
/*
|
2
|
+
 * pg_copy_coder.c - PG::CopyCoder class extension
|
3
|
+
*
|
4
|
+
*/
|
5
|
+
|
6
|
+
#include "pg.h"
|
7
|
+
|
8
|
+
/* Non-zero when the character c is an octal digit ('0'..'7').
 * Note: the argument is evaluated twice, so pass a side-effect free expression. */
#define ISOCTAL(c) (('0' <= (c)) && ((c) <= '7'))
/* Numeric value of the digit character c. */
#define OCTVALUE(c) ((c) - '0')
|
10
|
+
|
11
|
+
/* Class handles; all three are assigned in init_pg_copycoder(). */
VALUE rb_cPG_CopyCoder;    /* PG::CopyCoder   - common base class */
VALUE rb_cPG_CopyEncoder;  /* PG::CopyEncoder - subclass of PG::CopyCoder */
VALUE rb_cPG_CopyDecoder;  /* PG::CopyDecoder - subclass of PG::CopyCoder */
|
14
|
+
|
15
|
+
typedef struct {
|
16
|
+
t_pg_coder comp;
|
17
|
+
VALUE typemap;
|
18
|
+
VALUE null_string;
|
19
|
+
char delimiter;
|
20
|
+
} t_pg_copycoder;
|
21
|
+
|
22
|
+
|
23
|
+
static void
|
24
|
+
pg_copycoder_mark( t_pg_copycoder *this )
|
25
|
+
{
|
26
|
+
rb_gc_mark(this->typemap);
|
27
|
+
rb_gc_mark(this->null_string);
|
28
|
+
}
|
29
|
+
|
30
|
+
static VALUE
|
31
|
+
pg_copycoder_encoder_allocate( VALUE klass )
|
32
|
+
{
|
33
|
+
t_pg_copycoder *this;
|
34
|
+
VALUE self = Data_Make_Struct( klass, t_pg_copycoder, pg_copycoder_mark, -1, this );
|
35
|
+
pg_coder_init_encoder( self );
|
36
|
+
this->typemap = pg_typemap_all_strings;
|
37
|
+
this->delimiter = '\t';
|
38
|
+
this->null_string = rb_str_new_cstr("\\N");
|
39
|
+
return self;
|
40
|
+
}
|
41
|
+
|
42
|
+
static VALUE
|
43
|
+
pg_copycoder_decoder_allocate( VALUE klass )
|
44
|
+
{
|
45
|
+
t_pg_copycoder *this;
|
46
|
+
VALUE self = Data_Make_Struct( klass, t_pg_copycoder, pg_copycoder_mark, -1, this );
|
47
|
+
pg_coder_init_decoder( self );
|
48
|
+
this->typemap = pg_typemap_all_strings;
|
49
|
+
this->delimiter = '\t';
|
50
|
+
this->null_string = rb_str_new_cstr("\\N");
|
51
|
+
return self;
|
52
|
+
}
|
53
|
+
|
54
|
+
/*
|
55
|
+
* call-seq:
|
56
|
+
* coder.delimiter = String
|
57
|
+
*
|
58
|
+
* Specifies the character that separates columns within each row (line) of the file.
|
59
|
+
* The default is a tab character in text format, a comma in CSV format.
|
60
|
+
* This must be a single one-byte character. This option is ignored when using binary format.
|
61
|
+
*/
|
62
|
+
static VALUE
|
63
|
+
pg_copycoder_delimiter_set(VALUE self, VALUE delimiter)
|
64
|
+
{
|
65
|
+
t_pg_copycoder *this = DATA_PTR(self);
|
66
|
+
StringValue(delimiter);
|
67
|
+
if(RSTRING_LEN(delimiter) != 1)
|
68
|
+
rb_raise( rb_eArgError, "delimiter size must be one byte");
|
69
|
+
this->delimiter = *RSTRING_PTR(delimiter);
|
70
|
+
return delimiter;
|
71
|
+
}
|
72
|
+
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* coder.delimiter -> String
|
76
|
+
*
|
77
|
+
* The character that separates columns within each row (line) of the file.
|
78
|
+
*/
|
79
|
+
static VALUE
|
80
|
+
pg_copycoder_delimiter_get(VALUE self)
|
81
|
+
{
|
82
|
+
t_pg_copycoder *this = DATA_PTR(self);
|
83
|
+
return rb_str_new(&this->delimiter, 1);
|
84
|
+
}
|
85
|
+
|
86
|
+
/*
|
87
|
+
* Specifies the string that represents a null value. The default is \\N (backslash-N)
|
88
|
+
* in text format, and an unquoted empty string in CSV format. You might prefer an
|
89
|
+
* empty string even in text format for cases where you don't want to distinguish nulls
|
90
|
+
* from empty strings. This option is ignored when using binary format.
|
91
|
+
*/
|
92
|
+
static VALUE
|
93
|
+
pg_copycoder_null_string_set(VALUE self, VALUE null_string)
|
94
|
+
{
|
95
|
+
t_pg_copycoder *this = DATA_PTR(self);
|
96
|
+
StringValue(null_string);
|
97
|
+
this->null_string = null_string;
|
98
|
+
return null_string;
|
99
|
+
}
|
100
|
+
|
101
|
+
/*
|
102
|
+
* The string that represents a null value.
|
103
|
+
*/
|
104
|
+
static VALUE
|
105
|
+
pg_copycoder_null_string_get(VALUE self)
|
106
|
+
{
|
107
|
+
t_pg_copycoder *this = DATA_PTR(self);
|
108
|
+
return this->null_string;
|
109
|
+
}
|
110
|
+
|
111
|
+
/*
|
112
|
+
* call-seq:
|
113
|
+
* coder.type_map = map
|
114
|
+
*
|
115
|
+
* +map+ must be a kind of PG::TypeMap .
|
116
|
+
*
|
117
|
+
* Defaults to a PG::TypeMapAllStrings , so that PG::TextEncoder::String respectively
|
118
|
+
* PG::TextDecoder::String is used for encoding/decoding of all columns.
|
119
|
+
*
|
120
|
+
*/
|
121
|
+
static VALUE
|
122
|
+
pg_copycoder_type_map_set(VALUE self, VALUE type_map)
|
123
|
+
{
|
124
|
+
t_pg_copycoder *this = DATA_PTR( self );
|
125
|
+
|
126
|
+
if ( !rb_obj_is_kind_of(type_map, rb_cTypeMap) ){
|
127
|
+
rb_raise( rb_eTypeError, "wrong elements type %s (expected some kind of PG::TypeMap)",
|
128
|
+
rb_obj_classname( type_map ) );
|
129
|
+
}
|
130
|
+
this->typemap = type_map;
|
131
|
+
|
132
|
+
return type_map;
|
133
|
+
}
|
134
|
+
|
135
|
+
/*
|
136
|
+
* call-seq:
|
137
|
+
* coder.type_map -> PG::TypeMap
|
138
|
+
*
|
139
|
+
*/
|
140
|
+
static VALUE
|
141
|
+
pg_copycoder_type_map_get(VALUE self)
|
142
|
+
{
|
143
|
+
t_pg_copycoder *this = DATA_PTR( self );
|
144
|
+
|
145
|
+
return this->typemap;
|
146
|
+
}
|
147
|
+
|
148
|
+
|
149
|
+
/*
|
150
|
+
* Document-class: PG::TextEncoder::CopyRow < PG::CopyEncoder
|
151
|
+
*
|
152
|
+
* This class encodes one row of arbitrary columns for transmission as COPY data in text format.
|
153
|
+
* See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html]
|
154
|
+
* for description of the format.
|
155
|
+
*
|
156
|
+
* It is intended to be used in conjunction with PG::Connection#put_copy_data .
|
157
|
+
*
|
158
|
+
* The columns are expected as Array of values. The single values are encoded as defined
|
159
|
+
* in the assigned #type_map. If no type_map was assigned, all values are converted to
|
160
|
+
* strings by PG::TextEncoder::String.
|
161
|
+
*
|
162
|
+
* Example with default type map ( TypeMapAllStrings ):
|
163
|
+
* conn.exec "create table my_table (a text,b int,c bool)"
|
164
|
+
* enco = PG::TextEncoder::CopyRow.new
|
165
|
+
* conn.copy_data "COPY my_table FROM STDIN", enco do
|
166
|
+
* conn.put_copy_data ["astring", 7, false]
|
167
|
+
* conn.put_copy_data ["string2", 42, true]
|
168
|
+
* end
|
169
|
+
* This creates +my_table+ and inserts two rows.
|
170
|
+
*
|
171
|
+
* It is possible to manually assign a type encoder for each column per PG::TypeMapByColumn,
|
172
|
+
* or to make use of PG::BasicTypeMapBasedOnResult to assign them based on the table OIDs.
|
173
|
+
*
|
174
|
+
* See also PG::TextDecoder::CopyRow for the decoding direction with
|
175
|
+
* PG::Connection#get_copy_data .
|
176
|
+
*/
|
177
|
+
static int
|
178
|
+
pg_text_enc_copy_row(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate, int enc_idx)
|
179
|
+
{
|
180
|
+
t_pg_copycoder *this = (t_pg_copycoder *)conv;
|
181
|
+
t_pg_coder_enc_func enc_func;
|
182
|
+
static t_pg_coder *p_elem_coder;
|
183
|
+
int i;
|
184
|
+
t_typemap *p_typemap;
|
185
|
+
char *current_out;
|
186
|
+
char *end_capa_ptr;
|
187
|
+
|
188
|
+
p_typemap = DATA_PTR( this->typemap );
|
189
|
+
p_typemap->funcs.fit_to_query( this->typemap, value );
|
190
|
+
|
191
|
+
/* Allocate a new string with embedded capacity and realloc exponential when needed. */
|
192
|
+
PG_RB_STR_NEW( *intermediate, current_out, end_capa_ptr );
|
193
|
+
PG_ENCODING_SET_NOCHECK(*intermediate, enc_idx);
|
194
|
+
|
195
|
+
for( i=0; i<RARRAY_LEN(value); i++){
|
196
|
+
char *ptr1;
|
197
|
+
char *ptr2;
|
198
|
+
int strlen;
|
199
|
+
int backslashs;
|
200
|
+
VALUE subint;
|
201
|
+
VALUE entry;
|
202
|
+
|
203
|
+
entry = rb_ary_entry(value, i);
|
204
|
+
|
205
|
+
if( i > 0 ){
|
206
|
+
PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr );
|
207
|
+
*current_out++ = this->delimiter;
|
208
|
+
}
|
209
|
+
|
210
|
+
switch(TYPE(entry)){
|
211
|
+
case T_NIL:
|
212
|
+
PG_RB_STR_ENSURE_CAPA( *intermediate, RSTRING_LEN(this->null_string), current_out, end_capa_ptr );
|
213
|
+
memcpy( current_out, RSTRING_PTR(this->null_string), RSTRING_LEN(this->null_string) );
|
214
|
+
current_out += RSTRING_LEN(this->null_string);
|
215
|
+
break;
|
216
|
+
default:
|
217
|
+
p_elem_coder = p_typemap->funcs.typecast_query_param(p_typemap, entry, i);
|
218
|
+
enc_func = pg_coder_enc_func(p_elem_coder);
|
219
|
+
|
220
|
+
/* 1st pass for retiving the required memory space */
|
221
|
+
strlen = enc_func(p_elem_coder, entry, NULL, &subint, enc_idx);
|
222
|
+
|
223
|
+
if( strlen == -1 ){
|
224
|
+
/* we can directly use String value in subint */
|
225
|
+
strlen = RSTRING_LEN(subint);
|
226
|
+
|
227
|
+
/* size of string assuming the worst case, that every character must be escaped. */
|
228
|
+
PG_RB_STR_ENSURE_CAPA( *intermediate, strlen * 2, current_out, end_capa_ptr );
|
229
|
+
|
230
|
+
/* Copy string from subint with backslash escaping */
|
231
|
+
for(ptr1 = RSTRING_PTR(subint); ptr1 < RSTRING_PTR(subint) + strlen; ptr1++) {
|
232
|
+
/* Escape backslash itself, newline, carriage return, and the current delimiter character. */
|
233
|
+
if(*ptr1 == '\\' || *ptr1 == '\n' || *ptr1 == '\r' || *ptr1 == this->delimiter){
|
234
|
+
*current_out++ = '\\';
|
235
|
+
}
|
236
|
+
*current_out++ = *ptr1;
|
237
|
+
}
|
238
|
+
} else {
|
239
|
+
/* 2nd pass for writing the data to prepared buffer */
|
240
|
+
/* size of string assuming the worst case, that every character must be escaped. */
|
241
|
+
PG_RB_STR_ENSURE_CAPA( *intermediate, strlen * 2, current_out, end_capa_ptr );
|
242
|
+
|
243
|
+
/* Place the unescaped string at current output position. */
|
244
|
+
strlen = enc_func(p_elem_coder, entry, current_out, &subint, enc_idx);
|
245
|
+
|
246
|
+
ptr1 = current_out;
|
247
|
+
ptr2 = current_out + strlen;
|
248
|
+
|
249
|
+
/* count required backlashs */
|
250
|
+
for(backslashs = 0; ptr1 != ptr2; ptr1++) {
|
251
|
+
/* Escape backslash itself, newline, carriage return, and the current delimiter character. */
|
252
|
+
if(*ptr1 == '\\' || *ptr1 == '\n' || *ptr1 == '\r' || *ptr1 == this->delimiter){
|
253
|
+
backslashs++;
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
ptr1 = current_out + strlen;
|
258
|
+
ptr2 = current_out + strlen + backslashs;
|
259
|
+
current_out = ptr2;
|
260
|
+
|
261
|
+
/* Then store the escaped string on the final position, walking
|
262
|
+
* right to left, until all backslashs are placed. */
|
263
|
+
while( ptr1 != ptr2 ) {
|
264
|
+
*--ptr2 = *--ptr1;
|
265
|
+
if(*ptr1 == '\\' || *ptr1 == '\n' || *ptr1 == '\r' || *ptr1 == this->delimiter){
|
266
|
+
*--ptr2 = '\\';
|
267
|
+
}
|
268
|
+
}
|
269
|
+
}
|
270
|
+
}
|
271
|
+
}
|
272
|
+
PG_RB_STR_ENSURE_CAPA( *intermediate, 1, current_out, end_capa_ptr );
|
273
|
+
*current_out++ = '\n';
|
274
|
+
|
275
|
+
rb_str_set_len( *intermediate, current_out - RSTRING_PTR(*intermediate) );
|
276
|
+
|
277
|
+
return -1;
|
278
|
+
}
|
279
|
+
|
280
|
+
|
281
|
+
/*
 * Return the numeric value (0..15) of the hexadecimal digit +hex+.
 * Upper and lower case letters are accepted; any other character yields -1.
 */
static int
GetDecimalFromHex(char hex)
{
	if ('0' <= hex && hex <= '9') {
		return hex - '0';
	}
	if ('a' <= hex && hex <= 'f') {
		return 10 + (hex - 'a');
	}
	if ('A' <= hex && hex <= 'F') {
		return 10 + (hex - 'A');
	}
	return -1;
}
|
296
|
+
|
297
|
+
/*
|
298
|
+
* Document-class: PG::TextDecoder::CopyRow < PG::CopyDecoder
|
299
|
+
*
|
300
|
+
* This class decodes one row of arbitrary columns received as COPY data in text format.
|
301
|
+
* See the {COPY command}[http://www.postgresql.org/docs/current/static/sql-copy.html]
|
302
|
+
* for description of the format.
|
303
|
+
*
|
304
|
+
* It is intended to be used in conjunction with PG::Connection#get_copy_data .
|
305
|
+
*
|
306
|
+
* The columns are retrieved as Array of values. The single values are decoded as defined
|
307
|
+
* in the assigned #type_map. If no type_map was assigned, all values are converted to
|
308
|
+
* strings by PG::TextDecoder::String.
|
309
|
+
*
|
310
|
+
* Example with default type map ( TypeMapAllStrings ):
|
311
|
+
* conn.exec("CREATE TABLE my_table AS VALUES('astring', 7, FALSE), ('string2', 42, TRUE) ")
|
312
|
+
*
|
313
|
+
* deco = PG::TextDecoder::CopyRow.new
|
314
|
+
* conn.copy_data "COPY my_table TO STDOUT", deco do
|
315
|
+
* while row=conn.get_copy_data
|
316
|
+
* p row
|
317
|
+
* end
|
318
|
+
* end
|
319
|
+
* This prints all rows of +my_table+ :
|
320
|
+
* ["astring", "7", "f"]
|
321
|
+
* ["string2", "42", "t"]
|
322
|
+
*
|
323
|
+
* Example with column based type map:
|
324
|
+
* tm = PG::TypeMapByColumn.new( [
|
325
|
+
* PG::TextDecoder::String.new,
|
326
|
+
* PG::TextDecoder::Integer.new,
|
327
|
+
* PG::TextDecoder::Boolean.new] )
|
328
|
+
* deco = PG::TextDecoder::CopyRow.new( type_map: tm )
|
329
|
+
* conn.copy_data "COPY my_table TO STDOUT", deco do
|
330
|
+
* while row=conn.get_copy_data
|
331
|
+
* p row
|
332
|
+
* end
|
333
|
+
* end
|
334
|
+
* This prints the rows with type casted columns:
|
335
|
+
* ["astring", 7, false]
|
336
|
+
* ["string2", 42, true]
|
337
|
+
*
|
338
|
+
* Instead of manually assigning a type decoder for each column, PG::BasicTypeMapForResults
|
339
|
+
* can be used to assign them based on the table OIDs.
|
340
|
+
*
|
341
|
+
* See also PG::TextEncoder::CopyRow for the encoding direction with
|
342
|
+
* PG::Connection#put_copy_data .
|
343
|
+
*/
|
344
|
+
/*
|
345
|
+
* Parse the current line into separate attributes (fields),
|
346
|
+
* performing de-escaping as needed.
|
347
|
+
*
|
348
|
+
* All fields are gathered into a ruby Array. The de-escaped field data is written
|
349
|
+
* into to a ruby String. This object is reused for non string columns.
|
350
|
+
* For String columns the field value is directly used as return value and no
|
351
|
+
* reuse of the memory is done.
|
352
|
+
*
|
353
|
+
* The parser is thankfully borrowed from the PostgreSQL sources:
|
354
|
+
* src/backend/commands/copy.c
|
355
|
+
*/
|
356
|
+
static VALUE
|
357
|
+
pg_text_dec_copy_row(t_pg_coder *conv, const char *input_line, int len, int _tuple, int _field, int enc_idx)
|
358
|
+
{
|
359
|
+
t_pg_copycoder *this = (t_pg_copycoder *)conv;
|
360
|
+
|
361
|
+
/* Return value: array */
|
362
|
+
VALUE array;
|
363
|
+
|
364
|
+
/* Current field */
|
365
|
+
VALUE field_str;
|
366
|
+
|
367
|
+
char delimc = this->delimiter;
|
368
|
+
int fieldno;
|
369
|
+
int expected_fields;
|
370
|
+
char *output_ptr;
|
371
|
+
const char *cur_ptr;
|
372
|
+
const char *line_end_ptr;
|
373
|
+
char *end_capa_ptr;
|
374
|
+
t_typemap *p_typemap;
|
375
|
+
|
376
|
+
p_typemap = DATA_PTR( this->typemap );
|
377
|
+
expected_fields = p_typemap->funcs.fit_to_copy_get( this->typemap );
|
378
|
+
|
379
|
+
/* The received input string will probably have this->nfields fields. */
|
380
|
+
array = rb_ary_new2(expected_fields);
|
381
|
+
|
382
|
+
/* Allocate a new string with embedded capacity and realloc later with
|
383
|
+
* exponential growing size when needed. */
|
384
|
+
PG_RB_TAINTED_STR_NEW( field_str, output_ptr, end_capa_ptr );
|
385
|
+
|
386
|
+
/* set pointer variables for loop */
|
387
|
+
cur_ptr = input_line;
|
388
|
+
line_end_ptr = input_line + len;
|
389
|
+
|
390
|
+
/* Outer loop iterates over fields */
|
391
|
+
fieldno = 0;
|
392
|
+
for (;;)
|
393
|
+
{
|
394
|
+
int found_delim = 0;
|
395
|
+
const char *start_ptr;
|
396
|
+
const char *end_ptr;
|
397
|
+
int input_len;
|
398
|
+
|
399
|
+
/* Remember start of field on input side */
|
400
|
+
start_ptr = cur_ptr;
|
401
|
+
|
402
|
+
/*
|
403
|
+
* Scan data for field.
|
404
|
+
*
|
405
|
+
* Note that in this loop, we are scanning to locate the end of field
|
406
|
+
* and also speculatively performing de-escaping. Once we find the
|
407
|
+
* end-of-field, we can match the raw field contents against the null
|
408
|
+
* marker string. Only after that comparison fails do we know that
|
409
|
+
* de-escaping is actually the right thing to do; therefore we *must
|
410
|
+
* not* throw any syntax errors before we've done the null-marker
|
411
|
+
* check.
|
412
|
+
*/
|
413
|
+
for (;;)
|
414
|
+
{
|
415
|
+
/* The current character in the input string. */
|
416
|
+
char c;
|
417
|
+
|
418
|
+
end_ptr = cur_ptr;
|
419
|
+
if (cur_ptr >= line_end_ptr)
|
420
|
+
break;
|
421
|
+
c = *cur_ptr++;
|
422
|
+
if (c == delimc){
|
423
|
+
found_delim = 1;
|
424
|
+
break;
|
425
|
+
}
|
426
|
+
if (c == '\n'){
|
427
|
+
break;
|
428
|
+
}
|
429
|
+
if (c == '\\'){
|
430
|
+
if (cur_ptr >= line_end_ptr)
|
431
|
+
break;
|
432
|
+
|
433
|
+
c = *cur_ptr++;
|
434
|
+
switch (c){
|
435
|
+
case '0':
|
436
|
+
case '1':
|
437
|
+
case '2':
|
438
|
+
case '3':
|
439
|
+
case '4':
|
440
|
+
case '5':
|
441
|
+
case '6':
|
442
|
+
case '7':
|
443
|
+
{
|
444
|
+
/* handle \013 */
|
445
|
+
int val;
|
446
|
+
|
447
|
+
val = OCTVALUE(c);
|
448
|
+
if (cur_ptr < line_end_ptr)
|
449
|
+
{
|
450
|
+
c = *cur_ptr;
|
451
|
+
if (ISOCTAL(c))
|
452
|
+
{
|
453
|
+
cur_ptr++;
|
454
|
+
val = (val << 3) + OCTVALUE(c);
|
455
|
+
if (cur_ptr < line_end_ptr)
|
456
|
+
{
|
457
|
+
c = *cur_ptr;
|
458
|
+
if (ISOCTAL(c))
|
459
|
+
{
|
460
|
+
cur_ptr++;
|
461
|
+
val = (val << 3) + OCTVALUE(c);
|
462
|
+
}
|
463
|
+
}
|
464
|
+
}
|
465
|
+
}
|
466
|
+
c = val & 0377;
|
467
|
+
}
|
468
|
+
break;
|
469
|
+
case 'x':
|
470
|
+
/* Handle \x3F */
|
471
|
+
if (cur_ptr < line_end_ptr)
|
472
|
+
{
|
473
|
+
char hexchar = *cur_ptr;
|
474
|
+
int val = GetDecimalFromHex(hexchar);;
|
475
|
+
|
476
|
+
if (val >= 0)
|
477
|
+
{
|
478
|
+
cur_ptr++;
|
479
|
+
if (cur_ptr < line_end_ptr)
|
480
|
+
{
|
481
|
+
int val2;
|
482
|
+
hexchar = *cur_ptr;
|
483
|
+
val2 = GetDecimalFromHex(hexchar);
|
484
|
+
|
485
|
+
if (val2 >= 0)
|
486
|
+
{
|
487
|
+
cur_ptr++;
|
488
|
+
val = (val << 4) + val2;
|
489
|
+
}
|
490
|
+
}
|
491
|
+
c = val & 0xff;
|
492
|
+
}
|
493
|
+
}
|
494
|
+
break;
|
495
|
+
case 'b':
|
496
|
+
c = '\b';
|
497
|
+
break;
|
498
|
+
case 'f':
|
499
|
+
c = '\f';
|
500
|
+
break;
|
501
|
+
case 'n':
|
502
|
+
c = '\n';
|
503
|
+
break;
|
504
|
+
case 'r':
|
505
|
+
c = '\r';
|
506
|
+
break;
|
507
|
+
case 't':
|
508
|
+
c = '\t';
|
509
|
+
break;
|
510
|
+
case 'v':
|
511
|
+
c = '\v';
|
512
|
+
break;
|
513
|
+
|
514
|
+
/*
|
515
|
+
* in all other cases, take the char after '\'
|
516
|
+
* literally
|
517
|
+
*/
|
518
|
+
}
|
519
|
+
}
|
520
|
+
|
521
|
+
PG_RB_STR_ENSURE_CAPA( field_str, 1, output_ptr, end_capa_ptr );
|
522
|
+
/* Add c to output string */
|
523
|
+
*output_ptr++ = c;
|
524
|
+
}
|
525
|
+
|
526
|
+
if (!found_delim && cur_ptr < line_end_ptr)
|
527
|
+
rb_raise( rb_eArgError, "trailing data after linefeed at position: %ld", (long)(cur_ptr - input_line) + 1 );
|
528
|
+
|
529
|
+
|
530
|
+
/* Check whether raw input matched null marker */
|
531
|
+
input_len = end_ptr - start_ptr;
|
532
|
+
if (input_len == RSTRING_LEN(this->null_string) &&
|
533
|
+
strncmp(start_ptr, RSTRING_PTR(this->null_string), input_len) == 0) {
|
534
|
+
rb_ary_push(array, Qnil);
|
535
|
+
} else {
|
536
|
+
VALUE field_value;
|
537
|
+
|
538
|
+
rb_str_set_len( field_str, output_ptr - RSTRING_PTR(field_str) );
|
539
|
+
field_value = p_typemap->funcs.typecast_copy_get( p_typemap, field_str, fieldno, 0, enc_idx );
|
540
|
+
|
541
|
+
rb_ary_push(array, field_value);
|
542
|
+
|
543
|
+
if( field_value == field_str ){
|
544
|
+
/* Our output string will be send to the user, so we can not reuse
|
545
|
+
* it for the next field. */
|
546
|
+
PG_RB_TAINTED_STR_NEW( field_str, output_ptr, end_capa_ptr );
|
547
|
+
}
|
548
|
+
}
|
549
|
+
/* Reset the pointer to the start of the output/buffer string. */
|
550
|
+
output_ptr = RSTRING_PTR(field_str);
|
551
|
+
|
552
|
+
fieldno++;
|
553
|
+
/* Done if we hit EOL instead of a delim */
|
554
|
+
if (!found_delim)
|
555
|
+
break;
|
556
|
+
}
|
557
|
+
|
558
|
+
return array;
|
559
|
+
}
|
560
|
+
|
561
|
+
|
562
|
+
void
|
563
|
+
init_pg_copycoder()
|
564
|
+
{
|
565
|
+
/* Document-class: PG::CopyCoder < PG::Coder
|
566
|
+
*
|
567
|
+
* This is the base class for all type cast classes for COPY data,
|
568
|
+
*/
|
569
|
+
rb_cPG_CopyCoder = rb_define_class_under( rb_mPG, "CopyCoder", rb_cPG_Coder );
|
570
|
+
rb_define_method( rb_cPG_CopyCoder, "type_map=", pg_copycoder_type_map_set, 1 );
|
571
|
+
rb_define_method( rb_cPG_CopyCoder, "type_map", pg_copycoder_type_map_get, 0 );
|
572
|
+
rb_define_method( rb_cPG_CopyCoder, "delimiter=", pg_copycoder_delimiter_set, 1 );
|
573
|
+
rb_define_method( rb_cPG_CopyCoder, "delimiter", pg_copycoder_delimiter_get, 0 );
|
574
|
+
rb_define_method( rb_cPG_CopyCoder, "null_string=", pg_copycoder_null_string_set, 1 );
|
575
|
+
rb_define_method( rb_cPG_CopyCoder, "null_string", pg_copycoder_null_string_get, 0 );
|
576
|
+
|
577
|
+
/* Document-class: PG::CopyEncoder < PG::CopyCoder */
|
578
|
+
rb_cPG_CopyEncoder = rb_define_class_under( rb_mPG, "CopyEncoder", rb_cPG_CopyCoder );
|
579
|
+
rb_define_alloc_func( rb_cPG_CopyEncoder, pg_copycoder_encoder_allocate );
|
580
|
+
/* Document-class: PG::CopyDecoder < PG::CopyCoder */
|
581
|
+
rb_cPG_CopyDecoder = rb_define_class_under( rb_mPG, "CopyDecoder", rb_cPG_CopyCoder );
|
582
|
+
rb_define_alloc_func( rb_cPG_CopyDecoder, pg_copycoder_decoder_allocate );
|
583
|
+
|
584
|
+
/* Make RDoc aware of the encoder classes... */
|
585
|
+
/* rb_mPG_TextEncoder = rb_define_module_under( rb_mPG, "TextEncoder" ); */
|
586
|
+
/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "CopyRow", rb_cPG_CopyEncoder ); */
|
587
|
+
pg_define_coder( "CopyRow", pg_text_enc_copy_row, rb_cPG_CopyEncoder, rb_mPG_TextEncoder );
|
588
|
+
rb_include_module( rb_cPG_CopyEncoder, rb_mPG_BinaryFormatting );
|
589
|
+
|
590
|
+
/* rb_mPG_TextDecoder = rb_define_module_under( rb_mPG, "TextDecoder" ); */
|
591
|
+
/* dummy = rb_define_class_under( rb_mPG_TextDecoder, "CopyRow", rb_cPG_CopyDecoder ); */
|
592
|
+
pg_define_coder( "CopyRow", pg_text_dec_copy_row, rb_cPG_CopyDecoder, rb_mPG_TextDecoder );
|
593
|
+
/* Although CopyRow is a text decoder, data can contain zero bytes and are not zero terminated.
|
594
|
+
* They are handled like binaries. So format is set to 1 (binary). */
|
595
|
+
rb_include_module( rb_cPG_CopyDecoder, rb_mPG_BinaryFormatting );
|
596
|
+
}
|