bson 4.2.2 → 4.12.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/README.md +25 -7
- data/Rakefile +16 -9
- data/ext/bson/{native-endian.h → bson-endian.h} +5 -99
- data/ext/bson/bson-native.h +125 -0
- data/ext/bson/bytebuf.c +133 -0
- data/ext/bson/endian.c +117 -0
- data/ext/bson/init.c +355 -0
- data/ext/bson/libbson-utf8.c +230 -0
- data/ext/bson/read.c +411 -0
- data/ext/bson/util.c +95 -0
- data/ext/bson/write.c +680 -0
- data/lib/bson.rb +6 -3
- data/lib/bson/active_support.rb +17 -0
- data/lib/bson/array.rb +57 -17
- data/lib/bson/binary.rb +185 -13
- data/lib/bson/boolean.rb +12 -3
- data/lib/bson/code.rb +16 -2
- data/lib/bson/code_with_scope.rb +32 -5
- data/lib/bson/config.rb +1 -1
- data/lib/bson/date.rb +12 -2
- data/lib/bson/date_time.rb +2 -2
- data/lib/bson/db_pointer.rb +110 -0
- data/lib/bson/decimal128.rb +17 -3
- data/lib/bson/decimal128/builder.rb +1 -1
- data/lib/bson/document.rb +152 -5
- data/lib/bson/environment.rb +2 -1
- data/lib/bson/error.rb +27 -0
- data/lib/bson/ext_json.rb +383 -0
- data/lib/bson/false_class.rb +1 -1
- data/lib/bson/float.rb +48 -2
- data/lib/bson/hash.rb +68 -17
- data/lib/bson/int32.rb +52 -13
- data/lib/bson/int64.rb +59 -15
- data/lib/bson/integer.rb +36 -2
- data/lib/bson/json.rb +1 -1
- data/lib/bson/max_key.rb +13 -1
- data/lib/bson/min_key.rb +13 -1
- data/lib/bson/nil_class.rb +4 -2
- data/lib/bson/object.rb +28 -1
- data/lib/bson/object_id.rb +16 -2
- data/lib/bson/open_struct.rb +1 -1
- data/lib/bson/regexp.rb +27 -4
- data/lib/bson/registry.rb +3 -3
- data/lib/bson/specialized.rb +4 -2
- data/lib/bson/string.rb +5 -3
- data/lib/bson/symbol.rb +99 -7
- data/lib/bson/time.rb +63 -4
- data/lib/bson/time_with_zone.rb +54 -0
- data/lib/bson/timestamp.rb +44 -6
- data/lib/bson/true_class.rb +1 -1
- data/lib/bson/undefined.rb +12 -1
- data/lib/bson/version.rb +2 -2
- data/spec/bson/array_spec.rb +18 -1
- data/spec/bson/binary_spec.rb +100 -3
- data/spec/bson/binary_uuid_spec.rb +189 -0
- data/spec/bson/boolean_spec.rb +1 -1
- data/spec/bson/byte_buffer_read_spec.rb +197 -0
- data/spec/bson/byte_buffer_spec.rb +121 -381
- data/spec/bson/byte_buffer_write_spec.rb +854 -0
- data/spec/bson/code_spec.rb +1 -1
- data/spec/bson/code_with_scope_spec.rb +1 -1
- data/spec/bson/date_spec.rb +1 -1
- data/spec/bson/date_time_spec.rb +54 -1
- data/spec/bson/decimal128_spec.rb +35 -35
- data/spec/bson/document_as_spec.rb +46 -0
- data/spec/bson/document_spec.rb +197 -30
- data/spec/bson/ext_json_parse_spec.rb +308 -0
- data/spec/bson/false_class_spec.rb +1 -1
- data/spec/bson/float_spec.rb +37 -1
- data/spec/bson/hash_as_spec.rb +57 -0
- data/spec/bson/hash_spec.rb +209 -1
- data/spec/bson/int32_spec.rb +180 -6
- data/spec/bson/int64_spec.rb +199 -6
- data/spec/bson/integer_spec.rb +29 -3
- data/spec/bson/json_spec.rb +1 -1
- data/spec/bson/max_key_spec.rb +1 -1
- data/spec/bson/min_key_spec.rb +1 -1
- data/spec/bson/nil_class_spec.rb +1 -1
- data/spec/bson/object_id_spec.rb +1 -1
- data/spec/bson/object_spec.rb +1 -1
- data/spec/bson/open_struct_spec.rb +1 -1
- data/spec/bson/raw_spec.rb +34 -2
- data/spec/bson/regexp_spec.rb +1 -1
- data/spec/bson/registry_spec.rb +1 -1
- data/spec/bson/string_spec.rb +19 -1
- data/spec/bson/symbol_raw_spec.rb +45 -0
- data/spec/bson/symbol_spec.rb +63 -3
- data/spec/bson/time_spec.rb +205 -2
- data/spec/bson/time_with_zone_spec.rb +68 -0
- data/spec/bson/timestamp_spec.rb +56 -1
- data/spec/bson/true_class_spec.rb +1 -1
- data/spec/bson/undefined_spec.rb +1 -1
- data/spec/bson_spec.rb +1 -1
- data/spec/{support → runners}/common_driver.rb +1 -1
- data/spec/runners/corpus.rb +185 -0
- data/spec/{support/corpus.rb → runners/corpus_legacy.rb} +41 -59
- data/spec/spec_helper.rb +40 -3
- data/spec/{bson/driver_bson_spec.rb → spec_tests/common_driver_spec.rb} +1 -0
- data/spec/{bson/corpus_spec.rb → spec_tests/corpus_legacy_spec.rb} +10 -7
- data/spec/spec_tests/corpus_spec.rb +124 -0
- data/spec/spec_tests/data/corpus/README.md +15 -0
- data/spec/spec_tests/data/corpus/array.json +49 -0
- data/spec/spec_tests/data/corpus/binary.json +113 -0
- data/spec/spec_tests/data/corpus/boolean.json +27 -0
- data/spec/spec_tests/data/corpus/code.json +67 -0
- data/spec/spec_tests/data/corpus/code_w_scope.json +78 -0
- data/spec/spec_tests/data/corpus/datetime.json +42 -0
- data/spec/spec_tests/data/corpus/dbpointer.json +56 -0
- data/spec/spec_tests/data/corpus/dbref.json +31 -0
- data/spec/spec_tests/data/corpus/decimal128-1.json +317 -0
- data/spec/spec_tests/data/corpus/decimal128-2.json +793 -0
- data/spec/spec_tests/data/corpus/decimal128-3.json +1771 -0
- data/spec/spec_tests/data/corpus/decimal128-4.json +117 -0
- data/spec/spec_tests/data/corpus/decimal128-5.json +402 -0
- data/spec/spec_tests/data/corpus/decimal128-6.json +119 -0
- data/spec/spec_tests/data/corpus/decimal128-7.json +323 -0
- data/spec/spec_tests/data/corpus/document.json +36 -0
- data/spec/spec_tests/data/corpus/double.json +87 -0
- data/spec/spec_tests/data/corpus/int32.json +43 -0
- data/spec/spec_tests/data/corpus/int64.json +43 -0
- data/spec/spec_tests/data/corpus/maxkey.json +12 -0
- data/spec/spec_tests/data/corpus/minkey.json +12 -0
- data/spec/spec_tests/data/corpus/multi-type-deprecated.json +15 -0
- data/spec/spec_tests/data/corpus/multi-type.json +11 -0
- data/spec/spec_tests/data/corpus/null.json +12 -0
- data/spec/spec_tests/data/corpus/oid.json +28 -0
- data/spec/spec_tests/data/corpus/regex.json +65 -0
- data/spec/spec_tests/data/corpus/string.json +72 -0
- data/spec/spec_tests/data/corpus/symbol.json +80 -0
- data/spec/spec_tests/data/corpus/timestamp.json +34 -0
- data/spec/spec_tests/data/corpus/top.json +236 -0
- data/spec/spec_tests/data/corpus/undefined.json +15 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/array.json +8 -2
- data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/binary.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/boolean.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code_w_scope.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/document.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/double.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/datetime.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/dbpointer.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/int64.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/symbol.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/int32.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/maxkey.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/minkey.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/null.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/oid.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/regex.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/string.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/timestamp.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/top.json +0 -0
- data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/undefined.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-1.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-2.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-3.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-4.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-5.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-6.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-7.json +0 -0
- data/spec/support/shared_examples.rb +3 -5
- data/spec/support/spec_config.rb +16 -0
- data/spec/support/utils.rb +10 -0
- metadata +227 -124
- metadata.gz.sig +0 -0
- data/ext/bson/bson_native.c +0 -762
metadata.gz.sig
CHANGED
Binary file
|
data/ext/bson/bson_native.c
DELETED
@@ -1,762 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Copyright (C) 2009-2016 MongoDB Inc.
|
3
|
-
*
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
-
* you may not use this file except in compliance with the License.
|
6
|
-
* You may obtain a copy of the License at
|
7
|
-
*
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
-
*
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
-
* See the License for the specific language governing permissions and
|
14
|
-
* limitations under the License.
|
15
|
-
*/
|
16
|
-
#include <ruby.h>
|
17
|
-
#include <ruby/encoding.h>
|
18
|
-
#include <stdbool.h>
|
19
|
-
#include <time.h>
|
20
|
-
#include <unistd.h>
|
21
|
-
#include "native-endian.h"
|
22
|
-
|
23
|
-
#define BSON_BYTE_BUFFER_SIZE 1024
|
24
|
-
|
25
|
-
#ifndef HOST_NAME_HASH_MAX
|
26
|
-
#define HOST_NAME_HASH_MAX 256
|
27
|
-
#endif
|
28
|
-
|
29
|
-
typedef struct {
|
30
|
-
size_t size;
|
31
|
-
size_t write_position;
|
32
|
-
size_t read_position;
|
33
|
-
char buffer[BSON_BYTE_BUFFER_SIZE];
|
34
|
-
char *b_ptr;
|
35
|
-
} byte_buffer_t;
|
36
|
-
|
37
|
-
#define READ_PTR(byte_buffer_ptr) \
|
38
|
-
(byte_buffer_ptr->b_ptr + byte_buffer_ptr->read_position)
|
39
|
-
|
40
|
-
#define READ_SIZE(byte_buffer_ptr) \
|
41
|
-
(byte_buffer_ptr->write_position - byte_buffer_ptr->read_position)
|
42
|
-
|
43
|
-
#define WRITE_PTR(byte_buffer_ptr) \
|
44
|
-
(byte_buffer_ptr->b_ptr + byte_buffer_ptr->write_position)
|
45
|
-
|
46
|
-
#define ENSURE_BSON_WRITE(buffer_ptr, length) \
|
47
|
-
{ if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); }
|
48
|
-
|
49
|
-
#define ENSURE_BSON_READ(buffer_ptr, length) \
|
50
|
-
{ if (buffer_ptr->read_position + length > buffer_ptr->write_position) \
|
51
|
-
rb_raise(rb_eRangeError, "Attempted to read %zu bytes, but only %zu bytes remain", (size_t)length, READ_SIZE(buffer_ptr)); }
|
52
|
-
|
53
|
-
static VALUE rb_bson_byte_buffer_allocate(VALUE klass);
|
54
|
-
static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self);
|
55
|
-
static VALUE rb_bson_byte_buffer_length(VALUE self);
|
56
|
-
static VALUE rb_bson_byte_buffer_get_byte(VALUE self);
|
57
|
-
static VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i);
|
58
|
-
static VALUE rb_bson_byte_buffer_get_cstring(VALUE self);
|
59
|
-
static VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self);
|
60
|
-
static VALUE rb_bson_byte_buffer_get_double(VALUE self);
|
61
|
-
static VALUE rb_bson_byte_buffer_get_int32(VALUE self);
|
62
|
-
static VALUE rb_bson_byte_buffer_get_int64(VALUE self);
|
63
|
-
static VALUE rb_bson_byte_buffer_get_string(VALUE self);
|
64
|
-
static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte);
|
65
|
-
static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes);
|
66
|
-
static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string);
|
67
|
-
static VALUE rb_bson_byte_buffer_put_decimal128(VALUE self, VALUE low, VALUE high);
|
68
|
-
static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f);
|
69
|
-
static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i);
|
70
|
-
static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i);
|
71
|
-
static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string);
|
72
|
-
static VALUE rb_bson_byte_buffer_read_position(VALUE self);
|
73
|
-
static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i);
|
74
|
-
static VALUE rb_bson_byte_buffer_rewind(VALUE self);
|
75
|
-
static VALUE rb_bson_byte_buffer_write_position(VALUE self);
|
76
|
-
static VALUE rb_bson_byte_buffer_to_s(VALUE self);
|
77
|
-
static VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self);
|
78
|
-
|
79
|
-
static size_t rb_bson_byte_buffer_memsize(const void *ptr);
|
80
|
-
static void rb_bson_byte_buffer_free(void *ptr);
|
81
|
-
static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length);
|
82
|
-
static void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id);
|
83
|
-
static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null);
|
84
|
-
|
85
|
-
static const rb_data_type_t rb_byte_buffer_data_type = {
|
86
|
-
"bson/byte_buffer",
|
87
|
-
{ NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize }
|
88
|
-
};
|
89
|
-
|
90
|
-
/**
|
91
|
-
* Holds the machine id hash for object id generation.
|
92
|
-
*/
|
93
|
-
static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
|
94
|
-
|
95
|
-
/**
|
96
|
-
* The counter for incrementing object ids.
|
97
|
-
*/
|
98
|
-
static uint32_t rb_bson_object_id_counter;
|
99
|
-
|
100
|
-
/**
|
101
|
-
* Initialize the bson_native extension.
|
102
|
-
*/
|
103
|
-
void Init_bson_native()
|
104
|
-
{
|
105
|
-
char rb_bson_machine_id[256];
|
106
|
-
|
107
|
-
VALUE rb_bson_module = rb_define_module("BSON");
|
108
|
-
VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject);
|
109
|
-
VALUE rb_bson_object_id_class = rb_const_get(rb_bson_module, rb_intern("ObjectId"));
|
110
|
-
VALUE rb_bson_object_id_generator_class = rb_const_get(rb_bson_object_id_class, rb_intern("Generator"));
|
111
|
-
VALUE rb_digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
|
112
|
-
VALUE rb_md5_class = rb_const_get(rb_digest_class, rb_intern("MD5"));
|
113
|
-
|
114
|
-
rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate);
|
115
|
-
rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1);
|
116
|
-
rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0);
|
117
|
-
rb_define_method(rb_byte_buffer_class, "get_byte", rb_bson_byte_buffer_get_byte, 0);
|
118
|
-
rb_define_method(rb_byte_buffer_class, "get_bytes", rb_bson_byte_buffer_get_bytes, 1);
|
119
|
-
rb_define_method(rb_byte_buffer_class, "get_cstring", rb_bson_byte_buffer_get_cstring, 0);
|
120
|
-
rb_define_method(rb_byte_buffer_class, "get_decimal128_bytes", rb_bson_byte_buffer_get_decimal128_bytes, 0);
|
121
|
-
rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0);
|
122
|
-
rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0);
|
123
|
-
rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0);
|
124
|
-
rb_define_method(rb_byte_buffer_class, "get_string", rb_bson_byte_buffer_get_string, 0);
|
125
|
-
rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1);
|
126
|
-
rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1);
|
127
|
-
rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1);
|
128
|
-
rb_define_method(rb_byte_buffer_class, "put_decimal128", rb_bson_byte_buffer_put_decimal128, 2);
|
129
|
-
rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1);
|
130
|
-
rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1);
|
131
|
-
rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1);
|
132
|
-
rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1);
|
133
|
-
rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0);
|
134
|
-
rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2);
|
135
|
-
rb_define_method(rb_byte_buffer_class, "rewind!", rb_bson_byte_buffer_rewind, 0);
|
136
|
-
rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0);
|
137
|
-
rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0);
|
138
|
-
rb_define_method(rb_bson_object_id_generator_class, "next_object_id", rb_bson_object_id_generator_next, -1);
|
139
|
-
|
140
|
-
// Get the object id machine id and hash it.
|
141
|
-
rb_require("digest/md5");
|
142
|
-
gethostname(rb_bson_machine_id, sizeof(rb_bson_machine_id));
|
143
|
-
rb_bson_machine_id[255] = '\0';
|
144
|
-
rb_bson_generate_machine_id(rb_md5_class, rb_bson_machine_id);
|
145
|
-
|
146
|
-
// Set the object id counter to a random number
|
147
|
-
rb_bson_object_id_counter = FIX2INT(rb_funcall(rb_mKernel, rb_intern("rand"), 1, INT2FIX(0x1000000)));
|
148
|
-
}
|
149
|
-
|
150
|
-
void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id)
|
151
|
-
{
|
152
|
-
VALUE digest = rb_funcall(rb_md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
|
153
|
-
memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
|
154
|
-
}
|
155
|
-
|
156
|
-
/**
|
157
|
-
* Allocates a bson byte buffer that wraps a byte_buffer_t.
|
158
|
-
*/
|
159
|
-
VALUE rb_bson_byte_buffer_allocate(VALUE klass)
|
160
|
-
{
|
161
|
-
byte_buffer_t *b;
|
162
|
-
VALUE obj = TypedData_Make_Struct(klass, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
163
|
-
b->b_ptr = b->buffer;
|
164
|
-
b->size = BSON_BYTE_BUFFER_SIZE;
|
165
|
-
return obj;
|
166
|
-
}
|
167
|
-
|
168
|
-
/**
|
169
|
-
* Initialize a byte buffer.
|
170
|
-
*/
|
171
|
-
VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self)
|
172
|
-
{
|
173
|
-
VALUE bytes;
|
174
|
-
rb_scan_args(argc, argv, "01", &bytes);
|
175
|
-
|
176
|
-
if (!NIL_P(bytes)) {
|
177
|
-
rb_bson_byte_buffer_put_bytes(self, bytes);
|
178
|
-
}
|
179
|
-
|
180
|
-
return self;
|
181
|
-
}
|
182
|
-
|
183
|
-
/**
|
184
|
-
* Get the length of the buffer.
|
185
|
-
*/
|
186
|
-
VALUE rb_bson_byte_buffer_length(VALUE self)
|
187
|
-
{
|
188
|
-
byte_buffer_t *b;
|
189
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
190
|
-
return UINT2NUM(READ_SIZE(b));
|
191
|
-
}
|
192
|
-
|
193
|
-
/**
|
194
|
-
* Get a single byte from the buffer.
|
195
|
-
*/
|
196
|
-
VALUE rb_bson_byte_buffer_get_byte(VALUE self)
|
197
|
-
{
|
198
|
-
byte_buffer_t *b;
|
199
|
-
VALUE byte;
|
200
|
-
|
201
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
202
|
-
ENSURE_BSON_READ(b, 1);
|
203
|
-
byte = rb_str_new(READ_PTR(b), 1);
|
204
|
-
b->read_position += 1;
|
205
|
-
return byte;
|
206
|
-
}
|
207
|
-
|
208
|
-
/**
|
209
|
-
* Get bytes from the buffer.
|
210
|
-
*/
|
211
|
-
VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
|
212
|
-
{
|
213
|
-
byte_buffer_t *b;
|
214
|
-
VALUE bytes;
|
215
|
-
const uint32_t length = FIX2LONG(i);
|
216
|
-
|
217
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
218
|
-
ENSURE_BSON_READ(b, length);
|
219
|
-
bytes = rb_str_new(READ_PTR(b), length);
|
220
|
-
b->read_position += length;
|
221
|
-
return bytes;
|
222
|
-
}
|
223
|
-
|
224
|
-
/**
|
225
|
-
* Get a cstring from the buffer.
|
226
|
-
*/
|
227
|
-
VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
|
228
|
-
{
|
229
|
-
byte_buffer_t *b;
|
230
|
-
VALUE string;
|
231
|
-
int length;
|
232
|
-
|
233
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
234
|
-
length = (int)strlen(READ_PTR(b));
|
235
|
-
ENSURE_BSON_READ(b, length);
|
236
|
-
string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
|
237
|
-
b->read_position += length + 1;
|
238
|
-
return string;
|
239
|
-
}
|
240
|
-
|
241
|
-
/**
|
242
|
-
* Get the 16 bytes representing the decimal128 from the buffer.
|
243
|
-
*/
|
244
|
-
VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
|
245
|
-
{
|
246
|
-
byte_buffer_t *b;
|
247
|
-
VALUE bytes;
|
248
|
-
|
249
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
250
|
-
ENSURE_BSON_READ(b, 16);
|
251
|
-
bytes = rb_str_new(READ_PTR(b), 16);
|
252
|
-
b->read_position += 16;
|
253
|
-
return bytes;
|
254
|
-
}
|
255
|
-
|
256
|
-
/**
|
257
|
-
* Get a double from the buffer.
|
258
|
-
*/
|
259
|
-
VALUE rb_bson_byte_buffer_get_double(VALUE self)
|
260
|
-
{
|
261
|
-
byte_buffer_t *b;
|
262
|
-
double d;
|
263
|
-
|
264
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
265
|
-
ENSURE_BSON_READ(b, 8);
|
266
|
-
memcpy(&d, READ_PTR(b), 8);
|
267
|
-
b->read_position += 8;
|
268
|
-
return DBL2NUM(BSON_DOUBLE_FROM_LE(d));
|
269
|
-
}
|
270
|
-
|
271
|
-
/**
|
272
|
-
* Get a int32 from the buffer.
|
273
|
-
*/
|
274
|
-
VALUE rb_bson_byte_buffer_get_int32(VALUE self)
|
275
|
-
{
|
276
|
-
byte_buffer_t *b;
|
277
|
-
int32_t i32;
|
278
|
-
|
279
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
280
|
-
ENSURE_BSON_READ(b, 4);
|
281
|
-
memcpy(&i32, READ_PTR(b), 4);
|
282
|
-
b->read_position += 4;
|
283
|
-
return INT2NUM(BSON_UINT32_FROM_LE(i32));
|
284
|
-
}
|
285
|
-
|
286
|
-
/**
|
287
|
-
* Get a int64 from the buffer.
|
288
|
-
*/
|
289
|
-
VALUE rb_bson_byte_buffer_get_int64(VALUE self)
|
290
|
-
{
|
291
|
-
byte_buffer_t *b;
|
292
|
-
int64_t i64;
|
293
|
-
|
294
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
295
|
-
ENSURE_BSON_READ(b, 8);
|
296
|
-
memcpy(&i64, READ_PTR(b), 8);
|
297
|
-
b->read_position += 8;
|
298
|
-
return LL2NUM(BSON_UINT64_FROM_LE(i64));
|
299
|
-
}
|
300
|
-
|
301
|
-
/**
|
302
|
-
* Get a string from the buffer.
|
303
|
-
*/
|
304
|
-
VALUE rb_bson_byte_buffer_get_string(VALUE self)
|
305
|
-
{
|
306
|
-
byte_buffer_t *b;
|
307
|
-
int32_t length;
|
308
|
-
int32_t length_le;
|
309
|
-
VALUE string;
|
310
|
-
|
311
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
312
|
-
ENSURE_BSON_READ(b, 4);
|
313
|
-
memcpy(&length, READ_PTR(b), 4);
|
314
|
-
length_le = BSON_UINT32_FROM_LE(length);
|
315
|
-
b->read_position += 4;
|
316
|
-
ENSURE_BSON_READ(b, length_le);
|
317
|
-
string = rb_enc_str_new(READ_PTR(b), length_le - 1, rb_utf8_encoding());
|
318
|
-
b->read_position += length_le;
|
319
|
-
return string;
|
320
|
-
}
|
321
|
-
|
322
|
-
/**
|
323
|
-
* Writes a byte to the byte buffer.
|
324
|
-
*/
|
325
|
-
VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte)
|
326
|
-
{
|
327
|
-
byte_buffer_t *b;
|
328
|
-
const char *str = RSTRING_PTR(byte);
|
329
|
-
|
330
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
331
|
-
ENSURE_BSON_WRITE(b, 1);
|
332
|
-
memcpy(WRITE_PTR(b), str, 1);
|
333
|
-
b->write_position += 1;
|
334
|
-
|
335
|
-
return self;
|
336
|
-
}
|
337
|
-
|
338
|
-
/**
|
339
|
-
* Writes bytes to the byte buffer.
|
340
|
-
*/
|
341
|
-
VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes)
|
342
|
-
{
|
343
|
-
byte_buffer_t *b;
|
344
|
-
const char *str = RSTRING_PTR(bytes);
|
345
|
-
const size_t length = RSTRING_LEN(bytes);
|
346
|
-
|
347
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
348
|
-
ENSURE_BSON_WRITE(b, length);
|
349
|
-
memcpy(WRITE_PTR(b), str, length);
|
350
|
-
b->write_position += length;
|
351
|
-
return self;
|
352
|
-
}
|
353
|
-
|
354
|
-
/**
|
355
|
-
* Writes a cstring to the byte buffer.
|
356
|
-
*/
|
357
|
-
VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string)
|
358
|
-
{
|
359
|
-
byte_buffer_t *b;
|
360
|
-
char *c_str = RSTRING_PTR(string);
|
361
|
-
size_t length = RSTRING_LEN(string) + 1;
|
362
|
-
|
363
|
-
if (!rb_bson_utf8_validate(c_str, length - 1, false)) {
|
364
|
-
rb_raise(rb_eArgError, "String %s is not a valid UTF-8 CString.", c_str);
|
365
|
-
}
|
366
|
-
|
367
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
368
|
-
ENSURE_BSON_WRITE(b, length);
|
369
|
-
memcpy(WRITE_PTR(b), c_str, length);
|
370
|
-
b->write_position += length;
|
371
|
-
return self;
|
372
|
-
}
|
373
|
-
|
374
|
-
/**
|
375
|
-
* Writes a 128 bit decimal to the byte buffer.
|
376
|
-
*/
|
377
|
-
VALUE rb_bson_byte_buffer_put_decimal128(VALUE self, VALUE low, VALUE high)
|
378
|
-
{
|
379
|
-
byte_buffer_t *b;
|
380
|
-
const int64_t low64 = BSON_UINT64_TO_LE(NUM2ULL(low));
|
381
|
-
const int64_t high64 = BSON_UINT64_TO_LE(NUM2ULL(high));
|
382
|
-
|
383
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
384
|
-
ENSURE_BSON_WRITE(b, 8);
|
385
|
-
memcpy(WRITE_PTR(b), &low64, 8);
|
386
|
-
b->write_position += 8;
|
387
|
-
|
388
|
-
ENSURE_BSON_WRITE(b, 8);
|
389
|
-
memcpy(WRITE_PTR(b), &high64, 8);
|
390
|
-
b->write_position += 8;
|
391
|
-
|
392
|
-
return self;
|
393
|
-
}
|
394
|
-
|
395
|
-
/**
|
396
|
-
* Writes a 64 bit double to the buffer.
|
397
|
-
*/
|
398
|
-
VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f)
|
399
|
-
{
|
400
|
-
byte_buffer_t *b;
|
401
|
-
const double d = BSON_DOUBLE_TO_LE(NUM2DBL(f));
|
402
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
403
|
-
ENSURE_BSON_WRITE(b, 8);
|
404
|
-
memcpy(WRITE_PTR(b), &d, 8);
|
405
|
-
b->write_position += 8;
|
406
|
-
|
407
|
-
return self;
|
408
|
-
}
|
409
|
-
|
410
|
-
/**
|
411
|
-
* Writes a 32 bit integer to the byte buffer.
|
412
|
-
*/
|
413
|
-
VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i)
|
414
|
-
{
|
415
|
-
byte_buffer_t *b;
|
416
|
-
const int32_t i32 = BSON_UINT32_TO_LE(NUM2INT(i));
|
417
|
-
|
418
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
419
|
-
ENSURE_BSON_WRITE(b, 4);
|
420
|
-
memcpy(WRITE_PTR(b), &i32, 4);
|
421
|
-
b->write_position += 4;
|
422
|
-
|
423
|
-
return self;
|
424
|
-
}
|
425
|
-
|
426
|
-
/**
|
427
|
-
* Writes a 64 bit integer to the byte buffer.
|
428
|
-
*/
|
429
|
-
VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i)
|
430
|
-
{
|
431
|
-
byte_buffer_t *b;
|
432
|
-
const int64_t i64 = BSON_UINT64_TO_LE(NUM2LL(i));
|
433
|
-
|
434
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
435
|
-
ENSURE_BSON_WRITE(b, 8);
|
436
|
-
memcpy(WRITE_PTR(b), &i64, 8);
|
437
|
-
b->write_position += 8;
|
438
|
-
|
439
|
-
return self;
|
440
|
-
}
|
441
|
-
|
442
|
-
/**
|
443
|
-
* Writes a string to the byte buffer.
|
444
|
-
*/
|
445
|
-
VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string)
|
446
|
-
{
|
447
|
-
byte_buffer_t *b;
|
448
|
-
int32_t length_le;
|
449
|
-
|
450
|
-
char *str = RSTRING_PTR(string);
|
451
|
-
const int32_t length = RSTRING_LEN(string) + 1;
|
452
|
-
length_le = BSON_UINT32_TO_LE(length);
|
453
|
-
|
454
|
-
if (!rb_bson_utf8_validate(str, length - 1, true)) {
|
455
|
-
rb_raise(rb_eArgError, "String %s is not valid UTF-8.", str);
|
456
|
-
}
|
457
|
-
|
458
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
459
|
-
ENSURE_BSON_WRITE(b, length + 4);
|
460
|
-
memcpy(WRITE_PTR(b), &length_le, 4);
|
461
|
-
b->write_position += 4;
|
462
|
-
memcpy(WRITE_PTR(b), str, length);
|
463
|
-
b->write_position += length;
|
464
|
-
|
465
|
-
return self;
|
466
|
-
}
|
467
|
-
|
468
|
-
/**
|
469
|
-
* Get the read position.
|
470
|
-
*/
|
471
|
-
VALUE rb_bson_byte_buffer_read_position(VALUE self)
|
472
|
-
{
|
473
|
-
byte_buffer_t *b;
|
474
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
475
|
-
return INT2NUM(b->read_position);
|
476
|
-
}
|
477
|
-
|
478
|
-
/**
|
479
|
-
* Replace a 32 bit integer int the byte buffer.
|
480
|
-
*/
|
481
|
-
VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i)
|
482
|
-
{
|
483
|
-
byte_buffer_t *b;
|
484
|
-
const int32_t position = NUM2LONG(index);
|
485
|
-
const int32_t i32 = BSON_UINT32_TO_LE(NUM2LONG(i));
|
486
|
-
|
487
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
488
|
-
|
489
|
-
memcpy(READ_PTR(b) + position, &i32, 4);
|
490
|
-
|
491
|
-
return self;
|
492
|
-
}
|
493
|
-
|
494
|
-
/**
|
495
|
-
* Reset the read position to the beginning of the byte buffer.
|
496
|
-
*/
|
497
|
-
VALUE rb_bson_byte_buffer_rewind(VALUE self)
|
498
|
-
{
|
499
|
-
byte_buffer_t *b;
|
500
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
501
|
-
b->read_position = 0;
|
502
|
-
|
503
|
-
return self;
|
504
|
-
}
|
505
|
-
|
506
|
-
/**
|
507
|
-
* Get the write position.
|
508
|
-
*/
|
509
|
-
VALUE rb_bson_byte_buffer_write_position(VALUE self)
|
510
|
-
{
|
511
|
-
byte_buffer_t *b;
|
512
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
513
|
-
return INT2NUM(b->write_position);
|
514
|
-
}
|
515
|
-
|
516
|
-
/**
|
517
|
-
* Convert the buffer to a string.
|
518
|
-
*/
|
519
|
-
VALUE rb_bson_byte_buffer_to_s(VALUE self)
|
520
|
-
{
|
521
|
-
byte_buffer_t *b;
|
522
|
-
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
523
|
-
return rb_str_new(READ_PTR(b), READ_SIZE(b));
|
524
|
-
}
|
525
|
-
|
526
|
-
/**
|
527
|
-
* Get the size of the byte_buffer_t in memory.
|
528
|
-
*/
|
529
|
-
size_t rb_bson_byte_buffer_memsize(const void *ptr)
|
530
|
-
{
|
531
|
-
return ptr ? sizeof(byte_buffer_t) : 0;
|
532
|
-
}
|
533
|
-
|
534
|
-
/**
|
535
|
-
* Free the memory for the byte buffer.
|
536
|
-
*/
|
537
|
-
void rb_bson_byte_buffer_free(void *ptr)
|
538
|
-
{
|
539
|
-
byte_buffer_t *b = ptr;
|
540
|
-
if (b->b_ptr != b->buffer) {
|
541
|
-
xfree(b->b_ptr);
|
542
|
-
}
|
543
|
-
xfree(b);
|
544
|
-
}
|
545
|
-
|
546
|
-
/**
|
547
|
-
* Expand the byte buffer linearly.
|
548
|
-
*/
|
549
|
-
void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length)
|
550
|
-
{
|
551
|
-
const size_t required_size = buffer_ptr->write_position - buffer_ptr->read_position + length;
|
552
|
-
if (required_size <= buffer_ptr->size) {
|
553
|
-
memmove(buffer_ptr->b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
|
554
|
-
buffer_ptr->write_position -= buffer_ptr->read_position;
|
555
|
-
buffer_ptr->read_position = 0;
|
556
|
-
} else {
|
557
|
-
char *new_b_ptr;
|
558
|
-
const size_t new_size = required_size * 2;
|
559
|
-
new_b_ptr = ALLOC_N(char, new_size);
|
560
|
-
memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
|
561
|
-
if (buffer_ptr->b_ptr != buffer_ptr->buffer) {
|
562
|
-
xfree(buffer_ptr->b_ptr);
|
563
|
-
}
|
564
|
-
buffer_ptr->b_ptr = new_b_ptr;
|
565
|
-
buffer_ptr->size = new_size;
|
566
|
-
buffer_ptr->write_position -= buffer_ptr->read_position;
|
567
|
-
buffer_ptr->read_position = 0;
|
568
|
-
}
|
569
|
-
}
|
570
|
-
|
571
|
-
/**
|
572
|
-
* Generate the next object id.
|
573
|
-
*/
|
574
|
-
VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self)
|
575
|
-
{
|
576
|
-
char bytes[12];
|
577
|
-
uint32_t t;
|
578
|
-
uint32_t c;
|
579
|
-
uint16_t pid = BSON_UINT16_TO_BE(getpid());
|
580
|
-
|
581
|
-
if (argc == 0 || (argc == 1 && *args == Qnil)) {
|
582
|
-
t = BSON_UINT32_TO_BE((int) time(NULL));
|
583
|
-
}
|
584
|
-
else {
|
585
|
-
t = BSON_UINT32_TO_BE(NUM2ULONG(rb_funcall(*args, rb_intern("to_i"), 0)));
|
586
|
-
}
|
587
|
-
|
588
|
-
c = BSON_UINT32_TO_BE(rb_bson_object_id_counter << 8);
|
589
|
-
|
590
|
-
memcpy(&bytes, &t, 4);
|
591
|
-
memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
|
592
|
-
memcpy(&bytes[7], &pid, 2);
|
593
|
-
memcpy(&bytes[9], &c, 3);
|
594
|
-
rb_bson_object_id_counter++;
|
595
|
-
return rb_str_new(bytes, 12);
|
596
|
-
}
|
597
|
-
|
598
|
-
/**
 * Classify the lead byte of a UTF-8 sequence. Taken from libbson.
 *
 * The number of leading one-bits in the first byte determines how many bytes
 * the sequence occupies and which low bits of that byte carry payload. All
 * subsequent bytes are masked against 0x3F by the caller.
 *
 * @param utf8        Start of the sequence; only the first byte is read.
 * @param seq_length  Receives 1-6 for a recognised lead byte, 0 otherwise.
 * @param first_mask  Receives the payload mask for the lead byte, 0 when
 *                    seq_length is 0.
 */
static void _bson_utf8_get_sequence(const char *utf8, uint8_t *seq_length, uint8_t *first_mask)
{
  const unsigned char lead = *(const unsigned char *)utf8;
  uint8_t ones = 0;

  /* Count the run of one-bits starting at the most significant bit. */
  while (ones < 8 && (lead & (0x80 >> ones)) != 0) {
    ones++;
  }

  if (ones == 0) {
    /* 0xxxxxxx: single-byte sequence. */
    *seq_length = 1;
    *first_mask = 0x7F;
  } else if (ones >= 2 && ones <= 6) {
    /* 110xxxxx .. 1111110x: the run length is the sequence length, and the
     * payload is whatever follows the run and its terminating zero bit. */
    *seq_length = ones;
    *first_mask = (uint8_t)(0xFF >> (ones + 1));
  } else {
    /* 10xxxxxx (continuation byte), 0xFE or 0xFF: not a valid lead byte. */
    *seq_length = 0;
    *first_mask = 0;
  }
}
|
642
|
-
|
643
|
-
/**
 * Check that a byte range is well-formed UTF-8. Taken from libbson.
 *
 * Rejected input: truncated sequences, bad continuation bytes, code points
 * above U+10FFFF, UTF-16 surrogates, and non-shortest-form encodings. The
 * two-byte form 0xC0 0x80 is accepted as an encoding of NUL only when
 * allow_null is true.
 *
 * @param utf8        Bytes to validate; a NULL pointer is rejected.
 * @param utf8_len    Number of bytes to examine.
 * @param allow_null  When false, both a raw zero byte and the two-byte
 *                    encoding of NUL cause validation to fail.
 *
 * @return true when every sequence in the range is valid, false otherwise.
 */
bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null)
{
  uint32_t c;
  uint8_t first_mask;
  uint8_t seq_length;
  /* size_t so the indices cannot wrap before reaching utf8_len. */
  size_t i;
  size_t j;

  if (!utf8) {
    return false;
  }

  for (i = 0; i < utf8_len; i += seq_length) {
    _bson_utf8_get_sequence(&utf8[i], &seq_length, &first_mask);

    /*
     * Ensure we have a valid multi-byte sequence length.
     */
    if (!seq_length) {
      return false;
    }

    /*
     * Ensure we have enough bytes left.
     */
    if ((utf8_len - i) < seq_length) {
      return false;
    }

    /*
     * Also calculate the next char as a unichar so we can
     * check code ranges for non-shortest form.
     */
    c = utf8[i] & first_mask;

    /*
     * Check the high-bits for each additional sequence byte.
     */
    for (j = i + 1; j < (i + seq_length); j++) {
      c = (c << 6) | (utf8[j] & 0x3F);
      if ((utf8[j] & 0xC0) != 0x80) {
        return false;
      }
    }

    /*
     * Check for raw NUL bytes. The length check above guarantees that
     * every index in [i, i + seq_length) is in bounds.
     */
    if (!allow_null) {
      for (j = 0; j < seq_length; j++) {
        if (!utf8[i + j]) {
          return false;
        }
      }
    }

    /*
     * Code point won't fit in UTF-16, not allowed.
     */
    if (c > 0x0010FFFF) {
      return false;
    }

    /*
     * Code point is in the range reserved for UTF-16 surrogate pairs.
     */
    if ((c & 0xFFFFF800) == 0xD800) {
      return false;
    }

    /*
     * Check non-shortest form unicode.
     */
    switch (seq_length) {
    case 1:
      if (c <= 0x007F) {
        continue;
      }
      return false;

    case 2:
      if ((c >= 0x0080) && (c <= 0x07FF)) {
        continue;
      } else if (c == 0) {
        /*
         * Two-byte representation for NULL. It contains no zero byte, so
         * the raw-byte scan above cannot catch it; honour allow_null here.
         */
        if (!allow_null) {
          return false;
        }
        continue;
      }
      return false;

    case 3:
      if ((c >= 0x0800) && (c <= 0xFFFF)) {
        continue;
      }
      return false;

    case 4:
      if ((c >= 0x10000) && (c <= 0x10FFFF)) {
        continue;
      }
      return false;

    default:
      /* Five- and six-byte forms are never valid. */
      return false;
    }
  }

  return true;
}
|