bson_ext 1.8.6 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c65812858113d36ab4eabd70baa49338ef93887b
4
- data.tar.gz: bc9d5786498123d9f5d2e1d8142d0853935aec15
3
+ metadata.gz: f0154dfba09c2e43e8f5cb9d1889f0b08a41408f
4
+ data.tar.gz: e1b85f77e8ddb9faa027a218e32d87730ef08b9c
5
5
  SHA512:
6
- metadata.gz: 07538e80ea4776323e77d1420ceee868d7b644ae1783dbfd4c810ed33a20390ff89b6df58e4ba695f197d376b70e77a1ec9d632eb65c7404e7058310fb543dac
7
- data.tar.gz: b2955ad06298b40c67ca63c3665a07bcd4391ae4f11a7324148fe078f87e767313f164901826ad54c8f4a53898648a333153761971b5b93329016f987999eaa5
6
+ metadata.gz: 7872d36e308936e291f0b899b15501060731e21cc96ba6d0aeaf0495d3362167fa411ac6ba5635c2bd5b94b50b1ad041951d96cf858a0a10c7afc812bae502b3
7
+ data.tar.gz: c0558e9f81070a16c0a010a20e133fd18c60fa7feac74e3366f0f57d86303bab8fe887e6bddc532a05a923b59bbcfdfa6c8efc2b1d27e76b5286d9e95ce26d8a
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
@@ -1 +1,4 @@
1
- `�����byv���򰢉��
1
+ Wkԃw/'a�}��ag]:1��k���zC�nu�e�hR��~�
2
+ j�֙X�&�Ӑx]��;�@ԛ�,/g��=
3
+ qW{X�T�'
4
+ X� ��v�����6"�'>�3�r(��>j���a�:�q�����&N���.�u�)ŜGǔ)Nv��ǵ��
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.8.6
1
+ 1.9.0
@@ -1,3 +1,19 @@
1
+ /*
2
+ * Copyright (C) 2013 10gen Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
1
17
  #include <stdlib.h>
2
18
  #include <string.h>
3
19
 
@@ -1,3 +1,19 @@
1
+ /*
2
+ * Copyright (C) 2013 10gen Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
1
17
  #ifndef _BSON_BUFFER_H
2
18
  #define _BSON_BUFFER_H
3
19
 
data/ext/cbson/cbson.c CHANGED
@@ -1,3 +1,19 @@
1
+ /*
2
+ * Copyright (C) 2013 10gen Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
1
17
  /*
2
18
  * This file contains C implementations of some of the functions needed by the
3
19
  * bson module. If possible, these implementations should be used to speed up
@@ -98,13 +114,14 @@ static int max_bson_size;
98
114
  #define STR_NEW(p,n) rb_str_new((p), (n))
99
115
  #endif
100
116
 
101
- static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) {
102
- result_t status = check_string((unsigned char*)RSTRING_PTR(string), RSTRING_LEN(string),
103
- 1, check_null);
117
+ static void write_utf8(bson_buffer_t buffer, VALUE string, int allow_null) {
118
+ result_t status = validate_utf8_encoding(
119
+ (const char*)RSTRING_PTR(string), RSTRING_LEN(string), allow_null);
120
+
104
121
  if (status == HAS_NULL) {
105
122
  bson_buffer_free(buffer);
106
123
  rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
107
- } else if (status == NOT_UTF_8) {
124
+ } else if (status == INVALID_UTF8) {
108
125
  bson_buffer_free(buffer);
109
126
  rb_raise(InvalidStringEncoding, "String not valid UTF-8");
110
127
  }
@@ -186,7 +203,7 @@ static VALUE pack_extra(bson_buffer_t buffer, VALUE check_keys) {
186
203
 
187
204
  static void write_name_and_type(bson_buffer_t buffer, VALUE name, char type) {
188
205
  SAFE_WRITE(buffer, &type, 1);
189
- write_utf8(buffer, name, 1);
206
+ write_utf8(buffer, name, 0);
190
207
  SAFE_WRITE(buffer, &zero, 1);
191
208
  }
192
209
 
@@ -315,7 +332,7 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
315
332
  write_name_and_type(buffer, key, 0x02);
316
333
  length = RSTRING_LENINT(value) + 1;
317
334
  SAFE_WRITE(buffer, (char*)&length, 4);
318
- write_utf8(buffer, value, 0);
335
+ write_utf8(buffer, value, 1);
319
336
  SAFE_WRITE(buffer, &zero, 1);
320
337
  break;
321
338
  }
@@ -447,7 +464,7 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
447
464
  write_name_and_type(buffer, key, 0x02);
448
465
  length = RSTRING_LENINT(str) + 1;
449
466
  SAFE_WRITE(buffer, (char*)&length, 4);
450
- write_utf8(buffer, str, 0);
467
+ write_utf8(buffer, str, 1);
451
468
  SAFE_WRITE(buffer, &zero, 1);
452
469
  break;
453
470
  }
@@ -488,7 +505,7 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
488
505
 
489
506
  write_name_and_type(buffer, key, 0x0B);
490
507
 
491
- write_utf8(buffer, pattern, 1);
508
+ write_utf8(buffer, pattern, 0);
492
509
  SAFE_WRITE(buffer, &zero, 1);
493
510
 
494
511
  if (flags & IGNORECASE) {
@@ -541,6 +558,7 @@ static void write_doc(bson_buffer_t buffer, VALUE hash, VALUE check_keys, VALUE
541
558
  bson_buffer_position length_location = bson_buffer_save_space(buffer, 4);
542
559
  bson_buffer_position length;
543
560
  int allow_id;
561
+ int max_size;
544
562
  int (*write_function)(VALUE, VALUE, VALUE) = NULL;
545
563
  VALUE id_str = rb_str_new2("_id");
546
564
  VALUE id_sym = ID2SYM(rb_intern("_id"));
@@ -601,12 +619,12 @@ static void write_doc(bson_buffer_t buffer, VALUE hash, VALUE check_keys, VALUE
601
619
  length = bson_buffer_get_position(buffer) - start_position;
602
620
 
603
621
  // make sure that length doesn't exceed the max size (determined by server, defaults to 4mb)
604
- if (length > bson_buffer_get_max_size(buffer)) {
605
- bson_buffer_free(buffer);
606
- rb_raise(InvalidDocument,
607
- "Document too large: This BSON document is limited to %d bytes.",
608
- bson_buffer_get_max_size(buffer));
609
- return;
622
+ max_size = bson_buffer_get_max_size(buffer);
623
+ if (length > max_size) {
624
+ bson_buffer_free(buffer);
625
+ rb_raise(InvalidDocument,
626
+ "Document too large: This BSON document is limited to %d bytes.",
627
+ max_size);
610
628
  }
611
629
  SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&length, 4);
612
630
  }
@@ -616,10 +634,10 @@ static VALUE method_serialize(VALUE self, VALUE doc, VALUE check_keys,
616
634
 
617
635
  VALUE result;
618
636
  bson_buffer_t buffer = bson_buffer_new();
619
- bson_buffer_set_max_size(buffer, FIX2INT(max_size));
620
637
  if (buffer == NULL) {
621
638
  rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
622
639
  }
640
+ bson_buffer_set_max_size(buffer, FIX2INT(max_size));
623
641
 
624
642
  write_doc(buffer, doc, check_keys, move_id);
625
643
 
@@ -1,102 +1,98 @@
1
- #include "encoding_helpers.h"
2
-
3
1
  /*
4
- * Portions Copyright 2001 Unicode, Inc.
2
+ * Copyright 2013 10gen Inc.
5
3
  *
6
- * Disclaimer
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
7
  *
8
- * This source code is provided as is by Unicode, Inc. No claims are
9
- * made as to fitness for any particular purpose. No warranties of any
10
- * kind are expressed or implied. The recipient agrees to determine
11
- * applicability of information provided. If this file has been
12
- * purchased on magnetic or optical media from Unicode, Inc., the
13
- * sole remedy for any claim will be exchange of defective media
14
- * within 90 days of receipt.
8
+ * http://www.apache.org/licenses/LICENSE-2.0
15
9
  *
16
- * Limitations on Rights to Redistribute This Code
17
- *
18
- * Unicode, Inc. hereby grants the right to freely use the information
19
- * supplied in this file in the creation of products supporting the
20
- * Unicode Standard, and to make copies of this file in any form
21
- * for internal or external distribution as long as this notice
22
- * remains attached.
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
23
15
  */
24
16
 
25
- /*
26
- * Index into the table below with the first byte of a UTF-8 sequence to
27
- * get the number of trailing bytes that are supposed to follow it.
28
- */
29
- static const char trailingBytesForUTF8[256] = {
30
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
31
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
32
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
33
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
34
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
35
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
36
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
37
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
38
- };
39
17
 
40
- /* --------------------------------------------------------------------- */
18
+ #include <string.h>
19
+ #include "encoding_helpers.h"
41
20
 
42
- /*
43
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
44
- * This must be called with the length pre-determined by the first byte.
45
- * The length can be set by:
46
- * length = trailingBytesForUTF8[*source]+1;
47
- * and the sequence is illegal right away if there aren't that many bytes
48
- * available.
49
- * If presented with a length > 4, this returns 0. The Unicode
50
- * definition of UTF-8 goes up to 4-byte sequences.
51
- */
52
- static unsigned char isLegalUTF8(const unsigned char* source, int length) {
53
- unsigned char a;
54
- const unsigned char* srcptr = source + length;
55
- switch (length) {
56
- default: return 0;
57
- /* Everything else falls through when "true"... */
58
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
59
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
60
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
61
- switch (*source) {
62
- /* no fall-through in this inner switch */
63
- case 0xE0: if (a < 0xA0) return 0; break;
64
- case 0xF0: if (a < 0x90) return 0; break;
65
- case 0xF4: if (a > 0x8F) return 0; break;
66
- default: if (a < 0x80) return 0;
67
- }
68
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
69
- if (*source > 0xF4) return 0;
70
- }
71
- return 1;
21
+
22
+ static void
23
+ get_utf8_sequence (const char *utf8,
24
+ unsigned char *seq_length,
25
+ unsigned char *first_mask)
26
+ {
27
+ unsigned char c = *(const unsigned char *)utf8;
28
+ unsigned char m;
29
+ unsigned char n;
30
+
31
+ /*
32
+ * See the following[1] for a description of how the leading bits of the
33
+ * first byte determine the length of a multi-byte sequence. We also need
34
+ * to mask the first byte based on that length. All subsequent bytes are
35
+ * masked against 0x3F.
36
+ *
37
+ * [1] http://www.joelonsoftware.com/articles/Unicode.html
38
+ */
39
+
40
+ if ((c & 0x80) == 0) {
41
+ n = 1;
42
+ m = 0x7F;
43
+ } else if ((c & 0xE0) == 0xC0) {
44
+ n = 2;
45
+ m = 0x1F;
46
+ } else if ((c & 0xF0) == 0xE0) {
47
+ n = 3;
48
+ m = 0x0F;
49
+ } else if ((c & 0xF8) == 0xF0) {
50
+ n = 4;
51
+ m = 0x07;
52
+ } else if ((c & 0xFC) == 0xF8) {
53
+ n = 5;
54
+ m = 0x03;
55
+ } else if ((c & 0xFE) == 0xFC) {
56
+ n = 6;
57
+ m = 0x01;
58
+ } else {
59
+ n = 0;
60
+ m = 0;
61
+ }
62
+
63
+ *seq_length = n;
64
+ *first_mask = m;
72
65
  }
73
66
 
74
- result_t check_string(const unsigned char* string, const long length,
75
- const char check_utf8, const char check_null) {
76
- int position = 0;
77
- /* By default we go character by character. Will be different for checking
78
- * UTF-8 */
79
- int sequence_length = 1;
80
67
 
81
- if (!check_utf8 && !check_null) {
82
- return VALID;
83
- }
68
+ result_t
69
+ validate_utf8_encoding (const char *utf8,
70
+ size_t utf8_len,
71
+ int allow_null)
72
+ {
73
+ unsigned char first_mask;
74
+ unsigned char seq_length;
75
+ unsigned i;
76
+ unsigned j;
84
77
 
85
- while (position < length) {
86
- if (check_null && *(string + position) == 0) {
87
- return HAS_NULL;
88
- }
89
- if (check_utf8) {
90
- sequence_length = trailingBytesForUTF8[*(string + position)] + 1;
91
- if ((position + sequence_length) > length) {
92
- return NOT_UTF_8;
93
- }
94
- if (!isLegalUTF8(string + position, sequence_length)) {
95
- return NOT_UTF_8;
78
+ for (i = 0; i < utf8_len; i += seq_length) {
79
+ get_utf8_sequence(&utf8[i], &seq_length, &first_mask);
80
+ if (!seq_length) {
81
+ return INVALID_UTF8;
82
+ }
83
+ for (j = i + 1; j < (i + seq_length); j++) {
84
+ if ((utf8[j] & 0xC0) != 0x80) {
85
+ return INVALID_UTF8;
86
+ }
87
+ }
88
+ if (!allow_null) {
89
+ for (j = 0; j < seq_length; j++) {
90
+ if (((i + j) > utf8_len) || !utf8[i + j]) {
91
+ return HAS_NULL;
96
92
  }
97
- }
98
- position += sequence_length;
99
- }
93
+ }
94
+ }
95
+ }
100
96
 
101
- return VALID;
97
+ return VALID_UTF8;
102
98
  }
@@ -1,13 +1,50 @@
1
+ /*
2
+ * Copyright 2013 10gen Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+
1
18
  #ifndef ENCODING_HELPERS_H
2
19
  #define ENCODING_HELPERS_H
3
20
 
21
+ #include <unistd.h>
22
+
4
23
  typedef enum {
5
- VALID,
6
- NOT_UTF_8,
24
+ VALID_UTF8,
25
+ INVALID_UTF8,
7
26
  HAS_NULL
8
27
  } result_t;
9
28
 
10
- result_t check_string(const unsigned char* string, const long length,
11
- const char check_utf8, const char check_null);
29
+ /**
30
+ * validate_utf8_encoding:
31
+ * @utf8: A UTF-8 encoded string.
32
+ * @utf8_len: The length of @utf8 in bytes.
33
+ * @allow_null: 1 if '\0' is allowed within @utf8, excluding the trailing '\0'.
34
+ *
35
+ * Validates that @utf8 is a valid UTF-8 string.
36
+ *
37
+ * If @allow_null is 1, then '\0' is allowed within @utf8_len bytes of @utf8.
38
+ * Generally, this is bad practice since the main point of UTF-8 strings is
39
+ * that they can be used with strlen() and friends. However, some languages
40
+ * such as Python can send UTF-8 encoded strings with NULs in them.
41
+ *
42
+ * Returns: enum indicating validity of @utf8.
43
+ */
44
+ result_t
45
+ validate_utf8_encoding (const char *utf8,
46
+ size_t utf8_len,
47
+ int allow_null);
48
+
12
49
 
13
- #endif
50
+ #endif /* ENCODING_HELPERS_H */
data/ext/cbson/version.h CHANGED
@@ -1 +1,17 @@
1
- #define VERSION "1.8.6"
1
+ /*
2
+ * Copyright (C) 2013 10gen Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ #define VERSION "1.9.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bson_ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.6
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tyler Brock
@@ -33,7 +33,7 @@ cert_chain:
33
33
  8v7zLF2XliYbfurYIwkcXs8yPn8ggApBIy9bX6VJxRs/l2+UvqzaHIFaFy/F8/GP
34
34
  RNTuXsVG5NDACo7Q
35
35
  -----END CERTIFICATE-----
36
- date: 2013-05-16 00:00:00.000000000 Z
36
+ date: 2013-06-11 00:00:00.000000000 Z
37
37
  dependencies:
38
38
  - !ruby/object:Gem::Dependency
39
39
  name: bson
@@ -41,14 +41,14 @@ dependencies:
41
41
  requirements:
42
42
  - - ~>
43
43
  - !ruby/object:Gem::Version
44
- version: 1.8.6
44
+ version: 1.9.0
45
45
  type: :runtime
46
46
  prerelease: false
47
47
  version_requirements: !ruby/object:Gem::Requirement
48
48
  requirements:
49
49
  - - ~>
50
50
  - !ruby/object:Gem::Version
51
- version: 1.8.6
51
+ version: 1.9.0
52
52
  description: C extensions to accelerate the Ruby BSON serialization. For more information
53
53
  about BSON, see http://bsonspec.org. For information about MongoDB, see http://www.mongodb.org.
54
54
  email: mongodb-dev@googlegroups.com
metadata.gz.sig CHANGED
Binary file