yarp 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -1
  3. data/CONTRIBUTING.md +7 -0
  4. data/Makefile +5 -1
  5. data/config.yml +308 -166
  6. data/docs/configuration.md +0 -1
  7. data/docs/encoding.md +5 -5
  8. data/docs/mapping.md +91 -91
  9. data/docs/serialization.md +25 -22
  10. data/ext/yarp/api_node.c +1210 -483
  11. data/ext/yarp/extension.c +22 -8
  12. data/ext/yarp/extension.h +2 -2
  13. data/include/yarp/ast.h +692 -183
  14. data/include/yarp/defines.h +2 -1
  15. data/include/yarp/diagnostic.h +200 -3
  16. data/include/yarp/enc/yp_encoding.h +10 -10
  17. data/include/yarp/node.h +0 -4
  18. data/include/yarp/parser.h +19 -19
  19. data/include/yarp/regexp.h +1 -1
  20. data/include/yarp/unescape.h +4 -4
  21. data/include/yarp/util/yp_buffer.h +3 -0
  22. data/include/yarp/util/yp_char.h +16 -16
  23. data/include/yarp/util/yp_constant_pool.h +12 -5
  24. data/include/yarp/util/yp_newline_list.h +5 -5
  25. data/include/yarp/util/yp_string.h +4 -4
  26. data/include/yarp/util/yp_string_list.h +0 -3
  27. data/include/yarp/util/yp_strpbrk.h +1 -1
  28. data/include/yarp/version.h +2 -2
  29. data/include/yarp.h +5 -4
  30. data/lib/yarp/desugar_visitor.rb +59 -122
  31. data/lib/yarp/mutation_visitor.rb +22 -12
  32. data/lib/yarp/node.rb +3081 -501
  33. data/lib/yarp/parse_result/comments.rb +172 -0
  34. data/lib/yarp/parse_result/newlines.rb +60 -0
  35. data/lib/yarp/pattern.rb +239 -0
  36. data/lib/yarp/serialize.rb +152 -129
  37. data/lib/yarp.rb +109 -49
  38. data/src/diagnostic.c +254 -2
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1871 -1466
  47. data/src/prettyprint.c +463 -230
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +352 -184
  50. data/src/unescape.c +152 -122
  51. data/src/util/yp_buffer.c +7 -2
  52. data/src/util/yp_char.c +35 -40
  53. data/src/util/yp_constant_pool.c +45 -12
  54. data/src/util/yp_memchr.c +1 -1
  55. data/src/util/yp_newline_list.c +10 -5
  56. data/src/util/yp_string.c +22 -20
  57. data/src/util/yp_string_list.c +4 -7
  58. data/src/util/yp_strncasecmp.c +3 -6
  59. data/src/util/yp_strpbrk.c +8 -8
  60. data/src/yarp.c +1288 -1021
  61. data/yarp.gemspec +4 -1
  62. metadata +6 -3
data/src/util/yp_char.c CHANGED
@@ -13,8 +13,8 @@
13
13
  #define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
14
14
  #define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
15
15
 
16
- static const unsigned char yp_char_table[256] = {
17
- //0 1 2 3 4 5 6 7 8 9 A B C D E F
16
+ static const uint8_t yp_byte_table[256] = {
17
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
18
18
  0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
19
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
20
20
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
@@ -33,7 +33,7 @@ static const unsigned char yp_char_table[256] = {
33
33
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
34
34
  };
35
35
 
36
- static const unsigned char yp_number_table[256] = {
36
+ static const uint8_t yp_number_table[256] = {
37
37
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
38
38
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
39
39
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
@@ -54,20 +54,20 @@ static const unsigned char yp_number_table[256] = {
54
54
  };
55
55
 
56
56
  static inline size_t
57
- yp_strspn_char_kind(const char *string, ptrdiff_t length, unsigned char kind) {
57
+ yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
58
58
  if (length <= 0) return 0;
59
59
 
60
60
  size_t size = 0;
61
61
  size_t maximum = (size_t) length;
62
62
 
63
- while (size < maximum && (yp_char_table[(unsigned char) string[size]] & kind)) size++;
63
+ while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
64
64
  return size;
65
65
  }
66
66
 
67
67
  // Returns the number of characters at the start of the string that are
68
68
  // whitespace. Disallows searching past the given maximum number of characters.
69
69
  size_t
70
- yp_strspn_whitespace(const char *string, ptrdiff_t length) {
70
+ yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
71
71
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
72
72
  }
73
73
 
@@ -75,20 +75,15 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
75
75
  // whitespace while also tracking the location of each newline. Disallows
76
76
  // searching past the given maximum number of characters.
77
77
  size_t
78
- yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
78
+ yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
79
79
  if (length <= 0) return 0;
80
80
 
81
81
  size_t size = 0;
82
82
  size_t maximum = (size_t) length;
83
83
 
84
- while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
84
+ while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
85
85
  if (string[size] == '\n') {
86
- if (stop_at_newline) {
87
- return size + 1;
88
- }
89
- else {
90
- yp_newline_list_append(newline_list, string + size);
91
- }
86
+ yp_newline_list_append(newline_list, string + size);
92
87
  }
93
88
 
94
89
  size++;
@@ -100,42 +95,42 @@ yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_l
100
95
  // Returns the number of characters at the start of the string that are inline
101
96
  // whitespace. Disallows searching past the given maximum number of characters.
102
97
  size_t
103
- yp_strspn_inline_whitespace(const char *string, ptrdiff_t length) {
98
+ yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
104
99
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
105
100
  }
106
101
 
107
102
  // Returns the number of characters at the start of the string that are regexp
108
103
  // options. Disallows searching past the given maximum number of characters.
109
104
  size_t
110
- yp_strspn_regexp_option(const char *string, ptrdiff_t length) {
105
+ yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
111
106
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
112
107
  }
113
108
 
114
109
  static inline bool
115
- yp_char_is_char_kind(const char c, unsigned char kind) {
116
- return (yp_char_table[(unsigned char) c] & kind) != 0;
110
+ yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
111
+ return (yp_byte_table[b] & kind) != 0;
117
112
  }
118
113
 
119
114
  // Returns true if the given character is a whitespace character.
120
115
  bool
121
- yp_char_is_whitespace(const char c) {
122
- return yp_char_is_char_kind(c, YP_CHAR_BIT_WHITESPACE);
116
+ yp_char_is_whitespace(const uint8_t b) {
117
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
123
118
  }
124
119
 
125
120
  // Returns true if the given character is an inline whitespace character.
126
121
  bool
127
- yp_char_is_inline_whitespace(const char c) {
128
- return yp_char_is_char_kind(c, YP_CHAR_BIT_INLINE_WHITESPACE);
122
+ yp_char_is_inline_whitespace(const uint8_t b) {
123
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
129
124
  }
130
125
 
131
126
  static inline size_t
132
- yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind) {
127
+ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
133
128
  if (length <= 0) return 0;
134
129
 
135
130
  size_t size = 0;
136
131
  size_t maximum = (size_t) length;
137
132
 
138
- while (size < maximum && (yp_number_table[(unsigned char) string[size]] & kind)) size++;
133
+ while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
139
134
  return size;
140
135
  }
141
136
 
@@ -143,7 +138,7 @@ yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind)
143
138
  // digits or underscores. Disallows searching past the given maximum number of
144
139
  // characters.
145
140
  size_t
146
- yp_strspn_binary_number(const char *string, ptrdiff_t length) {
141
+ yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
147
142
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
148
143
  }
149
144
 
@@ -151,14 +146,14 @@ yp_strspn_binary_number(const char *string, ptrdiff_t length) {
151
146
  // digits or underscores. Disallows searching past the given maximum number of
152
147
  // characters.
153
148
  size_t
154
- yp_strspn_octal_number(const char *string, ptrdiff_t length) {
149
+ yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
155
150
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
156
151
  }
157
152
 
158
153
  // Returns the number of characters at the start of the string that are decimal
159
154
  // digits. Disallows searching past the given maximum number of characters.
160
155
  size_t
161
- yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
156
+ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
162
157
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
163
158
  }
164
159
 
@@ -166,7 +161,7 @@ yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
166
161
  // digits or underscores. Disallows searching past the given maximum number of
167
162
  // characters.
168
163
  size_t
169
- yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
164
+ yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
170
165
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
171
166
  }
172
167
 
@@ -174,7 +169,7 @@ yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
174
169
  // hexadecimal digits. Disallows searching past the given maximum number of
175
170
  // characters.
176
171
  size_t
177
- yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
172
+ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
178
173
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
179
174
  }
180
175
 
@@ -182,37 +177,37 @@ yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
182
177
  // hexadecimal digits or underscores. Disallows searching past the given maximum
183
178
  // number of characters.
184
179
  size_t
185
- yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length) {
180
+ yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
186
181
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
187
182
  }
188
183
 
189
184
  static inline bool
190
- yp_char_is_number_kind(const char c, unsigned char kind) {
191
- return (yp_number_table[(unsigned char) c] & kind) != 0;
185
+ yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
186
+ return (yp_number_table[b] & kind) != 0;
192
187
  }
193
188
 
194
189
  // Returns true if the given character is a binary digit.
195
190
  bool
196
- yp_char_is_binary_digit(const char c) {
197
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_BINARY_DIGIT);
191
+ yp_char_is_binary_digit(const uint8_t b) {
192
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
198
193
  }
199
194
 
200
195
  // Returns true if the given character is an octal digit.
201
196
  bool
202
- yp_char_is_octal_digit(const char c) {
203
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_OCTAL_DIGIT);
197
+ yp_char_is_octal_digit(const uint8_t b) {
198
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
204
199
  }
205
200
 
206
201
  // Returns true if the given character is a decimal digit.
207
202
  bool
208
- yp_char_is_decimal_digit(const char c) {
209
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_DECIMAL_DIGIT);
203
+ yp_char_is_decimal_digit(const uint8_t b) {
204
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
210
205
  }
211
206
 
212
207
  // Returns true if the given character is a hexadecimal digit.
213
208
  bool
214
- yp_char_is_hexadecimal_digit(const char c) {
215
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
209
+ yp_char_is_hexadecimal_digit(const uint8_t b) {
210
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
216
211
  }
217
212
 
218
213
  #undef YP_CHAR_BIT_WHITESPACE
data/src/util/yp_constant_pool.c CHANGED
@@ -48,12 +48,12 @@ yp_constant_id_list_free(yp_constant_id_list_t *list) {
48
48
  // A relatively simple hash function (djb2) that is used to hash strings. We are
49
49
  // optimizing here for simplicity and speed.
50
50
  static inline size_t
51
- yp_constant_pool_hash(const char *start, size_t length) {
51
+ yp_constant_pool_hash(const uint8_t *start, size_t length) {
52
52
  // This is a prime number used as the initial value for the hash function.
53
53
  size_t value = 5381;
54
54
 
55
55
  for (size_t index = 0; index < length; index++) {
56
- value = ((value << 5) + value) + ((unsigned char) start[index]);
56
+ value = ((value << 5) + value) + start[index];
57
57
  }
58
58
 
59
59
  return value;
@@ -106,12 +106,11 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
106
106
  return true;
107
107
  }
108
108
 
109
- // Insert a constant into a constant pool. Returns the id of the constant, or 0
110
- // if any potential calls to resize fail.
111
- yp_constant_id_t
112
- yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length) {
109
+ // Insert a constant into a constant pool and return its index in the pool.
110
+ static size_t
111
+ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
113
112
  if (pool->size >= (pool->capacity / 4 * 3)) {
114
- if (!yp_constant_pool_resize(pool)) return 0;
113
+ if (!yp_constant_pool_resize(pool)) return pool->capacity;
115
114
  }
116
115
 
117
116
  size_t hash = yp_constant_pool_hash(start, length);
@@ -122,26 +121,60 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t leng
122
121
  // If there is a collision, then we need to check if the content is the
123
122
  // same as the content we are trying to insert. If it is, then we can
124
123
  // return the id of the existing constant.
125
- if ((constant->length == length) && strncmp(constant->start, start, length) == 0) {
126
- return pool->constants[index].id;
124
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
125
+ return index;
127
126
  }
128
127
 
129
128
  index = (index + 1) % pool->capacity;
130
129
  }
131
130
 
132
- yp_constant_id_t id = (yp_constant_id_t)++pool->size;
131
+ pool->size++;
132
+ assert(pool->size < ((size_t) (1 << 31)));
133
+
133
134
  pool->constants[index] = (yp_constant_t) {
134
- .id = id,
135
+ .id = (unsigned int) (pool->size & 0x7FFFFFFF),
135
136
  .start = start,
136
137
  .length = length,
137
138
  .hash = hash
138
139
  };
139
140
 
140
- return id;
141
+ return index;
142
+ }
143
+
144
+ // Insert a constant into a constant pool. Returns the id of the constant, or 0
145
+ // if any potential calls to resize fail.
146
+ yp_constant_id_t
147
+ yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
148
+ size_t index = yp_constant_pool_insert(pool, start, length);
149
+ return index == pool->capacity ? 0 : ((yp_constant_id_t) pool->constants[index].id);
150
+ }
151
+
152
+ // Insert a constant into a constant pool from memory that is now owned by the
153
+ // constant pool. Returns the id of the constant, or 0 if any potential calls to
154
+ // resize fail.
155
+ yp_constant_id_t
156
+ yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
157
+ size_t index = yp_constant_pool_insert(pool, start, length);
158
+ if (index == pool->capacity) return 0;
159
+
160
+ yp_constant_t *constant = &pool->constants[index];
161
+ constant->owned = true;
162
+ return ((yp_constant_id_t) constant->id);
141
163
  }
142
164
 
143
165
  // Free the memory associated with a constant pool.
144
166
  void
145
167
  yp_constant_pool_free(yp_constant_pool_t *pool) {
168
+ // For each constant in the current constant pool, free the contents if the
169
+ // contents are owned.
170
+ for (uint32_t index = 0; index < pool->capacity; index++) {
171
+ yp_constant_t *constant = &pool->constants[index];
172
+
173
+ // If an id is set on this constant, then we know we have content here.
174
+ if (constant->id != 0 && constant->owned) {
175
+ free((void *) constant->start);
176
+ }
177
+ }
178
+
146
179
  free(pool->constants);
147
180
  }
data/src/util/yp_memchr.c CHANGED
@@ -8,7 +8,7 @@
8
8
  void *
9
9
  yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
10
10
  if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
11
- const char *source = (const char *) memory;
11
+ const uint8_t *source = (const uint8_t *) memory;
12
12
  size_t index = 0;
13
13
 
14
14
  while (index < number) {
data/src/util/yp_newline_list.c CHANGED
@@ -3,7 +3,7 @@
3
3
  // Initialize a new newline list with the given capacity. Returns true if the
4
4
  // allocation of the offsets succeeds, otherwise returns false.
5
5
  bool
6
- yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity) {
6
+ yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
7
7
  list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
8
8
  if (list->offsets == NULL) return false;
9
9
 
@@ -23,16 +23,21 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity
23
23
  // Append a new offset to the newline list. Returns true if the reallocation of
24
24
  // the offsets succeeds (if one was necessary), otherwise returns false.
25
25
  bool
26
- yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
26
+ yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
27
27
  if (list->size == list->capacity) {
28
+ size_t *original_offsets = list->offsets;
29
+
28
30
  list->capacity = (list->capacity * 3) / 2;
29
- list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
31
+ list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
32
+ memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
33
+ free(original_offsets);
30
34
  if (list->offsets == NULL) return false;
31
35
  }
32
36
 
33
37
  assert(*cursor == '\n');
34
38
  assert(cursor >= list->start);
35
39
  size_t newline_offset = (size_t) (cursor - list->start + 1);
40
+
36
41
  assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
37
42
  list->offsets[list->size++] = newline_offset;
38
43
 
@@ -41,7 +46,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
41
46
 
42
47
  // Conditionally append a new offset to the newline list, if the value passed in is a newline.
43
48
  bool
44
- yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
49
+ yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
45
50
  if (*cursor != '\n') {
46
51
  return true;
47
52
  }
@@ -105,7 +110,7 @@ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
105
110
  // list, the line and column of the closest offset less than the given offset
106
111
  // are returned.
107
112
  yp_line_column_t
108
- yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor) {
113
+ yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
109
114
  assert(cursor >= list->start);
110
115
  size_t offset = (size_t) (cursor - list->start);
111
116
  yp_line_column_t result;
data/src/util/yp_string.c CHANGED
@@ -12,18 +12,19 @@
12
12
 
13
13
  // Initialize a shared string that is based on initial input.
14
14
  void
15
- yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
15
+ yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
16
16
  assert(start <= end);
17
+
17
18
  *string = (yp_string_t) {
18
19
  .type = YP_STRING_SHARED,
19
- .source = (char*) start,
20
+ .source = start,
20
21
  .length = (size_t) (end - start)
21
22
  };
22
23
  }
23
24
 
24
25
  // Initialize an owned string that is responsible for freeing allocated memory.
25
26
  void
26
- yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
27
+ yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
27
28
  *string = (yp_string_t) {
28
29
  .type = YP_STRING_OWNED,
29
30
  .source = source,
@@ -36,13 +37,13 @@ void
36
37
  yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
37
38
  *string = (yp_string_t) {
38
39
  .type = YP_STRING_CONSTANT,
39
- .source = (char*) source,
40
+ .source = (const uint8_t *) source,
40
41
  .length = length
41
42
  };
42
43
  }
43
44
 
44
45
  static void
45
- yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
46
+ yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
46
47
  *string = (yp_string_t) {
47
48
  .type = YP_STRING_MAPPED,
48
49
  .source = source,
@@ -67,13 +68,13 @@ yp_string_ensure_owned(yp_string_t *string) {
67
68
  if (string->type == YP_STRING_OWNED) return;
68
69
 
69
70
  size_t length = yp_string_length(string);
70
- const char *source = yp_string_source(string);
71
+ const uint8_t *source = yp_string_source(string);
71
72
 
72
- char *memory = malloc(length);
73
+ uint8_t *memory = malloc(length);
73
74
  if (!memory) return;
74
75
 
75
76
  yp_string_owned_init(string, memory, length);
76
- memcpy(string->source, source, length);
77
+ memcpy((void *) string->source, source, length);
77
78
  }
78
79
 
79
80
  // Returns the length associated with the string.
@@ -83,7 +84,7 @@ yp_string_length(const yp_string_t *string) {
83
84
  }
84
85
 
85
86
  // Returns the start pointer associated with the string.
86
- YP_EXPORTED_FUNCTION const char *
87
+ YP_EXPORTED_FUNCTION const uint8_t *
87
88
  yp_string_source(const yp_string_t *string) {
88
89
  return string->source;
89
90
  }
@@ -91,15 +92,16 @@ yp_string_source(const yp_string_t *string) {
91
92
  // Free the associated memory of the given string.
92
93
  YP_EXPORTED_FUNCTION void
93
94
  yp_string_free(yp_string_t *string) {
95
+ void *memory = (void *) string->source;
96
+
94
97
  if (string->type == YP_STRING_OWNED) {
95
- free(string->source);
98
+ free(memory);
96
99
  } else if (string->type == YP_STRING_MAPPED && string->length) {
97
- void *memory = (void *) string->source;
98
- #if defined(_WIN32)
100
+ #if defined(_WIN32)
99
101
  UnmapViewOfFile(memory);
100
- #else
102
+ #else
101
103
  munmap(memory, string->length);
102
- #endif
104
+ #endif
103
105
  }
104
106
  }
105
107
 
@@ -126,8 +128,8 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
126
128
  // the source to a constant empty string and return.
127
129
  if (file_size == 0) {
128
130
  CloseHandle(file);
129
- char empty_string[] = "";
130
- yp_string_mapped_init_internal(string, empty_string, 0);
131
+ uint8_t empty[] = "";
132
+ yp_string_mapped_init_internal(string, empty, 0);
131
133
  return true;
132
134
  }
133
135
 
@@ -140,7 +142,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
140
142
  }
141
143
 
142
144
  // Map the file into memory.
143
- char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
145
+ uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
144
146
  CloseHandle(mapping);
145
147
  CloseHandle(file);
146
148
 
@@ -169,12 +171,12 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
169
171
 
170
172
  // mmap the file descriptor to virtually get the contents
171
173
  size_t size = (size_t) sb.st_size;
172
- char *source = NULL;
174
+ uint8_t *source = NULL;
173
175
 
174
176
  if (size == 0) {
175
177
  close(fd);
176
- char empty_string[] = "";
177
- yp_string_mapped_init_internal(string, empty_string, 0);
178
+ uint8_t empty[] = "";
179
+ yp_string_mapped_init_internal(string, empty, 0);
178
180
  return true;
179
181
  }
180
182
 
data/src/util/yp_string_list.c CHANGED
@@ -1,11 +1,5 @@
1
1
  #include "yarp/util/yp_string_list.h"
2
2
 
3
- // Allocate a new yp_string_list_t.
4
- yp_string_list_t *
5
- yp_string_list_alloc(void) {
6
- return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
7
- }
8
-
9
3
  // Initialize a yp_string_list_t with its default values.
10
4
  void
11
5
  yp_string_list_init(yp_string_list_t *string_list) {
@@ -18,8 +12,11 @@ yp_string_list_init(yp_string_list_t *string_list) {
18
12
  void
19
13
  yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
20
14
  if (string_list->length + 1 > string_list->capacity) {
15
+ yp_string_t *original_string = string_list->strings;
21
16
  string_list->capacity *= 2;
22
- string_list->strings = (yp_string_t *) realloc(string_list->strings, string_list->capacity * sizeof(yp_string_t));
17
+ string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
18
+ memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
19
+ free(original_string);
23
20
  }
24
21
 
25
22
  string_list->strings[string_list->length++] = *string;
data/src/util/yp_strncasecmp.c CHANGED
@@ -1,18 +1,15 @@
1
1
  #include <ctype.h>
2
2
  #include <stddef.h>
3
+ #include <stdint.h>
3
4
 
4
5
  int
5
- yp_strncasecmp(const char *string1, const char *string2, size_t length) {
6
+ yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
6
7
  size_t offset = 0;
7
8
  int difference = 0;
8
9
 
9
10
  while (offset < length && string1[offset] != '\0') {
10
11
  if (string2[offset] == '\0') return string1[offset];
11
-
12
- unsigned char left = (unsigned char) string1[offset];
13
- unsigned char right = (unsigned char) string2[offset];
14
-
15
- if ((difference = tolower(left) - tolower(right)) != 0) return difference;
12
+ if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
16
13
  offset++;
17
14
  }
18
15
 
data/src/util/yp_strpbrk.c CHANGED
@@ -1,12 +1,12 @@
1
1
  #include "yarp/util/yp_strpbrk.h"
2
2
 
3
3
  // This is the slow path that does care about the encoding.
4
- static inline const char *
5
- yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
4
+ static inline const uint8_t *
5
+ yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
6
6
  size_t index = 0;
7
7
 
8
8
  while (index < maximum) {
9
- if (strchr(charset, source[index]) != NULL) {
9
+ if (strchr((const char *) charset, source[index]) != NULL) {
10
10
  return source + index;
11
11
  }
12
12
 
@@ -22,12 +22,12 @@ yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *chars
22
22
  }
23
23
 
24
24
  // This is the fast path that does not care about the encoding.
25
- static inline const char *
26
- yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
25
+ static inline const uint8_t *
26
+ yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
27
27
  size_t index = 0;
28
28
 
29
29
  while (index < maximum) {
30
- if (strchr(charset, source[index]) != NULL) {
30
+ if (strchr((const char *) charset, source[index]) != NULL) {
31
31
  return source + index;
32
32
  }
33
33
 
@@ -54,8 +54,8 @@ yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum)
54
54
  // characters that are trailing bytes of multi-byte characters. For example, in
55
55
  // Shift-JIS, the backslash character can be a trailing byte. In that case we
56
56
  // need to take a slower path and iterate one multi-byte character at a time.
57
- const char *
58
- yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
57
+ const uint8_t *
58
+ yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
59
59
  if (length <= 0) {
60
60
  return NULL;
61
61
  } else if (parser->encoding_changed && parser->encoding.multibyte) {