yarp 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/util/yp_buffer.c CHANGED
@@ -63,8 +63,13 @@ yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) {
63
63
  // Append a string to the buffer.
64
64
  void
65
65
  yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) {
66
- const void *source = value;
67
- yp_buffer_append(buffer, source, length);
66
+ yp_buffer_append(buffer, value, length);
67
+ }
68
+
69
+ // Append a list of bytes to the buffer.
70
+ void
71
+ yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) {
72
+ yp_buffer_append(buffer, (const char *) value, length);
68
73
  }
69
74
 
70
75
  // Append a single byte to the buffer.
data/src/util/yp_char.c CHANGED
@@ -13,8 +13,8 @@
13
13
  #define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
14
14
  #define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
15
15
 
16
- static const unsigned char yp_char_table[256] = {
17
- //0 1 2 3 4 5 6 7 8 9 A B C D E F
16
+ static const uint8_t yp_byte_table[256] = {
17
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
18
18
  0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
19
19
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
20
20
  3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
@@ -33,7 +33,7 @@ static const unsigned char yp_char_table[256] = {
33
33
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
34
34
  };
35
35
 
36
- static const unsigned char yp_number_table[256] = {
36
+ static const uint8_t yp_number_table[256] = {
37
37
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
38
38
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
39
39
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
@@ -54,20 +54,20 @@ static const unsigned char yp_number_table[256] = {
54
54
  };
55
55
 
56
56
  static inline size_t
57
- yp_strspn_char_kind(const char *string, ptrdiff_t length, unsigned char kind) {
57
+ yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
58
58
  if (length <= 0) return 0;
59
59
 
60
60
  size_t size = 0;
61
61
  size_t maximum = (size_t) length;
62
62
 
63
- while (size < maximum && (yp_char_table[(unsigned char) string[size]] & kind)) size++;
63
+ while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
64
64
  return size;
65
65
  }
66
66
 
67
67
  // Returns the number of characters at the start of the string that are
68
68
  // whitespace. Disallows searching past the given maximum number of characters.
69
69
  size_t
70
- yp_strspn_whitespace(const char *string, ptrdiff_t length) {
70
+ yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
71
71
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
72
72
  }
73
73
 
@@ -75,13 +75,13 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
75
75
  // whitespace while also tracking the location of each newline. Disallows
76
76
  // searching past the given maximum number of characters.
77
77
  size_t
78
- yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
78
+ yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list, bool stop_at_newline) {
79
79
  if (length <= 0) return 0;
80
80
 
81
81
  size_t size = 0;
82
82
  size_t maximum = (size_t) length;
83
83
 
84
- while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
84
+ while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
85
85
  if (string[size] == '\n') {
86
86
  if (stop_at_newline) {
87
87
  return size + 1;
@@ -100,42 +100,42 @@ yp_strspn_whitespace_newlines(const char *string, ptrdiff_t length, yp_newline_l
100
100
  // Returns the number of characters at the start of the string that are inline
101
101
  // whitespace. Disallows searching past the given maximum number of characters.
102
102
  size_t
103
- yp_strspn_inline_whitespace(const char *string, ptrdiff_t length) {
103
+ yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
104
104
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
105
105
  }
106
106
 
107
107
  // Returns the number of characters at the start of the string that are regexp
108
108
  // options. Disallows searching past the given maximum number of characters.
109
109
  size_t
110
- yp_strspn_regexp_option(const char *string, ptrdiff_t length) {
110
+ yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
111
111
  return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
112
112
  }
113
113
 
114
114
  static inline bool
115
- yp_char_is_char_kind(const char c, unsigned char kind) {
116
- return (yp_char_table[(unsigned char) c] & kind) != 0;
115
+ yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
116
+ return (yp_byte_table[b] & kind) != 0;
117
117
  }
118
118
 
119
119
  // Returns true if the given character is a whitespace character.
120
120
  bool
121
- yp_char_is_whitespace(const char c) {
122
- return yp_char_is_char_kind(c, YP_CHAR_BIT_WHITESPACE);
121
+ yp_char_is_whitespace(const uint8_t b) {
122
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
123
123
  }
124
124
 
125
125
  // Returns true if the given character is an inline whitespace character.
126
126
  bool
127
- yp_char_is_inline_whitespace(const char c) {
128
- return yp_char_is_char_kind(c, YP_CHAR_BIT_INLINE_WHITESPACE);
127
+ yp_char_is_inline_whitespace(const uint8_t b) {
128
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
129
129
  }
130
130
 
131
131
  static inline size_t
132
- yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind) {
132
+ yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
133
133
  if (length <= 0) return 0;
134
134
 
135
135
  size_t size = 0;
136
136
  size_t maximum = (size_t) length;
137
137
 
138
- while (size < maximum && (yp_number_table[(unsigned char) string[size]] & kind)) size++;
138
+ while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
139
139
  return size;
140
140
  }
141
141
 
@@ -143,7 +143,7 @@ yp_strspn_number_kind(const char *string, ptrdiff_t length, unsigned char kind)
143
143
  // digits or underscores. Disallows searching past the given maximum number of
144
144
  // characters.
145
145
  size_t
146
- yp_strspn_binary_number(const char *string, ptrdiff_t length) {
146
+ yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length) {
147
147
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_BINARY_NUMBER);
148
148
  }
149
149
 
@@ -151,14 +151,14 @@ yp_strspn_binary_number(const char *string, ptrdiff_t length) {
151
151
  // digits or underscores. Disallows searching past the given maximum number of
152
152
  // characters.
153
153
  size_t
154
- yp_strspn_octal_number(const char *string, ptrdiff_t length) {
154
+ yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length) {
155
155
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_OCTAL_NUMBER);
156
156
  }
157
157
 
158
158
  // Returns the number of characters at the start of the string that are decimal
159
159
  // digits. Disallows searching past the given maximum number of characters.
160
160
  size_t
161
- yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
161
+ yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
162
162
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
163
163
  }
164
164
 
@@ -166,7 +166,7 @@ yp_strspn_decimal_digit(const char *string, ptrdiff_t length) {
166
166
  // digits or underscores. Disallows searching past the given maximum number of
167
167
  // characters.
168
168
  size_t
169
- yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
169
+ yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length) {
170
170
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_NUMBER);
171
171
  }
172
172
 
@@ -174,7 +174,7 @@ yp_strspn_decimal_number(const char *string, ptrdiff_t length) {
174
174
  // hexadecimal digits. Disallows searching past the given maximum number of
175
175
  // characters.
176
176
  size_t
177
- yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
177
+ yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
178
178
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
179
179
  }
180
180
 
@@ -182,37 +182,37 @@ yp_strspn_hexadecimal_digit(const char *string, ptrdiff_t length) {
182
182
  // hexadecimal digits or underscores. Disallows searching past the given maximum
183
183
  // number of characters.
184
184
  size_t
185
- yp_strspn_hexadecimal_number(const char *string, ptrdiff_t length) {
185
+ yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length) {
186
186
  return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
187
187
  }
188
188
 
189
189
  static inline bool
190
- yp_char_is_number_kind(const char c, unsigned char kind) {
191
- return (yp_number_table[(unsigned char) c] & kind) != 0;
190
+ yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
191
+ return (yp_number_table[b] & kind) != 0;
192
192
  }
193
193
 
194
194
  // Returns true if the given character is a binary digit.
195
195
  bool
196
- yp_char_is_binary_digit(const char c) {
197
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_BINARY_DIGIT);
196
+ yp_char_is_binary_digit(const uint8_t b) {
197
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
198
198
  }
199
199
 
200
200
  // Returns true if the given character is an octal digit.
201
201
  bool
202
- yp_char_is_octal_digit(const char c) {
203
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_OCTAL_DIGIT);
202
+ yp_char_is_octal_digit(const uint8_t b) {
203
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
204
204
  }
205
205
 
206
206
  // Returns true if the given character is a decimal digit.
207
207
  bool
208
- yp_char_is_decimal_digit(const char c) {
209
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_DECIMAL_DIGIT);
208
+ yp_char_is_decimal_digit(const uint8_t b) {
209
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
210
210
  }
211
211
 
212
212
  // Returns true if the given character is a hexadecimal digit.
213
213
  bool
214
- yp_char_is_hexadecimal_digit(const char c) {
215
- return yp_char_is_number_kind(c, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
214
+ yp_char_is_hexadecimal_digit(const uint8_t b) {
215
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
216
216
  }
217
217
 
218
218
  #undef YP_CHAR_BIT_WHITESPACE
data/src/util/yp_constant_pool.c CHANGED
@@ -48,12 +48,12 @@ yp_constant_id_list_free(yp_constant_id_list_t *list) {
48
48
  // A relatively simple hash function (djb2) that is used to hash strings. We are
49
49
  // optimizing here for simplicity and speed.
50
50
  static inline size_t
51
- yp_constant_pool_hash(const char *start, size_t length) {
51
+ yp_constant_pool_hash(const uint8_t *start, size_t length) {
52
52
  // This is a prime number used as the initial value for the hash function.
53
53
  size_t value = 5381;
54
54
 
55
55
  for (size_t index = 0; index < length; index++) {
56
- value = ((value << 5) + value) + ((unsigned char) start[index]);
56
+ value = ((value << 5) + value) + start[index];
57
57
  }
58
58
 
59
59
  return value;
@@ -109,7 +109,7 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
109
109
  // Insert a constant into a constant pool. Returns the id of the constant, or 0
110
110
  // if any potential calls to resize fail.
111
111
  yp_constant_id_t
112
- yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t length) {
112
+ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
113
113
  if (pool->size >= (pool->capacity / 4 * 3)) {
114
114
  if (!yp_constant_pool_resize(pool)) return 0;
115
115
  }
@@ -122,7 +122,7 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const char *start, size_t leng
122
122
  // If there is a collision, then we need to check if the content is the
123
123
  // same as the content we are trying to insert. If it is, then we can
124
124
  // return the id of the existing constant.
125
- if ((constant->length == length) && strncmp(constant->start, start, length) == 0) {
125
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
126
126
  return pool->constants[index].id;
127
127
  }
128
128
 
data/src/util/yp_memchr.c CHANGED
@@ -8,7 +8,7 @@
8
8
  void *
9
9
  yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
10
10
  if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
11
- const char *source = (const char *) memory;
11
+ const uint8_t *source = (const uint8_t *) memory;
12
12
  size_t index = 0;
13
13
 
14
14
  while (index < number) {
data/src/util/yp_newline_list.c CHANGED
@@ -3,7 +3,7 @@
3
3
  // Initialize a new newline list with the given capacity. Returns true if the
4
4
  // allocation of the offsets succeeds, otherwise returns false.
5
5
  bool
6
- yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity) {
6
+ yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
7
7
  list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
8
8
  if (list->offsets == NULL) return false;
9
9
 
@@ -23,21 +23,32 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity
23
23
  // Append a new offset to the newline list. Returns true if the reallocation of
24
24
  // the offsets succeeds (if one was necessary), otherwise returns false.
25
25
  bool
26
- yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
26
+ yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
27
27
  if (list->size == list->capacity) {
28
28
  list->capacity = (list->capacity * 3) / 2;
29
29
  list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
30
30
  if (list->offsets == NULL) return false;
31
31
  }
32
32
 
33
+ assert(*cursor == '\n');
33
34
  assert(cursor >= list->start);
34
35
  size_t newline_offset = (size_t) (cursor - list->start + 1);
36
+
35
37
  assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
36
38
  list->offsets[list->size++] = newline_offset;
37
39
 
38
40
  return true;
39
41
  }
40
42
 
43
+ // Conditionally append a new offset to the newline list, if the value passed in is a newline.
44
+ bool
45
+ yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
46
+ if (*cursor != '\n') {
47
+ return true;
48
+ }
49
+ return yp_newline_list_append(list, cursor);
50
+ }
51
+
41
52
  // Returns the line and column of the given offset, assuming we don't have any
42
53
  // information about the previous index that we found.
43
54
  static yp_line_column_t
@@ -95,7 +106,7 @@ yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
95
106
  // list, the line and column of the closest offset less than the given offset
96
107
  // are returned.
97
108
  yp_line_column_t
98
- yp_newline_list_line_column(yp_newline_list_t *list, const char *cursor) {
109
+ yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
99
110
  assert(cursor >= list->start);
100
111
  size_t offset = (size_t) (cursor - list->start);
101
112
  yp_line_column_t result;
data/src/util/yp_string.c CHANGED
@@ -12,18 +12,19 @@
12
12
 
13
13
  // Initialize a shared string that is based on initial input.
14
14
  void
15
- yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
15
+ yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
16
16
  assert(start <= end);
17
+
17
18
  *string = (yp_string_t) {
18
19
  .type = YP_STRING_SHARED,
19
- .source = (char*) start,
20
+ .source = start,
20
21
  .length = (size_t) (end - start)
21
22
  };
22
23
  }
23
24
 
24
25
  // Initialize an owned string that is responsible for freeing allocated memory.
25
26
  void
26
- yp_string_owned_init(yp_string_t *string, char *source, size_t length) {
27
+ yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
27
28
  *string = (yp_string_t) {
28
29
  .type = YP_STRING_OWNED,
29
30
  .source = source,
@@ -36,13 +37,13 @@ void
36
37
  yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
37
38
  *string = (yp_string_t) {
38
39
  .type = YP_STRING_CONSTANT,
39
- .source = (char*) source,
40
+ .source = (const uint8_t *) source,
40
41
  .length = length
41
42
  };
42
43
  }
43
44
 
44
45
  static void
45
- yp_string_mapped_init_internal(yp_string_t *string, char *source, size_t length) {
46
+ yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
46
47
  *string = (yp_string_t) {
47
48
  .type = YP_STRING_MAPPED,
48
49
  .source = source,
@@ -67,13 +68,13 @@ yp_string_ensure_owned(yp_string_t *string) {
67
68
  if (string->type == YP_STRING_OWNED) return;
68
69
 
69
70
  size_t length = yp_string_length(string);
70
- const char *source = yp_string_source(string);
71
+ const uint8_t *source = yp_string_source(string);
71
72
 
72
- char *memory = malloc(length);
73
+ uint8_t *memory = malloc(length);
73
74
  if (!memory) return;
74
75
 
75
76
  yp_string_owned_init(string, memory, length);
76
- memcpy(string->source, source, length);
77
+ memcpy((void *) string->source, source, length);
77
78
  }
78
79
 
79
80
  // Returns the length associated with the string.
@@ -83,7 +84,7 @@ yp_string_length(const yp_string_t *string) {
83
84
  }
84
85
 
85
86
  // Returns the start pointer associated with the string.
86
- YP_EXPORTED_FUNCTION const char *
87
+ YP_EXPORTED_FUNCTION const uint8_t *
87
88
  yp_string_source(const yp_string_t *string) {
88
89
  return string->source;
89
90
  }
@@ -91,15 +92,16 @@ yp_string_source(const yp_string_t *string) {
91
92
  // Free the associated memory of the given string.
92
93
  YP_EXPORTED_FUNCTION void
93
94
  yp_string_free(yp_string_t *string) {
95
+ void *memory = (void *) string->source;
96
+
94
97
  if (string->type == YP_STRING_OWNED) {
95
- free(string->source);
98
+ free(memory);
96
99
  } else if (string->type == YP_STRING_MAPPED && string->length) {
97
- void *memory = (void *) string->source;
98
- #if defined(_WIN32)
100
+ #if defined(_WIN32)
99
101
  UnmapViewOfFile(memory);
100
- #else
102
+ #else
101
103
  munmap(memory, string->length);
102
- #endif
104
+ #endif
103
105
  }
104
106
  }
105
107
 
@@ -126,8 +128,8 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
126
128
  // the source to a constant empty string and return.
127
129
  if (file_size == 0) {
128
130
  CloseHandle(file);
129
- char empty_string[] = "";
130
- yp_string_mapped_init_internal(string, empty_string, 0);
131
+ uint8_t empty[] = "";
132
+ yp_string_mapped_init_internal(string, empty, 0);
131
133
  return true;
132
134
  }
133
135
 
@@ -140,7 +142,7 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
140
142
  }
141
143
 
142
144
  // Map the file into memory.
143
- char *source = (char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
145
+ uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
144
146
  CloseHandle(mapping);
145
147
  CloseHandle(file);
146
148
 
@@ -169,12 +171,12 @@ yp_string_mapped_init(yp_string_t *string, const char *filepath) {
169
171
 
170
172
  // mmap the file descriptor to virtually get the contents
171
173
  size_t size = (size_t) sb.st_size;
172
- char *source = NULL;
174
+ uint8_t *source = NULL;
173
175
 
174
176
  if (size == 0) {
175
177
  close(fd);
176
- char empty_string[] = "";
177
- yp_string_mapped_init_internal(string, empty_string, 0);
178
+ uint8_t empty[] = "";
179
+ yp_string_mapped_init_internal(string, empty, 0);
178
180
  return true;
179
181
  }
180
182
 
data/src/util/yp_string_list.c CHANGED
@@ -1,11 +1,5 @@
1
1
  #include "yarp/util/yp_string_list.h"
2
2
 
3
- // Allocate a new yp_string_list_t.
4
- yp_string_list_t *
5
- yp_string_list_alloc(void) {
6
- return (yp_string_list_t *) malloc(sizeof(yp_string_list_t));
7
- }
8
-
9
3
  // Initialize a yp_string_list_t with its default values.
10
4
  void
11
5
  yp_string_list_init(yp_string_list_t *string_list) {
data/src/util/yp_strncasecmp.c CHANGED
@@ -1,18 +1,15 @@
1
1
  #include <ctype.h>
2
2
  #include <stddef.h>
3
+ #include <stdint.h>
3
4
 
4
5
  int
5
- yp_strncasecmp(const char *string1, const char *string2, size_t length) {
6
+ yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
6
7
  size_t offset = 0;
7
8
  int difference = 0;
8
9
 
9
10
  while (offset < length && string1[offset] != '\0') {
10
11
  if (string2[offset] == '\0') return string1[offset];
11
-
12
- unsigned char left = (unsigned char) string1[offset];
13
- unsigned char right = (unsigned char) string2[offset];
14
-
15
- if ((difference = tolower(left) - tolower(right)) != 0) return difference;
12
+ if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
16
13
  offset++;
17
14
  }
18
15
 
data/src/util/yp_strpbrk.c CHANGED
@@ -1,12 +1,12 @@
1
1
  #include "yarp/util/yp_strpbrk.h"
2
2
 
3
3
  // This is the slow path that does care about the encoding.
4
- static inline const char *
5
- yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
4
+ static inline const uint8_t *
5
+ yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
6
6
  size_t index = 0;
7
7
 
8
8
  while (index < maximum) {
9
- if (strchr(charset, source[index]) != NULL) {
9
+ if (strchr((const char *) charset, source[index]) != NULL) {
10
10
  return source + index;
11
11
  }
12
12
 
@@ -22,12 +22,12 @@ yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *chars
22
22
  }
23
23
 
24
24
  // This is the fast path that does not care about the encoding.
25
- static inline const char *
26
- yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
25
+ static inline const uint8_t *
26
+ yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
27
27
  size_t index = 0;
28
28
 
29
29
  while (index < maximum) {
30
- if (strchr(charset, source[index]) != NULL) {
30
+ if (strchr((const char *) charset, source[index]) != NULL) {
31
31
  return source + index;
32
32
  }
33
33
 
@@ -54,8 +54,8 @@ yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum)
54
54
  // characters that are trailing bytes of multi-byte characters. For example, in
55
55
  // Shift-JIS, the backslash character can be a trailing byte. In that case we
56
56
  // need to take a slower path and iterate one multi-byte character at a time.
57
- const char *
58
- yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
57
+ const uint8_t *
58
+ yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
59
59
  if (length <= 0) {
60
60
  return NULL;
61
61
  } else if (parser->encoding_changed && parser->encoding.multibyte) {