natalie_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
@@ -0,0 +1,1447 @@
1
+ #pragma once
2
+
3
+ #include <algorithm>
4
+ #include <assert.h>
5
+ #include <ctype.h>
6
+ #include <limits.h>
7
+ #include <stdarg.h>
8
+ #include <stdio.h>
9
+ #include <string.h>
10
+
11
+ namespace TM {
12
+
13
+ class String {
14
+ public:
15
+ static constexpr int STRING_GROW_FACTOR = 2;
16
+
17
+ /**
18
+ * Constructs an empty String.
19
+ *
20
+ * ```
21
+ * auto str = String();
22
+ * assert_eq(0, str.size());
23
+ * ```
24
+ */
25
+ String() { }
26
+
27
+ /**
28
+ * Constructs a new String by copying the contents
29
+ * from an existing C string.
30
+ *
31
+ * ```
32
+ * auto cstr = "foo";
33
+ * auto str = String(cstr);
34
+ * assert_eq(3, str.size());
35
+ * assert_neq(str.c_str(), cstr);
36
+ * ```
37
+ */
38
+ String(const char *str) {
39
+ assert(str);
40
+ set_str(str);
41
+ }
42
+
43
+ /**
44
+ * Constructs a new String by copying the contents
45
+ * from an existing C string with a given length.
46
+ *
47
+ * The given C string can contain null characters.
48
+ * The full given length gets copied regardless of nulls.
49
+ *
50
+ * ```
51
+ * auto cstr = "foo\0bar";
52
+ * auto str = String(cstr, 7);
53
+ * assert_eq(7, str.size());
54
+ * assert_eq('\0', str[3]);
55
+ * assert_eq('r', str[6]);
56
+ * ```
57
+ */
58
+ String(const char *str, size_t length) {
59
+ assert(str);
60
+ set_str(str, length);
61
+ }
62
+
63
+ /**
64
+ * Constructs a new String by copying the contents
65
+ * from an existing String.
66
+ *
67
+ * ```
68
+ * auto str1 = String { "foo" };
69
+ * auto str2 = String { str1 };
70
+ * assert_str_eq("foo", str2);
71
+ * ```
72
+ */
73
+ String(const String &other) {
74
+ set_str(other.c_str(), other.size());
75
+ }
76
+
77
+ /**
78
+ * Constructs a new String by copying the contents
79
+ * from an existing String pointer.
80
+ *
81
+ * ```
82
+ * auto str1 = String { "foo" };
83
+ * auto str2 = String { &str1 };
84
+ * assert_str_eq("foo", str2);
85
+ * ```
86
+ */
87
+ String(const String *other) {
88
+ assert(other);
89
+ set_str(other->c_str(), other->size());
90
+ }
91
+
92
+ /**
93
+ * Constructs a new String with the single given character.
94
+ *
95
+ * ```
96
+ * auto str = String { 'x' };
97
+ * assert_str_eq("x", str);
98
+ * ```
99
+ */
100
+ String(char c) {
101
+ char buf[2] = { c, 0 };
102
+ set_str(buf);
103
+ }
104
+
105
+ /**
106
+ * Constructs a new String of the specified length
107
+ * by filling it with the given character.
108
+ *
109
+ * ```
110
+ * auto str = String { 10, 'x' };
111
+ * assert_str_eq("xxxxxxxxxx", str);
112
+ * ```
113
+ *
114
+ * A length of zero produces an empty String.
115
+ */
116
+ String(size_t length, char c) {
117
+ // Fill the heap buffer directly instead of staging the data in a
118
+ // variable-length stack array, which is not standard C++ and can
119
+ // overflow the stack for large lengths.
120
+ m_str = new char[length + 1];
121
+ memset(m_str, c, sizeof(char) * length);
122
+ m_str[length] = 0;
123
+ m_length = length;
124
+ m_capacity = length;
125
+ }
119
+
120
+ /**
121
+ * Constructs a new String by converting the given number.
122
+ *
123
+ * ```
124
+ * auto str = String { (long long)10 };
125
+ * assert_str_eq("10", str);
126
+ * ```
127
+ */
128
+ String(long long number) {
129
+ int length = snprintf(NULL, 0, "%lli", number);
130
+ char buf[length + 1];
131
+ snprintf(buf, length + 1, "%lli", number);
132
+ set_str(buf);
133
+ }
134
+
135
+ /**
136
+ * Constructs a new String by converting the given number.
137
+ *
138
+ * ```
139
+ * auto str = String { (int)10 };
140
+ * assert_str_eq("10", str);
141
+ * ```
142
+ */
143
+ String(int number) {
144
+ int length = snprintf(NULL, 0, "%d", number);
145
+ char buf[length + 1];
146
+ snprintf(buf, length + 1, "%d", number);
147
+ set_str(buf);
148
+ }
149
+
150
+ /**
151
+ * Constructs a new String by converting the given
152
+ * double-precision float.
153
+ *
154
+ * ```
155
+ * auto str = String { 4.1 };
156
+ * assert_str_eq("4.1000", str);
157
+ * ```
158
+ *
159
+ * You can optionally specify the decimal precision.
160
+ *
161
+ * ```
162
+ * auto str = String { 4.1, 1 };
163
+ * assert_str_eq("4.1", str);
164
+ * ```
165
+ */
166
+ String(double number, int precision = 4) {
167
+ int length = snprintf(NULL, 0, "%.*f", precision, number);
168
+ char buf[length + 1];
169
+ snprintf(buf, length + 1, "%.*f", precision, number);
170
+ set_str(buf);
171
+ }
172
+
173
+ enum class HexFormat {
174
+ UppercaseAndPrefixed,
175
+ Uppercase,
176
+ LowercaseAndPrefixed,
177
+ Lowercase,
178
+ };
179
+
180
+ /**
181
+ * Creates a new String by converting the given
182
+ * number to hexadecimal format. By default, the
183
+ * result will be uppercase and prefixed with "0X".
184
+ *
185
+ * ```
186
+ * auto str = String::hex(254);
187
+ * assert_str_eq("0XFE", str);
188
+ * ```
189
+ *
190
+ * You can optionally specify the format, one of:
191
+ * - HexFormat::UppercaseAndPrefixed
192
+ * - HexFormat::Uppercase
193
+ * - HexFormat::LowercaseAndPrefixed
194
+ * - HexFormat::Lowercase
195
+ *
196
+ * ```
197
+ * auto str = String::hex(254, String::HexFormat::Lowercase);
198
+ * assert_str_eq("fe", str);
199
+ * ```
200
+ *
201
+ * Negative numbers are converted to unsigned long long before formatting.
202
+ */
203
+ static String hex(long long number, HexFormat format = HexFormat::UppercaseAndPrefixed) {
204
+ bool uppercase = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::Uppercase;
205
+ bool prefixed = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::LowercaseAndPrefixed;
206
+ const char *format_str = uppercase ? "%llX" : "%llx";
207
+ // %llX and %llx expect an unsigned argument, so convert explicitly.
208
+ auto value = static_cast<unsigned long long>(number);
209
+ // One hex digit per 4 bits plus the terminator bounds the output, so a
210
+ // fixed-size buffer replaces the variable-length stack array.
211
+ char buf[(sizeof(value) * CHAR_BIT + 3) / 4 + 1];
212
+ snprintf(buf, sizeof(buf), format_str, value);
213
+ auto str = String(buf);
214
+ if (prefixed)
215
+ str.prepend(uppercase ? "0X" : "0x");
216
+ return str;
217
+ }
213
+
214
+ virtual ~String() {
215
+ delete[] m_str;
216
+ }
217
+
218
+ /**
219
+ * Replaces the String data by copying from another String.
220
+ *
221
+ * ```
222
+ * auto str1 = String { "foo" };
223
+ * auto str2 = String { str1 };
224
+ * assert_str_eq("foo", str2);
225
+ * ```
226
+ */
227
+ String &operator=(const String &other) {
228
+ set_str(other.c_str(), other.size());
229
+ return *this;
230
+ }
231
+
232
+ /**
233
+ * Appends two Strings together and returns the result.
234
+ *
235
+ * ```
236
+ * auto str1 = String { "foo" };
237
+ * auto str2 = String { "bar" };
238
+ * assert_str_eq("foobar", str1 + str2);
239
+ *
240
+ * assert_str_eq("12", String("1") + "2");
241
+ * ```
242
+ */
243
+ String operator+(const String &other) const {
244
+ auto new_string = String(*this);
245
+ new_string.append(other);
246
+ return new_string;
247
+ }
248
+
249
+ /**
250
+ * Replaces the String data by copying from a C string.
251
+ *
252
+ * ```
253
+ * auto str = String { "abc" };
254
+ * str = "foo";
255
+ * assert_str_eq("foo", str);
256
+ * ```
257
+ *
258
+ * This method aborts if the given pointer is null.
259
+ */
260
+ String &operator=(const char *other) {
261
+ assert(other); // other[0] is read below, so null must be rejected first
262
+ if (other[0] == '\0') {
263
+ // Empty source: delegate to truncate(0) instead of copying.
264
+ truncate(0);
265
+ return *this;
266
+ }
267
+ set_str(other);
268
+ return *this;
269
+ }
266
+
267
+ /**
268
+ * Returns the character at the specified index.
269
+ *
270
+ * ```
271
+ * auto str = String { "abc" };
272
+ * assert_eq('b', str.at(1));
273
+ * ```
274
+ *
275
+ * This method aborts if the given index is beyond the end of the String.
276
+ *
277
+ * ```should_abort
278
+ * auto str = String { "abc" };
279
+ * str.at(10);
280
+ * ```
281
+ */
282
+ char at(size_t index) const {
283
+ assert(index < m_length);
284
+ return m_str[index];
285
+ }
286
+
287
+ /**
288
+ * Returns the character at the specified index.
289
+ *
290
+ * ```
291
+ * auto str = String { "abc" };
292
+ * assert_eq('b', str[1]);
293
+ * ```
294
+ *
295
+ * WARNING: This method does *not* check that the given
296
+ * index is within the bounds of the string data!
297
+ */
298
+ char operator[](size_t index) const {
299
+ return m_str[index];
300
+ }
301
+
302
+ /**
303
+ * Returns a reference to the character at the specified index.
304
+ *
305
+ * ```
306
+ * auto str = String { "abc" };
307
+ * assert_eq('b', str[1]);
308
+ * ```
309
+ *
310
+ * This allows you to set the character at the given index.
311
+ *
312
+ * ```
313
+ * auto str = String { "abc" };
314
+ * str[1] = 'r';
315
+ * assert_eq('r', str[1]);
316
+ * ```
317
+ *
318
+ * WARNING: This method does *not* check that the given
319
+ * index is within the bounds of the string data!
320
+ */
321
+ char &operator[](size_t index) {
322
+ return m_str[index];
323
+ }
324
+
325
+ /**
326
+ * Returns the last character in the string.
327
+ *
328
+ * ```
329
+ * auto str = String { "abc" };
330
+ * assert_eq('c', str.last_char());
331
+ * ```
332
+ *
333
+ * This method aborts if the String is zero-length.
334
+ *
335
+ * ```should_abort
336
+ * auto str = String { "" };
337
+ * str.last_char();
338
+ * ```
339
+ */
340
+ char last_char() const {
341
+ assert(m_length > 0);
342
+ return m_str[m_length - 1];
343
+ }
344
+
345
+ /**
346
+ * Removes the last character from the string and returns it.
347
+ *
348
+ * ```
349
+ * auto str = String { "abc" };
350
+ * assert_eq('c', str.pop_char());
351
+ * assert_eq('b', str.pop_char());
352
+ * assert_eq(1, str.size());
353
+ * assert_str_eq("a", str);
354
+ * ```
355
+ *
356
+ * This method aborts if the String is zero-length.
357
+ *
358
+ * ```should_abort
359
+ * auto str = String { "" };
360
+ * str.pop_char();
361
+ * ```
362
+ */
363
+ char pop_char() {
364
+ assert(m_length > 0);
365
+ char removed = m_str[--m_length];
366
+ // Re-terminate at the new end so c_str() no longer exposes the
367
+ // removed character.
368
+ m_str[m_length] = 0;
369
+ return removed;
370
+ }
366
+
367
+ /**
368
+ * Returns a new copy of the String.
369
+ *
370
+ * ```
371
+ * auto str1 = String { "abc" };
372
+ * auto str2 = str1.clone();
373
+ * assert_str_eq("abc", str2);
374
+ * ```
375
+ */
376
+ String clone() const { return String { *this }; }
377
+
378
+ /**
379
+ * Returns a String created from a substring of this one.
380
+ * Pass a start index and the desired length.
381
+ *
382
+ * ```
383
+ * auto str1 = String { "abc" };
384
+ * auto str2 = str1.substring(1, 2);
385
+ * assert_str_eq("bc", str2);
386
+ * ```
387
+ *
388
+ * This method aborts if the given start index is past the end.
389
+ *
390
+ * ```should_abort
391
+ * auto str = String { "abc" };
392
+ * str.substring(3, 1);
393
+ * ```
394
+ *
395
+ * ...and if the resulting end index (start + length) is past the end.
396
+ *
397
+ * ```should_abort
398
+ * auto str = String { "abc" };
399
+ * str.substring(1, 3);
400
+ * ```
401
+ */
402
+ String substring(size_t start, size_t length) const {
403
+ assert(start < m_length);
404
+ // Compare against the remaining byte count: `start + length` could
405
+ // wrap around for a huge length and slip past the check.
406
+ assert(length <= m_length - start);
407
+ return String(c_str() + start, length);
408
+ }
407
+
408
+ /**
409
+ * Returns a String created from a substring of this one.
410
+ * Pass a start index. (All characters to the end of the string
411
+ * will be included.)
412
+ *
413
+ * ```
414
+ * auto str1 = String { "abc" };
415
+ * auto str2 = str1.substring(1);
416
+ * assert_str_eq("bc", str2);
417
+ * ```
418
+ *
419
+ * This method aborts if the given start index is past the end.
420
+ *
421
+ * ```should_abort
422
+ * auto str = String { "abc" };
423
+ * str.substring(3);
424
+ * ```
425
+ */
426
+ String substring(size_t start) const {
427
+ return substring(start, m_length - start);
428
+ }
429
+
430
+ /**
431
+ * Returns a C string pointer to the internal data.
432
+ *
433
+ * ```
434
+ * auto str = String { "abc" };
435
+ * auto cstr = str.c_str();
436
+ * assert_eq(0, strcmp(cstr, "abc"));
437
+ * ```
438
+ */
439
+ const char *c_str() const { return m_str ? m_str : ""; }
440
+
441
+ /**
442
+ * Returns the number of bytes in the String.
443
+ *
444
+ * ```
445
+ * auto str = String { "🤖" }; // 4-byte emoji
446
+ * assert_eq(4, str.length());
447
+ * ```
448
+ */
449
+ size_t length() const { return m_length; }
450
+
451
+ /**
452
+ * Returns the number of bytes in the String.
453
+ *
454
+ * ```
455
+ * auto str = String { "🤖" }; // 4-byte emoji
456
+ * assert_eq(4, str.size());
457
+ * ```
458
+ */
459
+ size_t size() const { return m_length; }
460
+
461
+ /**
462
+ * Returns the number of bytes available in internal storage.
463
+ *
464
+ * ```
465
+ * auto str = String { "abc" };
466
+ * str.append_char('d');
467
+ * assert_eq(6, str.capacity()); // the capacity is doubled
468
+ * ```
469
+ */
470
+ size_t capacity() const { return m_capacity; }
471
+
472
+ /**
473
+ * Overwrites the String with the given C string.
474
+ *
475
+ * ```
476
+ * auto str = String { "abc" };
477
+ * str.set_str("xyz");
478
+ * assert_str_eq("xyz", str);
479
+ * ```
480
+ */
481
+ void set_str(const char *str) {
482
+ assert(str);
483
+ auto old_str = m_str;
484
+ m_length = strlen(str);
485
+ m_capacity = m_length;
486
+ m_str = new char[m_length + 1];
487
+ memcpy(m_str, str, sizeof(char) * (m_length + 1));
488
+ if (old_str)
489
+ delete[] old_str;
490
+ }
491
+
492
+ /**
493
+ * Overwrites the String with the given C string with
494
+ * specified length.
495
+ *
496
+ * The given C string can contain null characters.
497
+ * The full given length gets copied regardless of nulls.
498
+ *
499
+ * ```
500
+ * auto str = String { "abc" };
501
+ * str.set_str("def\0ghi", 7);
502
+ * assert_eq(7, str.size());
503
+ * assert_eq('d', str[0]);
504
+ * assert_eq('\0', str[3]);
505
+ * assert_eq('i', str[6]);
506
+ * ```
507
+ */
508
+ void set_str(const char *str, size_t length) {
509
+ assert(str);
510
+ auto old_str = m_str;
511
+ m_str = new char[length + 1];
512
+ memcpy(m_str, str, sizeof(char) * length);
513
+ m_str[length] = 0;
514
+ m_length = length;
515
+ m_capacity = length;
516
+ if (old_str)
517
+ delete[] old_str;
518
+ }
519
+
520
+ /**
521
+ * Inserts the given character at the front
522
+ * of the string, shifting all the other characters down by one.
523
+ *
524
+ * ```
525
+ * auto str = String { "23" };
526
+ * str.prepend_char('1');
527
+ * assert_str_eq("123", str);
528
+ * ```
529
+ */
530
+ void prepend_char(char c) {
531
+ size_t total_length = m_length + 1;
532
+ grow_at_least(total_length);
533
+ memmove(m_str + 1, m_str, m_length + 1); // 1 extra for null terminator
534
+ m_str[0] = c;
535
+ m_length = total_length;
536
+ }
537
+
538
+ /**
539
+ * Converts the given number and prepends the resulting string.
540
+ *
541
+ * ```
542
+ * auto str = String { "abc" };
543
+ * str.prepend(123);
544
+ * assert_str_eq("123abc", str);
545
+ * ```
546
+ */
547
+ void prepend(long long i) {
548
+ int length = snprintf(NULL, 0, "%lli", i);
549
+ char buf[length + 1];
550
+ snprintf(buf, length + 1, "%lli", i);
551
+ prepend(buf);
552
+ }
553
+
554
+ /**
555
+ * Prepends the given C string.
556
+ *
557
+ * ```
558
+ * auto str = String { "def" };
559
+ * str.prepend("abc");
560
+ * assert_str_eq("abcdef", str);
561
+ * ```
562
+ *
563
+ * A null pointer or an empty C string leaves the String unchanged.
564
+ * Null characters already stored in this String are preserved.
565
+ */
566
+ void prepend(const char *str) {
567
+ if (!str) return;
568
+ size_t prefix_length = strlen(str);
569
+ if (prefix_length == 0) return;
570
+ // Assemble the result by explicit lengths in a fresh heap buffer, so
571
+ // existing content after an embedded null is kept and no
572
+ // variable-length stack array is needed.
573
+ size_t total_length = prefix_length + m_length;
574
+ char *combined = new char[total_length + 1];
575
+ memcpy(combined, str, sizeof(char) * prefix_length);
576
+ memcpy(combined + prefix_length, c_str(), sizeof(char) * m_length);
577
+ combined[total_length] = 0;
578
+ delete[] m_str; // only after copying: str may point into m_str
579
+ m_str = combined;
580
+ m_length = total_length;
581
+ m_capacity = total_length;
582
+ }
572
+
573
+ /**
574
+ * Prepends the given String.
575
+ *
576
+ * ```
577
+ * auto str1 = String { "def" };
578
+ * auto str2 = String { "abc" };
579
+ * str1.prepend(str2);
580
+ * assert_str_eq("abcdef", str1);
581
+ * ```
582
+ *
583
+ * Null characters in either String are preserved.
584
+ */
585
+ void prepend(const String &str) {
586
+ size_t prefix_length = str.size();
587
+ if (prefix_length == 0) return;
588
+ // Copy by explicit length: going through strlen() would cut the
589
+ // result at the first embedded null character.
590
+ size_t total_length = prefix_length + m_length;
591
+ char *combined = new char[total_length + 1];
592
+ memcpy(combined, str.c_str(), sizeof(char) * prefix_length);
593
+ memcpy(combined + prefix_length, c_str(), sizeof(char) * m_length);
594
+ combined[total_length] = 0;
595
+ delete[] m_str; // only after copying: str may be this String itself
596
+ m_str = combined;
597
+ m_length = total_length;
598
+ m_capacity = total_length;
599
+ }
591
+
592
+ /**
593
+ * Inserts at the specified index the character given.
594
+ *
595
+ * ```
596
+ * auto str = String { "xyz" };
597
+ * str.insert(0, '-');
598
+ * str.insert(2, '-');
599
+ * str.insert(4, '-');
600
+ * assert_str_eq("-x-y-z", str);
601
+ * ```
602
+ *
603
+ * This method aborts if the index is past the end.
604
+ *
605
+ * ```should_abort
606
+ * auto str = String { "xxx" };
607
+ * str.insert(3, '-');
608
+ * ```
609
+ */
610
+ void insert(size_t index, char c) {
611
+ assert(index < m_length);
612
+ grow_at_least(m_length + 1);
613
+ size_t nbytes = m_length - index + 1; // 1 extra for null terminator
614
+ memmove(m_str + index + 1, m_str + index, nbytes);
615
+ m_str[index] = c;
616
+ m_length++;
617
+ }
618
+
619
+ /**
620
+ * Adds the given character at the end of the string.
621
+ *
622
+ * ```
623
+ * auto str = String { "ab" };
624
+ * str.append_char('c');
625
+ * assert_str_eq("abc", str);
626
+ * ```
627
+ */
628
+ void append_char(char c) {
629
+ size_t total_length = m_length + 1;
630
+ grow_at_least(total_length);
631
+ m_str[total_length - 1] = c;
632
+ m_str[total_length] = 0;
633
+ m_length = total_length;
634
+ }
635
+
636
+ /**
637
+ * Adds the given signed character at the end of the string.
638
+ *
639
+ * ```
640
+ * auto str = String { "ab" };
641
+ * str.append((signed char)'c');
642
+ * assert_str_eq("abc", str);
643
+ * ```
644
+ */
645
+ void append(signed char c) {
646
+ size_t total_length = m_length + 1;
647
+ grow_at_least(total_length);
648
+ m_str[total_length - 1] = c;
649
+ m_str[total_length] = 0;
650
+ m_length = total_length;
651
+ }
652
+
653
+ /**
654
+ * Adds the given unsigned character at the end of the string.
655
+ *
656
+ * ```
657
+ * auto str = String { "ab" };
658
+ * str.append((unsigned char)'c');
659
+ * assert_str_eq("abc", str);
660
+ * ```
661
+ */
662
+ void append(unsigned char c) {
663
+ append(static_cast<signed char>(c));
664
+ }
665
+
666
+ /**
667
+ * Converts the given number and appends the resulting string.
668
+ *
669
+ * ```
670
+ * auto str = String { "a" };
671
+ * str.append((size_t)123);
672
+ * assert_str_eq("a123", str);
673
+ * ```
674
+ */
675
+ void append(size_t i) {
676
+ int length = snprintf(NULL, 0, "%zu", i);
677
+ char buf[length + 1];
678
+ snprintf(buf, length + 1, "%zu", i);
679
+ append(buf);
680
+ }
681
+
682
+ /**
683
+ * Converts the given number and appends the resulting string.
684
+ *
685
+ * ```
686
+ * auto str = String { "a" };
687
+ * str.append((long long)123);
688
+ * assert_str_eq("a123", str);
689
+ * ```
690
+ */
691
+ void append(long long i) {
692
+ int length = snprintf(NULL, 0, "%lli", i);
693
+ char buf[length + 1];
694
+ snprintf(buf, length + 1, "%lli", i);
695
+ append(buf);
696
+ }
697
+
698
+ /**
699
+ * Converts the given number and appends the resulting string.
700
+ *
701
+ * ```
702
+ * auto str = String { "a" };
703
+ * str.append((int)123);
704
+ * assert_str_eq("a123", str);
705
+ * ```
706
+ */
707
+ void append(int i) {
708
+ int length = snprintf(NULL, 0, "%i", i);
709
+ char buf[length + 1];
710
+ snprintf(buf, length + 1, "%i", i);
711
+ append(buf);
712
+ }
713
+
714
+ /**
715
+ * Appends the given C string.
716
+ *
717
+ * ```
718
+ * auto str = String { "a" };
719
+ * str.append("bc");
720
+ * assert_str_eq("abc", str);
721
+ * ```
722
+ */
723
+ void append(const char *str) {
724
+ if (!str) return;
725
+ size_t length = strlen(str);
726
+ if (length == 0) return;
727
+ append(str, length);
728
+ }
729
+
730
+ /**
731
+ * Appends the given C string with specified length.
732
+ *
733
+ * The given C string can contain null characters.
734
+ * The full given length gets appended regardless of nulls.
735
+ *
736
+ * ```
737
+ * auto str = String { "x" };
738
+ * str.append("abc\0def", 7);
739
+ * assert_eq(8, str.size());
740
+ * assert_eq('x', str[0]);
741
+ * assert_eq('\0', str[4]);
742
+ * assert_eq('f', str[7]);
743
+ * ```
744
+ */
745
+ void append(const char *str, size_t length) {
746
+ if (!str) return;
747
+ if (length == 0) return;
748
+ size_t total_length = m_length + length;
749
+ grow_at_least(total_length);
750
+ memcpy(m_str + m_length, str, length);
751
+ m_str[total_length] = 0;
752
+ m_length = total_length;
753
+ }
754
+
755
+ /**
756
+ * Appends the given arguments, formatting using the specified format.
757
+ *
758
+ * This is roughly equivalent to constructing a C string using sprintf()
759
+ * and then appending the result to this String.
760
+ *
761
+ * ```
762
+ * auto str = String { "x" };
763
+ * str.append_sprintf("y%c%d", 'z', 1);
764
+ * assert_str_eq("xyz1", str);
765
+ * ```
766
+ */
767
+ void append_sprintf(const char *format, ...) {
768
+ va_list args;
769
+ va_start(args, format);
770
+ append_vsprintf(format, args);
771
+ va_end(args);
772
+ }
773
+
774
+ /**
775
+ * Appends the given va_list args, formatting using the specified format.
776
+ *
777
+ * Nothing is appended if vsnprintf() reports an error
778
+ * while measuring the formatted output.
779
+ */
780
+ void append_vsprintf(const char *format, va_list args) {
781
+ va_list args_copy;
782
+ va_copy(args_copy, args);
783
+ int fmt_length = vsnprintf(nullptr, 0, format, args_copy);
784
+ va_end(args_copy);
785
+ // vsnprintf() returns a negative value on failure; sizing a buffer
786
+ // from it would be invalid.
787
+ if (fmt_length < 0) return;
788
+ // Heap storage instead of a variable-length stack array.
789
+ char *buf = new char[fmt_length + 1];
790
+ vsnprintf(buf, fmt_length + 1, format, args);
791
+ append(buf);
792
+ delete[] buf;
793
+ }
786
+
787
+ /**
788
+ * Appends the given String.
789
+ *
790
+ * ```
791
+ * auto str1 = String { "x" };
792
+ * auto str2 = String { "yz" };
793
+ * str1.append(str2);
794
+ * assert_str_eq("xyz", str1);
795
+ * ```
796
+ */
797
+ void append(const String &str) {
798
+ if (str.size() == 0) return;
799
+ size_t total_length = m_length + str.size();
800
+ grow_at_least(total_length);
801
+ memcpy(m_str + m_length, str.c_str(), sizeof(char) * str.size());
802
+ m_length = total_length;
803
+ m_str[m_length] = 0;
804
+ }
805
+
806
+ /**
807
+ * Appends the given character the given number of times.
808
+ *
809
+ * ```
810
+ * auto str = String { "x" };
811
+ * str.append(2, 'y');
812
+ * assert_str_eq("xyy", str);
813
+ * ```
814
+ *
815
+ * Appending zero characters leaves the String unchanged.
816
+ */
817
+ void append(size_t n, char c) {
818
+ // Same early return as the other append overloads; it also avoids
819
+ // writing a terminator when there is nothing to add.
820
+ if (n == 0) return;
821
+ size_t total_length = m_length + n;
822
+ grow_at_least(total_length);
823
+ memset(m_str + m_length, c, sizeof(char) * n);
824
+ m_length = total_length;
825
+ m_str[m_length] = 0;
826
+ }
822
+
823
+ /**
824
+ * Returns true if this and the given String are equivalent.
825
+ *
826
+ * ```
827
+ * auto str1 = String { "abc" };
828
+ * auto str2 = String { "abc" };
829
+ * assert(str1 == str2);
830
+ * auto str3 = String { "xyz" };
831
+ * assert_not(str1 == str3);
832
+ * ```
833
+ */
834
+ bool operator==(const String &other) const {
835
+ if (size() != other.size())
836
+ return false;
837
+ return memcmp(c_str(), other.c_str(), sizeof(char) * m_length) == 0;
838
+ }
839
+
840
+ bool operator!=(const String &other) const {
841
+ return !operator==(other);
842
+ }
843
+
844
+ /**
845
+ * Returns true if this and the given C string are equivalent.
846
+ *
847
+ * ```
848
+ * auto str = String { "abc" };
849
+ * auto cstr1 = "abc";
850
+ * assert(str == cstr1);
851
+ * auto cstr2 = "xyz";
852
+ * assert_not(str == cstr2);
853
+ * ```
854
+ */
855
+ bool operator==(const char *other) const {
856
+ assert(other);
857
+ if (size() != strlen(other))
858
+ return false;
859
+ return memcmp(c_str(), other, sizeof(char) * m_length) == 0;
860
+ }
861
+
862
+ bool operator!=(const char *other) const {
863
+ return !(*this == other);
864
+ }
865
+
866
+ /**
867
+ * Returns true if this is alphanumerically greater than the given String.
868
+ *
869
+ * ```
870
+ * auto str1 = String { "def" };
871
+ * auto str2 = String { "abc" };
872
+ * assert(str1 > str2);
873
+ * assert_not(str2 > str1);
874
+ * ```
875
+ *
876
+ * The comparison is bytewise over the full length of both Strings,
877
+ * so embedded null characters take part in it. When one String is a
878
+ * prefix of the other, the longer one is the greater.
879
+ */
880
+ bool operator>(const String &other) const {
881
+ // Compare by explicit length; strcmp() would stop at the first
882
+ // embedded null character.
883
+ size_t common_length = std::min(m_length, other.m_length);
884
+ int order = memcmp(c_str(), other.c_str(), sizeof(char) * common_length);
885
+ if (order != 0)
886
+ return order > 0;
887
+ return m_length > other.m_length;
888
+ }
880
+
881
+ /**
882
+ * Returns true if this is alphanumerically less than the given String.
883
+ *
884
+ * ```
885
+ * auto str1 = String { "abc" };
886
+ * auto str2 = String { "def" };
887
+ * assert(str1 < str2);
888
+ * assert_not(str2 < str1);
889
+ * ```
890
+ *
891
+ * The comparison is bytewise over the full length of both Strings,
892
+ * so embedded null characters take part in it. When one String is a
893
+ * prefix of the other, the shorter one is the lesser.
894
+ */
895
+ bool operator<(const String &other) const {
896
+ // Compare by explicit length; strcmp() would stop at the first
897
+ // embedded null character.
898
+ size_t common_length = std::min(m_length, other.m_length);
899
+ int order = memcmp(c_str(), other.c_str(), sizeof(char) * common_length);
900
+ if (order != 0)
901
+ return order < 0;
902
+ return m_length < other.m_length;
903
+ }
895
+
896
+ /**
897
+ * Returns -1, 0, or 1 by comparing this String to the given String.
898
+ * -1 is returned if this String is alphanumerically less than the other one.
899
+ * 0 is returned if they are equivalent.
900
+ * 1 is returned if this String is alphanumerically greater than the other one.
901
+ *
902
+ * ```
903
+ * auto str1 = String { "def" };
904
+ * auto str2 = String { "abc" };
905
+ * assert_eq(1, str1.cmp(str2));
906
+ * assert_eq(-1, str2.cmp(str1));
907
+ * auto str3 = String { "abc" };
908
+ * assert_eq(0, str2.cmp(str3));
909
+ * ```
910
+ *
911
+ * When one String is a prefix of the other, the shorter one is the lesser.
912
+ *
913
+ * ```
914
+ * auto str1 = String { "x" };
915
+ * auto str2 = String { "xxx" };
916
+ * assert_eq(-1, str1.cmp(str2));
917
+ * assert_eq(1, str2.cmp(str1));
918
+ * ```
919
+ */
920
+ int cmp(const String &other) const {
921
+ // Bytes are compared as unsigned values over the shared prefix.
922
+ // An empty String has no shared prefix, so the loop is skipped.
923
+ size_t common_length = std::min(m_length, other.m_length);
924
+ for (size_t i = 0; i < common_length; ++i) {
925
+ auto c1 = (unsigned char)(*this)[i], c2 = (unsigned char)other[i];
926
+ if (c1 < c2)
927
+ return -1;
928
+ else if (c1 > c2)
929
+ return 1;
930
+ }
931
+ // The shared prefix matches, so the shorter String sorts first.
932
+ // Compare the lengths rather than subtracting them: the unsigned
933
+ // difference could be any magnitude, not just -1, 0, or 1.
934
+ if (m_length < other.m_length)
935
+ return -1;
936
+ if (m_length > other.m_length)
937
+ return 1;
938
+ return 0;
939
+ }
929
+
930
+ /**
931
+ * Finds the given String inside this one and returns its starting index.
932
+ * If not found, return -1.
933
+ *
934
+ * ```
935
+ * auto str1 = String { "hello world" };
936
+ * auto str2 = String { "lo" };
937
+ * assert_eq(3, str1.find(str2));
938
+ * auto str3 = String { "xx" };
939
+ * assert_eq(-1, str1.find(str3));
940
+ * ```
941
+ */
942
+ ssize_t find(const String &needle) const {
943
+     const size_t needle_length = needle.size();
944
+     if (needle.is_empty() || m_length < needle_length)
945
+         return -1; // an empty or over-long needle is never found
946
+     assert(m_str);
947
+     const size_t last_start = m_length - needle_length;
948
+     for (size_t start = 0; start <= last_start; ++start) {
949
+         if (memcmp(m_str + start, needle.c_str(), sizeof(char) * needle_length) == 0)
950
+             return start;
951
+     }
952
+     return -1;
953
+ }
954
+
955
+ /**
956
+ * Finds the given character inside this String and returns its starting index.
957
+ * If not found, return -1.
958
+ *
959
+ * ```
960
+ * auto str = String { "hello world" };
961
+ * assert_eq(6, str.find('w'));
962
+ * assert_eq(-1, str.find('x'));
963
+ * ```
964
+ */
965
+ ssize_t find(const char c) const {
966
+     size_t position = 0;
967
+     while (position < m_length && m_str[position] != c)
968
+         ++position;
969
+     return position < m_length ? (ssize_t)position : -1;
970
+ }
971
+
972
+ /**
973
+ * Truncates this String to the specified length.
974
+ *
975
+ * ```
976
+ * auto str = String { "abcdef" };
977
+ * str.truncate(3);
978
+ * assert_str_eq("abc", str);
979
+ * ```
980
+ *
981
+ * This method aborts if the given length is longer
982
+ * than the String currently is.
983
+ *
984
+ * ```should_abort
985
+ * auto str = String { "abc" };
986
+ * str.truncate(4);
987
+ * ```
988
+ */
989
+ void truncate(size_t length) {
990
+     assert(length <= m_length);
991
+     if (length > 0) {
992
+         m_str[length] = 0;
993
+         m_length = length;
994
+         return;
995
+     }
996
+     // Truncating to zero gives the buffer back instead of keeping it around.
997
+     delete[] m_str;
998
+     m_str = nullptr;
999
+     m_length = m_capacity = 0;
1000
+ }
1001
+
1002
+ /**
1003
+ * Truncates this String to a length of zero.
1004
+ *
1005
+ * ```
1006
+ * auto str = String { "abcdef" };
1007
+ * str.clear();
1008
+ * assert_eq(0, str.size());
1009
+ * ```
1010
+ */
1011
+ void clear() { this->truncate(0); }
1012
+
1013
+ /**
1014
+ * Removes one character from the end of the String,
1015
+ * if this String is not already empty.
1016
+ *
1017
+ * ```
1018
+ * auto str = String { "ab" };
1019
+ * str.chomp();
1020
+ * assert_str_eq("a", str);
1021
+ * str.chomp();
1022
+ * assert_str_eq("", str);
1023
+ * str.chomp();
1024
+ * assert_str_eq("", str);
1025
+ * ```
1026
+ */
1027
+ void chomp() {
1028
+     if (m_length > 0)
1029
+         truncate(m_length - 1);
1030
+ }
1031
+
1032
+ /**
1033
+ * Removes any trailing whitespace, including tabs and newlines,
1034
+ * from the end of the String.
1035
+ *
1036
+ * ```
1037
+ * auto str = String { "a \t\n " };
1038
+ * str.strip_trailing_whitespace();
1039
+ * assert_str_eq("a", str);
1040
+ * ```
1041
+ */
1042
+ void strip_trailing_whitespace() {
1043
+     // Peel off one trailing character per iteration until a
1044
+     // non-whitespace character (or the start of the String) is reached.
1045
+     while (m_length > 0) {
1046
+         const char last = m_str[m_length - 1];
1047
+         const bool is_whitespace = last == ' '
1048
+             || last == '\t'
1049
+             || last == '\n'
1050
+             || last == '\r';
1051
+         if (!is_whitespace)
1052
+             return;
1053
+         chomp();
1054
+     }
1055
+ }
1056
+
1057
+ /**
1058
+ * Removes any trailing spaces from the end of the String.
1059
+ *
1060
+ * ```
1061
+ * auto str = String { "a\n " };
1062
+ * str.strip_trailing_spaces();
1063
+ * assert_str_eq("a\n", str);
1064
+ * ```
1065
+ */
1066
+ void strip_trailing_spaces() {
1067
+     // Drop trailing spaces one at a time until another character (or nothing) is left.
1068
+     while (m_length > 0) {
1069
+         if (m_str[m_length - 1] != ' ')
1070
+             return;
1071
+         chomp();
1072
+     }
1073
+ }
1074
+
1075
+ /**
1076
+  * Removes all occurrences of the given character from the String,
1077
+  * compacting it in place. An empty String is left untouched.
1078
+  *
1079
+  * ```
1080
+  * auto str = String { "abcabac" };
1081
+  * str.remove('a');
1082
+  * assert_str_eq("bcbc", str);
1083
+  * ```
1084
+  */
1085
+ void remove(char character) {
1086
+     // An empty String may have no buffer at all; there is nothing to remove.
1087
+     if (m_length == 0)
1088
+         return;
1089
+     assert(m_str);
1090
+     // Single pass: every kept character is copied down to the write position.
1091
+     size_t kept = 0;
1092
+     for (size_t i = 0; i < m_length; ++i) {
1093
+         if (m_str[i] != character)
1094
+             m_str[kept++] = m_str[i];
1095
+     }
1096
+     m_length = kept;
1097
+     m_str[m_length] = '\0';
1098
+ }
1099
+
1100
+ /**
1101
+ * Returns true if the String has a length of zero.
1102
+ *
1103
+ * ```
1104
+ * auto str1 = String { "abc" };
1105
+ * auto str2 = String { "" };
1106
+ * assert_not(str1.is_empty());
1107
+ * assert(str2.is_empty());
1108
+ * ```
1109
+ */
1110
+ bool is_empty() const { return !m_length; }
1111
+
1112
+ /**
1113
+ * Returns a new String that is the result of incrementing
1114
+ * the last character of this String. If the last character
1115
+ * is z/Z/9, then the next-to-last character is incremented
1116
+ * (or a new one is prepended) and the last character is reset.
1117
+ *
1118
+ * ```
1119
+ * assert_str_eq("b", String("a").successive());
1120
+ * assert_str_eq("az", String("ay").successive());
1121
+ * assert_str_eq("ba", String("az").successive());
1122
+ * assert_str_eq("aaa", String("zz").successive());
1123
+ * assert_str_eq("AAA", String("ZZ").successive());
1124
+ * assert_str_eq("1", String("0").successive());
1125
+ * assert_str_eq("100", String("99").successive());
1126
+ * assert_str_eq("d000", String("c999").successive());
1127
+ * ```
1128
+ */
1129
+ String successive() {
1130
+     auto result = String { *this };
1131
+     assert(m_length > 0);
1132
+     const size_t last = size() - 1;
1133
+     // A trailing z/Z/9 rolls over and carries into the characters before it;
1134
+     // any other trailing character is simply bumped by one.
1135
+     switch (m_str[last]) {
1136
+     case 'z': result.increment_successive_char('a', 'z', 'a'); break;
1137
+     case 'Z': result.increment_successive_char('A', 'Z', 'A'); break;
1138
+     case '9': result.increment_successive_char('0', '9', '1'); break;
1139
+     default:
1140
+         result.m_str[last]++;
1141
+         break;
1142
+     }
1143
+     return result;
1144
+ }
1145
+
1146
+ /**
1147
+ * Returns a new String by appending the given arguments according
1148
+ * to the given format. This is a safer version of String::sprintf
1149
+ * that does not rely on format specifiers matching the argument type.
1150
+ *
1151
+ * ```
1152
+ * auto cstr = "hello";
1153
+ * unsigned char c = 'w'; // must specify signed or unsigned char
1154
+ * int num = 999;
1155
+ * auto str = String::format("{} {}orld {}", cstr, c, num);
1156
+ * assert_str_eq("hello world 999", str);
1157
+ * ```
1158
+ */
1159
+ template <typename... Args>
1160
+ static String format(const char *fmt, Args... args) {
1161
+     String formatted {};
1162
+     format(formatted, fmt, args...);
1163
+     return formatted;
1164
+ }
1165
+
1166
+ static void format(String &out, const char *fmt) {
1167
+     // No arguments remain: the rest of the format is copied verbatim.
1168
+     while (*fmt != '\0')
1169
+         out.append_char(*fmt++);
1170
+ }
1171
+
1172
+ template <typename T, typename... Args>
1173
+ static void format(String &out, const char *fmt, T first, Args... rest) {
1174
+     // Copy literal text up to the next "{}", substitute `first`, then recurse on the tail.
1175
+     for (; *fmt != '\0'; ++fmt) {
1176
+         if (fmt[0] != '{' || fmt[1] != '}') {
1177
+             out.append_char(*fmt);
1178
+             continue;
1179
+         }
1180
+         out.append(first);
1181
+         format(out, fmt + 2, rest...);
1182
+         return;
1183
+     }
1184
+ }
1185
+
1186
+ /**
1187
+ * Returns a new String where every character is converted to uppercase.
1188
+ *
1189
+ * ```
1190
+ * auto str = String("hElLo");
1191
+ * assert_str_eq("HELLO", str.uppercase());
1192
+ * ```
1193
+ */
1194
+ String uppercase() const {
1195
+     auto new_str = String(this);
1196
+     // Cast via unsigned char: passing a negative char to toupper() is undefined.
1197
+     for (size_t i = 0; i < new_str.m_length; ++i)
1198
+         new_str.m_str[i] = (char)toupper((unsigned char)new_str.m_str[i]);
1199
+     return new_str;
1200
+ }
1201
+
1202
+ /**
1203
+ * Returns a new String where every character is converted to lowercase.
1204
+ *
1205
+ * ```
1206
+ * auto str = String("hElLo");
1207
+ * assert_str_eq("hello", str.lowercase());
1208
+ * ```
1209
+ */
1210
+ String lowercase() const {
1211
+     auto new_str = String(this);
1212
+     // Cast via unsigned char: passing a negative char to tolower() is undefined.
1213
+     for (size_t i = 0; i < new_str.m_length; ++i)
1214
+         new_str.m_str[i] = (char)tolower((unsigned char)new_str.m_str[i]);
1215
+     return new_str;
1216
+ }
1217
+
1218
+ /**
1219
+ * Returns true if this String ends with the given String.
1220
+ *
1221
+ * ```
1222
+ * auto str = String("hello world");
1223
+ * assert(str.ends_with("world"));
1224
+ * assert_not(str.ends_with("xxx"));
1225
+ * ```
1226
+ */
1227
+ bool ends_with(const String &needle) const {
1228
+     if (needle.m_length == 0) return true; // an empty needle is trivially a suffix; keeps null buffers away from memcmp()
1229
+     if (m_length < needle.m_length) return false;
1230
+     return memcmp(m_str + m_length - needle.m_length, needle.m_str, needle.m_length) == 0;
1231
+ }
1232
+
1233
+ /**
1234
+ * Returns hash value of this String.
1235
+ * This uses the 'djb2' hash algorithm by Dan Bernstein.
1236
+ *
1237
+ * ```
1238
+ * auto str = String("hello");
1239
+ * assert_eq(210714636441, str.djb2_hash());
1240
+ * ```
1241
+ */
1242
+ size_t djb2_hash() const {
1243
+     // djb2: start from 5381 and fold each character in as hash * 33 + c.
1244
+     size_t hash = 5381;
1245
+     for (size_t i = 0; i < m_length; ++i) {
1246
+         const int c = (*this)[i];
1247
+         hash = hash * 33 + c;
1248
+     }
1249
+     return hash;
1250
+ }
1251
+
1252
+ /**
1253
+ * Prints the full string with printf(), character by character.
1254
+ * This method will print the full String, even if null characters
1255
+ * are encountered.
1256
+ *
1257
+ * ```
1258
+ * auto str = String("foo\0bar");
1259
+ * str.print();
1260
+ * ```
1261
+ */
1262
+ void print() const {
1263
+     // Emit character by character so embedded null characters are printed too.
1264
+     for (size_t i = 0; i < m_length; ++i)
1265
+         printf("%c", (*this)[i]);
1266
+     printf("\n");
1267
+ }
1268
+
1269
+ /**
1270
+ * Returns true if the string contains UTF-8-encoded
1271
+ * characters that seem to be valid, multibyte or not.
1272
+ *
1273
+ * An ASCII string "foo" would return true, because it's
1274
+ * also a valid UTF-8-encoded string. It can be
1275
+ * represented with UTF-8.
1276
+ *
1277
+ * NOTE: This is not fool-proof. There's a lot of
1278
+ * checks we aren't doing.
1279
+ *
1280
+ * ```
1281
+ * assert_eq(true, String("abc").contains_seemingly_valid_utf8_encoded_characters());
1282
+ * assert_eq(true, String("🐄").contains_seemingly_valid_utf8_encoded_characters());
1283
+ * assert_eq(false, String("\xC3").contains_seemingly_valid_utf8_encoded_characters());
1284
+ * ```
1285
+ */
1286
+ bool contains_seemingly_valid_utf8_encoded_characters() const {
1287
+     char scratch[5];
1288
+     int position = 0;
1289
+     // next_utf8_char() yields 0 at a clean end and a negative value on a truncated sequence.
1290
+     while ((position = next_utf8_char(position, scratch)) > 0)
1291
+         continue;
1292
+     return position == 0;
1293
+ }
1294
+
1295
+ /**
1296
+ * Returns true if the string contains multibyte
1297
+ * UTF-8-encoded characters that seem to be valid.
1298
+ *
1299
+ * NOTE: This is not fool-proof. There's a lot of
1300
+ * checks we aren't doing.
1301
+ *
1302
+ * ```
1303
+ * assert_eq(false, String("abc").contains_utf8_encoded_multibyte_characters());
1304
+ * assert_eq(true, String("🐄").contains_utf8_encoded_multibyte_characters());
1305
+ * assert_eq(false, String("\xC3").contains_utf8_encoded_multibyte_characters());
1306
+ * ```
1307
+ */
1308
+ bool contains_utf8_encoded_multibyte_characters() const {
1309
+     char scratch[5];
1310
+     bool saw_multibyte = false;
1311
+     int previous = 0, current = 0;
1312
+     // A step of more than one byte means that a multibyte
1313
+     // character was consumed.
1314
+     while ((current = next_utf8_char(previous, scratch)) > 0) {
1315
+         if (current - previous > 1)
1316
+             saw_multibyte = true;
1317
+         previous = current;
1318
+     }
1319
+     return current == 0 && saw_multibyte;
1320
+ }
1321
+
1322
+ /**
1323
+ * Takes an integer for the starting index and a
1324
+ * buffer char*. Consumes the proper number of
1325
+ * bytes, appending to the buffer C string to build
1326
+ * a single UTF-8-encoded character.
1327
+ *
1328
+ * Be sure to pass a pointer to a buffer that is
1329
+ * at least 5 characters in size (one is used for
1330
+ * the null terminator).
1331
+ *
1332
+ * Returns the new index, a positive integer,
1333
+ * on success.
1334
+ *
1335
+ * Returns zero (0) if the end of the string has
1336
+ * been reached.
1337
+ *
1338
+ * Returns a negative integer if the character
1339
+ * is invalid.
1340
+ *
1341
+ * (Note: this method only does rudimentary checking
1342
+ * for a valid character, i.e. does it have enough
1343
+ * bytes to satisfy the encoding.)
1344
+ *
1345
+ * ```
1346
+ * auto str = String("ab☺🐄");
1347
+ * int index = 0;
1348
+ *
1349
+ * char buf[5];
1350
+ * index = str.next_utf8_char(index, buf);
1351
+ * assert_eq(1, index);
1352
+ * assert_str_eq("a", String(buf));
1353
+ *
1354
+ * buf[0] = '\0';
1355
+ * index = str.next_utf8_char(index, buf);
1356
+ * assert_eq(2, index);
1357
+ * assert_str_eq("b", String(buf));
1358
+ *
1359
+ * buf[0] = '\0';
1360
+ * index = str.next_utf8_char(index, buf);
1361
+ * assert_eq(5, index);
1362
+ * assert_str_eq("☺", String(buf));
1363
+ *
1364
+ * buf[0] = '\0';
1365
+ * index = str.next_utf8_char(index, buf);
1366
+ * assert_eq(9, index);
1367
+ * assert_str_eq("🐄", String(buf));
1368
+ *
1369
+ * buf[0] = '\0';
1370
+ * index = str.next_utf8_char(index, buf);
1371
+ * assert_eq(0, index);
1372
+ * assert_str_eq("", String(buf));
1373
+ * ```
1374
+ */
1375
+ int next_utf8_char(int index, char *buffer) const {
1376
+     if (!m_str)
1377
+         return 0;
1378
+     assert(m_length < INT_MAX);
1379
+     assert(index >= 0);
1380
+     const size_t start = (size_t)index;
1381
+     if (start >= m_length)
1382
+         return 0;
1383
+     // The lead byte is stored right away, even when the sequence later
1384
+     // turns out to be cut short.
1385
+     buffer[0] = m_str[start];
1386
+     const unsigned char lead = (unsigned char)buffer[0];
1387
+     // The high bits of the lead byte announce the length of the sequence.
1388
+     size_t byte_count = 1;
1389
+     if ((lead >> 3) == 30) // 11110xxx, 4 bytes
1390
+         byte_count = 4;
1391
+     else if ((lead >> 4) == 14) // 1110xxxx, 3 bytes
1392
+         byte_count = 3;
1393
+     else if ((lead >> 5) == 6) // 110xxxxx, 2 bytes
1394
+         byte_count = 2;
1395
+     // Too few bytes remain to complete the sequence.
1396
+     if (start + byte_count > m_length)
1397
+         return -1;
1398
+     for (size_t offset = 1; offset < byte_count; ++offset)
1399
+         buffer[offset] = m_str[start + offset];
1400
+     buffer[byte_count] = 0;
1401
+     return (int)(start + byte_count);
1402
+ }
1403
+
1404
+ private:
1405
+ void grow(size_t new_capacity) {
1406
+     assert(new_capacity >= m_length);
1407
+     auto old_str = m_str;
1408
+     m_str = new char[new_capacity + 1];
1409
+     if (old_str) // never copy more than the new buffer holds (the assert allows a smaller capacity)
1410
+         memcpy(m_str, old_str, sizeof(char) * (std::min(m_capacity, new_capacity) + 1));
1411
+     else
1412
+         m_str[0] = '\0';
1413
+     delete[] old_str;
1414
+     m_capacity = new_capacity;
1415
+ }
1416
+
1417
+ void grow_at_least(size_t min_capacity) {
1418
+     if (min_capacity <= m_capacity) return;
1419
+     // Jump straight to the request when scaling the current capacity would not cover it.
1420
+     if (m_capacity == 0 || min_capacity > m_capacity * STRING_GROW_FACTOR)
1421
+         grow(min_capacity);
1422
+     else
1423
+         grow(m_capacity * STRING_GROW_FACTOR);
1424
+ }
1425
+
1426
+ void increment_successive_char(char first_char_in_range, char last_char_in_range, char prepend_char_to_grow) {
1427
+     assert(m_length > 0);
1428
+     ssize_t index = m_length - 1;
1429
+     // Roll trailing end-of-range characters over to the start of the range,
1430
+     // walking towards the front of the String.
1431
+     while (index >= 0 && m_str[index] == last_char_in_range)
1432
+         m_str[index--] = first_char_in_range;
1433
+     if (index < 0) {
1434
+         // Every character rolled over, so the String gains a new leading character.
1435
+         this->prepend_char(prepend_char_to_grow);
1436
+         return;
1437
+     }
1438
+     // Otherwise the first character that did not roll over is bumped by one;
1439
+     // it is incremented as-is, whatever range it belongs to.
1440
+     m_str[index]++;
1441
+ }
1442
+
1443
+ char *m_str { nullptr }; // heap buffer for the characters plus a terminator; nullptr while nothing is allocated
1444
+ size_t m_length { 0 }; // number of characters stored, not counting the terminator
1445
+ size_t m_capacity { 0 }; // characters the buffer can hold; it is allocated with one extra slot
1446
+ };
1447
+ }