natalie_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
@@ -0,0 +1,1447 @@
1
+ #pragma once
2
+
3
+ #include <algorithm>
4
+ #include <assert.h>
5
+ #include <ctype.h>
6
+ #include <limits.h>
7
+ #include <stdarg.h>
8
+ #include <stdio.h>
9
+ #include <string.h>
10
+
11
+ namespace TM {
12
+
13
+ class String {
14
+ public:
15
+ static constexpr int STRING_GROW_FACTOR = 2;
16
+
17
+ /**
18
+ * Constructs an empty String.
19
+ *
20
+ * ```
21
+ * auto str = String();
22
+ * assert_eq(0, str.size());
23
+ * ```
24
+ */
25
+ String() { }
26
+
27
+ /**
28
+ * Constructs a new String by copying the contents
29
+ * from an existing C string.
30
+ *
31
+ * ```
32
+ * auto cstr = "foo";
33
+ * auto str = String(cstr);
34
+ * assert_eq(3, str.size());
35
+ * assert_neq(str.c_str(), cstr);
36
+ * ```
37
+ */
38
+ String(const char *str) {
39
+ assert(str);
40
+ set_str(str);
41
+ }
42
+
43
+ /**
44
+ * Constructs a new String by copying the contents
45
+ * from an existing C string with a given length.
46
+ *
47
+ * The given C string can contain null characters.
48
+ * The full given length gets copied regardless of nulls.
49
+ *
50
+ * ```
51
+ * auto cstr = "foo\0bar";
52
+ * auto str = String(cstr, 7);
53
+ * assert_eq(7, str.size());
54
+ * assert_eq('\0', str[3]);
55
+ * assert_eq('r', str[6]);
56
+ * ```
57
+ */
58
+ String(const char *str, size_t length) {
59
+ assert(str);
60
+ set_str(str, length);
61
+ }
62
+
63
+ /**
64
+ * Constructs a new String by copying the contents
65
+ * from an existing String.
66
+ *
67
+ * ```
68
+ * auto str1 = String { "foo" };
69
+ * auto str2 = String { str1 };
70
+ * assert_str_eq("foo", str2);
71
+ * ```
72
+ */
73
+ String(const String &other) {
74
+ set_str(other.c_str(), other.size());
75
+ }
76
+
77
+ /**
78
+ * Constructs a new String by copying the contents
79
+ * from an existing String pointer.
80
+ *
81
+ * ```
82
+ * auto str1 = String { "foo" };
83
+ * auto str2 = String { &str1 };
84
+ * assert_str_eq("foo", str2);
85
+ * ```
86
+ */
87
+ String(const String *other) {
88
+ assert(other);
89
+ set_str(other->c_str(), other->size());
90
+ }
91
+
92
+ /**
93
+ * Constructs a new String with the single given character.
94
+ *
95
+ * ```
96
+ * auto str = String { 'x' };
97
+ * assert_str_eq("x", str);
98
+ * ```
99
+ */
100
+ String(char c) {
101
+ char buf[2] = { c, 0 };
102
+ set_str(buf);
103
+ }
104
+
105
+ /**
106
+ * Constructs a new String of the specified length
107
+ * by filling it with the given character.
108
+ *
109
+ * ```
110
+ * auto str = String { 10, 'x' };
111
+ * assert_str_eq("xxxxxxxxxx", str);
112
+ * ```
113
+ */
114
+ String(size_t length, char c) {
115
+ char buf[length];
116
+ memset(buf, c, sizeof(char) * length);
117
+ set_str(buf, length);
118
+ }
119
+
120
+ /**
121
+ * Constructs a new String by converting the given number.
122
+ *
123
+ * ```
124
+ * auto str = String { (long long)10 };
125
+ * assert_str_eq("10", str);
126
+ * ```
127
+ */
128
+ String(long long number) {
129
+ int length = snprintf(NULL, 0, "%lli", number);
130
+ char buf[length + 1];
131
+ snprintf(buf, length + 1, "%lli", number);
132
+ set_str(buf);
133
+ }
134
+
135
+ /**
136
+ * Constructs a new String by converting the given number.
137
+ *
138
+ * ```
139
+ * auto str = String { (int)10 };
140
+ * assert_str_eq("10", str);
141
+ * ```
142
+ */
143
+ String(int number) {
144
+ int length = snprintf(NULL, 0, "%d", number);
145
+ char buf[length + 1];
146
+ snprintf(buf, length + 1, "%d", number);
147
+ set_str(buf);
148
+ }
149
+
150
+ /**
151
+ * Constructs a new String by converting the given
152
+ * double-precision float.
153
+ *
154
+ * ```
155
+ * auto str = String { 4.1 };
156
+ * assert_str_eq("4.1000", str);
157
+ * ```
158
+ *
159
+ * You can optionally specify the decimal precision.
160
+ *
161
+ * ```
162
+ * auto str = String { 4.1, 1 };
163
+ * assert_str_eq("4.1", str);
164
+ * ```
165
+ */
166
+ String(double number, int precision = 4) {
167
+ int length = snprintf(NULL, 0, "%.*f", precision, number);
168
+ char buf[length + 1];
169
+ snprintf(buf, length + 1, "%.*f", precision, number);
170
+ set_str(buf);
171
+ }
172
+
173
// Output styles accepted by String::hex().
enum class HexFormat {
    UppercaseAndPrefixed, // uppercase digits with a "0X" prefix
    Uppercase, // uppercase digits, no prefix
    LowercaseAndPrefixed, // lowercase digits with a "0x" prefix
    Lowercase, // lowercase digits, no prefix
};
179
+
180
+ /**
181
+ * Creates a new String by converting the given
182
+ * number to hexadecimal format. By default, the
183
+ * result will be uppercase and prefixed with "0X".
184
+ *
185
+ * ```
186
+ * auto str = String::hex(254);
187
+ * assert_str_eq("0XFE", str);
188
+ * ```
189
+ *
190
+ * You can optionally specify the format, one of:
191
+ * - HexFormat::UppercaseAndPrefixed
192
+ * - HexFormat::Uppercase
193
+ * - HexFormat::LowercaseAndPrefixed
194
+ * - HexFormat::Lowercase
195
+ *
196
+ * ```
197
+ * auto str = String::hex(254, String::HexFormat::Lowercase);
198
+ * assert_str_eq("fe", str);
199
+ * ```
200
+ */
201
+ static String hex(long long number, HexFormat format = HexFormat::UppercaseAndPrefixed) {
202
+ bool uppercase = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::Uppercase;
203
+ bool prefixed = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::LowercaseAndPrefixed;
204
+ const char *format_str = uppercase ? "%llX" : "%llx";
205
+ int length = snprintf(NULL, 0, format_str, number);
206
+ char buf[length + 1];
207
+ snprintf(buf, length + 1, format_str, number);
208
+ auto str = String(buf);
209
+ if (prefixed)
210
+ str.prepend(uppercase ? "0X" : "0x");
211
+ return str;
212
+ }
213
+
214
+ virtual ~String() {
215
+ delete[] m_str;
216
+ }
217
+
218
+ /**
219
+ * Replaces the String data by copying from another String.
220
+ *
221
+ * ```
222
+ * auto str1 = String { "foo" };
223
+ * auto str2 = String { str1 };
224
+ * assert_str_eq("foo", str2);
225
+ * ```
226
+ */
227
+ String &operator=(const String &other) {
228
+ set_str(other.c_str(), other.size());
229
+ return *this;
230
+ }
231
+
232
+ /**
233
+ * Appends two Strings together and returns the result.
234
+ *
235
+ * ```
236
+ * auto str1 = String { "foo" };
237
+ * auto str2 = String { "bar" };
238
+ * assert_str_eq("foobar", str1 + str2);
239
+ *
240
+ * assert_str_eq("12", String("1") + "2");
241
+ * ```
242
+ */
243
+ String operator+(const String &other) const {
244
+ auto new_string = String(*this);
245
+ new_string.append(other);
246
+ return new_string;
247
+ }
248
+
249
+ /**
250
+ * Replaces the String data by copying from a C string.
251
+ *
252
+ * ```
253
+ * auto cstr = "foo";
254
+ * auto str = String { cstr };
255
+ * assert_str_eq("foo", str);
256
+ * ```
257
+ */
258
+ String &operator=(const char *other) {
259
+ if (other[0] == '\0') {
260
+ truncate(0);
261
+ return *this;
262
+ }
263
+ set_str(other);
264
+ return *this;
265
+ }
266
+
267
+ /**
268
+ * Returns the character at the specified index.
269
+ *
270
+ * ```
271
+ * auto str = String { "abc" };
272
+ * assert_eq('b', str.at(1));
273
+ * ```
274
+ *
275
+ * This method aborts if the given index is beyond the end of the String.
276
+ *
277
+ * ```should_abort
278
+ * auto str = String { "abc" };
279
+ * str.at(10);
280
+ * ```
281
+ */
282
+ char at(size_t index) const {
283
+ assert(index < m_length);
284
+ return m_str[index];
285
+ }
286
+
287
+ /**
288
+ * Returns the character at the specified index.
289
+ *
290
+ * ```
291
+ * auto str = String { "abc" };
292
+ * assert_eq('b', str[1]);
293
+ * ```
294
+ *
295
+ * WARNING: This method does *not* check that the given
296
+ * index is within the bounds of the string data!
297
+ */
298
+ char operator[](size_t index) const {
299
+ return m_str[index];
300
+ }
301
+
302
+ /**
303
+ * Returns a reference to the character at the specified index.
304
+ *
305
+ * ```
306
+ * auto str = String { "abc" };
307
+ * assert_eq('b', str[1]);
308
+ * ```
309
+ *
310
+ * This allows you to set the character at the given index.
311
+ *
312
+ * ```
313
+ * auto str = String { "abc" };
314
+ * str[1] = 'r';
315
+ * assert_eq('r', str[1]);
316
+ * ```
317
+ *
318
+ * WARNING: This method does *not* check that the given
319
+ * index is within the bounds of the string data!
320
+ */
321
+ char &operator[](size_t index) {
322
+ return m_str[index];
323
+ }
324
+
325
+ /**
326
+ * Returns the last character in the string.
327
+ *
328
+ * ```
329
+ * auto str = String { "abc" };
330
+ * assert_eq('c', str.last_char());
331
+ * ```
332
+ *
333
+ * This method aborts if the String is zero-length.
334
+ *
335
+ * ```should_abort
336
+ * auto str = String { "" };
337
+ * str.last_char();
338
+ * ```
339
+ */
340
+ char last_char() const {
341
+ assert(m_length > 0);
342
+ return m_str[m_length - 1];
343
+ }
344
+
345
+ /**
346
+ * Removes the last character from the string and returns it.
347
+ *
348
+ * ```
349
+ * auto str = String { "abc" };
350
+ * assert_eq('c', str.pop_char());
351
+ * assert_eq('b', str.pop_char());
352
+ * assert_eq(1, str.size());
353
+ * ```
354
+ *
355
+ * This method aborts if the String is zero-length.
356
+ *
357
+ * ```should_abort
358
+ * auto str = String { "" };
359
+ * str.pop_char();
360
+ * ```
361
+ */
362
+ char pop_char() {
363
+ assert(m_length > 0);
364
+ return m_str[--m_length];
365
+ }
366
+
367
+ /**
368
+ * Returns a new copy of the String.
369
+ *
370
+ * ```
371
+ * auto str1 = String { "abc" };
372
+ * auto str2 = str1.clone();
373
+ * assert_str_eq("abc", str2);
374
+ * ```
375
+ */
376
+ String clone() const { return String { *this }; }
377
+
378
+ /**
379
+ * Returns a String created from a substring of this one.
380
+ * Pass a start index and the desired length.
381
+ *
382
+ * ```
383
+ * auto str1 = String { "abc" };
384
+ * auto str2 = str1.substring(1, 2);
385
+ * assert_str_eq("bc", str2);
386
+ * ```
387
+ *
388
+ * This method aborts if the given start index is past the end.
389
+ *
390
+ * ```should_abort
391
+ * auto str = String { "abc" };
392
+ * str.substring(3, 1);
393
+ * ```
394
+ *
395
+ * ...and if the resulting end index (start + length) is past the end.
396
+ *
397
+ * ```should_abort
398
+ * auto str = String { "abc" };
399
+ * str.substring(1, 3);
400
+ * ```
401
+ */
402
+ String substring(size_t start, size_t length) const {
403
+ assert(start < m_length);
404
+ assert(start + length <= m_length);
405
+ return String(c_str() + start, length);
406
+ }
407
+
408
+ /**
409
+ * Returns a String created from a substring of this one.
410
+ * Pass a start index. (All characters to the end of the string
411
+ * will be included.)
412
+ *
413
+ * ```
414
+ * auto str1 = String { "abc" };
415
+ * auto str2 = str1.substring(1);
416
+ * assert_str_eq("bc", str2);
417
+ * ```
418
+ *
419
+ * This method aborts if the given start index is past the end.
420
+ *
421
+ * ```should_abort
422
+ * auto str = String { "abc" };
423
+ * str.substring(3);
424
+ * ```
425
+ */
426
+ String substring(size_t start) const {
427
+ return substring(start, m_length - start);
428
+ }
429
+
430
+ /**
431
+ * Returns a C string pointer to the internal data.
432
+ *
433
+ * ```
434
+ * auto str = String { "abc" };
435
+ * auto cstr = str.c_str();
436
+ * assert_eq(0, strcmp(cstr, "abc"));
437
+ * ```
438
+ */
439
+ const char *c_str() const { return m_str ? m_str : ""; }
440
+
441
+ /**
442
+ * Returns the number of bytes in the String.
443
+ *
444
+ * ```
445
+ * auto str = String { "🤖" }; // 4-byte emoji
446
+ * assert_eq(4, str.length());
447
+ * ```
448
+ */
449
+ size_t length() const { return m_length; }
450
+
451
+ /**
452
+ * Returns the number of bytes in the String.
453
+ *
454
+ * ```
455
+ * auto str = String { "🤖" }; // 4-byte emoji
456
+ * assert_eq(4, str.size());
457
+ * ```
458
+ */
459
+ size_t size() const { return m_length; }
460
+
461
+ /**
462
+ * Returns the number of bytes available in internal storage.
463
+ *
464
+ * ```
465
+ * auto str = String { "abc" };
466
+ * str.append_char('d');
467
+ * assert_eq(6, str.capacity()); // the capacity is doubled
468
+ * ```
469
+ */
470
+ size_t capacity() const { return m_capacity; }
471
+
472
+ /**
473
+ * Overwrites the String with the given C string.
474
+ *
475
+ * ```
476
+ * auto str = String { "abc" };
477
+ * str.set_str("xyz");
478
+ * assert_str_eq("xyz", str);
479
+ * ```
480
+ */
481
+ void set_str(const char *str) {
482
+ assert(str);
483
+ auto old_str = m_str;
484
+ m_length = strlen(str);
485
+ m_capacity = m_length;
486
+ m_str = new char[m_length + 1];
487
+ memcpy(m_str, str, sizeof(char) * (m_length + 1));
488
+ if (old_str)
489
+ delete[] old_str;
490
+ }
491
+
492
+ /**
493
+ * Overwrites the String with the given C string with
494
+ * specified length.
495
+ *
496
+ * The given C string can contain null characters.
497
+ * The full given length gets copied regardless of nulls.
498
+ *
499
+ * ```
500
+ * auto str = String { "abc" };
501
+ * str.set_str("def\0ghi", 7);
502
+ * assert_eq(7, str.size());
503
+ * assert_eq('d', str[0]);
504
+ * assert_eq('\0', str[3]);
505
+ * assert_eq('i', str[6]);
506
+ * ```
507
+ */
508
+ void set_str(const char *str, size_t length) {
509
+ assert(str);
510
+ auto old_str = m_str;
511
+ m_str = new char[length + 1];
512
+ memcpy(m_str, str, sizeof(char) * length);
513
+ m_str[length] = 0;
514
+ m_length = length;
515
+ m_capacity = length;
516
+ if (old_str)
517
+ delete[] old_str;
518
+ }
519
+
520
+ /**
521
+ * Inserts the given character at the front
522
+ * of the string, shifting all the other characters down by one.
523
+ *
524
+ * ```
525
+ * auto str = String { "23" };
526
+ * str.prepend_char('1');
527
+ * assert_str_eq("123", str);
528
+ * ```
529
+ */
530
+ void prepend_char(char c) {
531
+ size_t total_length = m_length + 1;
532
+ grow_at_least(total_length);
533
+ memmove(m_str + 1, m_str, m_length + 1); // 1 extra for null terminator
534
+ m_str[0] = c;
535
+ m_length = total_length;
536
+ }
537
+
538
+ /**
539
+ * Converts the given number and prepends the resulting string.
540
+ *
541
+ * ```
542
+ * auto str = String { "abc" };
543
+ * str.prepend(123);
544
+ * assert_str_eq("123abc", str);
545
+ * ```
546
+ */
547
+ void prepend(long long i) {
548
+ int length = snprintf(NULL, 0, "%lli", i);
549
+ char buf[length + 1];
550
+ snprintf(buf, length + 1, "%lli", i);
551
+ prepend(buf);
552
+ }
553
+
554
+ /**
555
+ * Prepends the given C string.
556
+ *
557
+ * ```
558
+ * auto str = String { "def" };
559
+ * str.prepend("abc");
560
+ * assert_str_eq("abcdef", str);
561
+ * ```
562
+ */
563
+ void prepend(const char *str) {
564
+ if (!str) return;
565
+ size_t new_length = strlen(str);
566
+ if (new_length == 0) return;
567
+ char buf[m_length + 1];
568
+ memcpy(buf, c_str(), sizeof(char) * (m_length + 1));
569
+ set_str(str);
570
+ append(buf);
571
+ }
572
+
573
+ /**
574
+ * Prepends the given String.
575
+ *
576
+ * ```
577
+ * auto str1 = String { "def" };
578
+ * auto str2 = String { "abc" };
579
+ * str1.prepend(str2);
580
+ * assert_str_eq("abcdef", str1);
581
+ * ```
582
+ */
583
+ void prepend(const String &str) {
584
+ size_t new_length = str.size();
585
+ if (new_length == 0) return;
586
+ char buf[new_length + m_length + 1];
587
+ memcpy(buf, str.c_str(), sizeof(char) * new_length);
588
+ memcpy(buf + new_length, c_str(), sizeof(char) * (m_length + 1));
589
+ set_str(buf);
590
+ }
591
+
592
+ /**
593
+ * Inserts at the specified index the character given.
594
+ *
595
+ * ```
596
+ * auto str = String { "xyz" };
597
+ * str.insert(0, '-');
598
+ * str.insert(2, '-');
599
+ * str.insert(4, '-');
600
+ * assert_str_eq("-x-y-z", str);
601
+ * ```
602
+ *
603
+ * This method aborts if the index is past the end.
604
+ *
605
+ * ```should_abort
606
+ * auto str = String { "xxx" };
607
+ * str.insert(3, '-');
608
+ * ```
609
+ */
610
+ void insert(size_t index, char c) {
611
+ assert(index < m_length);
612
+ grow_at_least(m_length + 1);
613
+ size_t nbytes = m_length - index + 1; // 1 extra for null terminator
614
+ memmove(m_str + index + 1, m_str + index, nbytes);
615
+ m_str[index] = c;
616
+ m_length++;
617
+ }
618
+
619
+ /**
620
+ * Adds the given character at the end of the string.
621
+ *
622
+ * ```
623
+ * auto str = String { "ab" };
624
+ * str.append_char('c');
625
+ * assert_str_eq("abc", str);
626
+ * ```
627
+ */
628
+ void append_char(char c) {
629
+ size_t total_length = m_length + 1;
630
+ grow_at_least(total_length);
631
+ m_str[total_length - 1] = c;
632
+ m_str[total_length] = 0;
633
+ m_length = total_length;
634
+ }
635
+
636
+ /**
637
+ * Adds the given signed character at the end of the string.
638
+ *
639
+ * ```
640
+ * auto str = String { "ab" };
641
+ * str.append((signed char)'c');
642
+ * assert_str_eq("abc", str);
643
+ * ```
644
+ */
645
+ void append(signed char c) {
646
+ size_t total_length = m_length + 1;
647
+ grow_at_least(total_length);
648
+ m_str[total_length - 1] = c;
649
+ m_str[total_length] = 0;
650
+ m_length = total_length;
651
+ }
652
+
653
+ /**
654
+ * Adds the given unsigned character at the end of the string.
655
+ *
656
+ * ```
657
+ * auto str = String { "ab" };
658
+ * str.append((unsigned char)'c');
659
+ * assert_str_eq("abc", str);
660
+ * ```
661
+ */
662
void append(unsigned char c) {
    // Convert and forward to the signed char overload, which does the
    // actual work.
    const signed char as_signed = static_cast<signed char>(c);
    append(as_signed);
}
665
+
666
+ /**
667
+ * Converts the given number and appends the resulting string.
668
+ *
669
+ * ```
670
+ * auto str = String { "a" };
671
+ * str.append((size_t)123);
672
+ * assert_str_eq("a123", str);
673
+ * ```
674
+ */
675
+ void append(size_t i) {
676
+ int length = snprintf(NULL, 0, "%zu", i);
677
+ char buf[length + 1];
678
+ snprintf(buf, length + 1, "%zu", i);
679
+ append(buf);
680
+ }
681
+
682
+ /**
683
+ * Converts the given number and appends the resulting string.
684
+ *
685
+ * ```
686
+ * auto str = String { "a" };
687
+ * str.append((long long)123);
688
+ * assert_str_eq("a123", str);
689
+ * ```
690
+ */
691
+ void append(long long i) {
692
+ int length = snprintf(NULL, 0, "%lli", i);
693
+ char buf[length + 1];
694
+ snprintf(buf, length + 1, "%lli", i);
695
+ append(buf);
696
+ }
697
+
698
+ /**
699
+ * Converts the given number and appends the resulting string.
700
+ *
701
+ * ```
702
+ * auto str = String { "a" };
703
+ * str.append((int)123);
704
+ * assert_str_eq("a123", str);
705
+ * ```
706
+ */
707
+ void append(int i) {
708
+ int length = snprintf(NULL, 0, "%i", i);
709
+ char buf[length + 1];
710
+ snprintf(buf, length + 1, "%i", i);
711
+ append(buf);
712
+ }
713
+
714
+ /**
715
+ * Appends the given C string.
716
+ *
717
+ * ```
718
+ * auto str = String { "a" };
719
+ * str.append("bc");
720
+ * assert_str_eq("abc", str);
721
+ * ```
722
+ */
723
+ void append(const char *str) {
724
+ if (!str) return;
725
+ size_t length = strlen(str);
726
+ if (length == 0) return;
727
+ append(str, length);
728
+ }
729
+
730
+ /**
731
+ * Appends the given C string with specified length.
732
+ *
733
+ * The given C string can contain null characters.
734
+ * The full given length gets appended regardless of nulls.
735
+ *
736
+ * ```
737
+ * auto str = String { "x" };
738
+ * str.append("abc\0def", 7);
739
+ * assert_eq(8, str.size());
740
+ * assert_eq('x', str[0]);
741
+ * assert_eq('\0', str[4]);
742
+ * assert_eq('f', str[7]);
743
+ * ```
744
+ */
745
+ void append(const char *str, size_t length) {
746
+ if (!str) return;
747
+ if (length == 0) return;
748
+ size_t total_length = m_length + length;
749
+ grow_at_least(total_length);
750
+ memcpy(m_str + m_length, str, length);
751
+ m_str[total_length] = 0;
752
+ m_length = total_length;
753
+ }
754
+
755
+ /**
756
+ * Appends the given arguments, formatting using the specified format.
757
+ *
758
+ * This is roughly equivalent to constructing a C string using sprintf()
759
+ * and then appending the result to this String.
760
+ *
761
+ * ```
762
+ * auto str = String { "x" };
763
+ * str.append_sprintf("y%c%d", 'z', 1);
764
+ * assert_str_eq("xyz1", str);
765
+ * ```
766
+ */
767
+ void append_sprintf(const char *format, ...) {
768
+ va_list args;
769
+ va_start(args, format);
770
+ append_vsprintf(format, args);
771
+ va_end(args);
772
+ }
773
+
774
+ /**
775
+ * Appends the given va_list args, formatting using the specified format.
776
+ */
777
+ void append_vsprintf(const char *format, va_list args) {
778
+ va_list args_copy;
779
+ va_copy(args_copy, args);
780
+ int fmt_length = vsnprintf(nullptr, 0, format, args_copy);
781
+ va_end(args_copy);
782
+ char buf[fmt_length + 1];
783
+ vsnprintf(buf, fmt_length + 1, format, args);
784
+ append(buf);
785
+ }
786
+
787
+ /**
788
+ * Appends the given String.
789
+ *
790
+ * ```
791
+ * auto str1 = String { "x" };
792
+ * auto str2 = String { "yz" };
793
+ * str1.append(str2);
794
+ * assert_str_eq("xyz", str1);
795
+ * ```
796
+ */
797
+ void append(const String &str) {
798
+ if (str.size() == 0) return;
799
+ size_t total_length = m_length + str.size();
800
+ grow_at_least(total_length);
801
+ memcpy(m_str + m_length, str.c_str(), sizeof(char) * str.size());
802
+ m_length = total_length;
803
+ m_str[m_length] = 0;
804
+ }
805
+
806
+ /**
807
+ * Repeatedly adds the given number of the given character.
808
+ *
809
+ * ```
810
+ * auto str = String { "x" };
811
+ * str.append(2, 'y');
812
+ * assert_str_eq("xyy", str);
813
+ * ```
814
+ */
815
+ void append(size_t n, char c) {
816
+ size_t total_length = m_length + n;
817
+ grow_at_least(total_length);
818
+ memset(m_str + m_length, c, sizeof(char) * n);
819
+ m_length = total_length;
820
+ m_str[m_length] = 0;
821
+ }
822
+
823
+ /**
824
+ * Returns true if this and the given String are equivalent.
825
+ *
826
+ * ```
827
+ * auto str1 = String { "abc" };
828
+ * auto str2 = String { "abc" };
829
+ * assert(str1 == str2);
830
+ * auto str3 = String { "xyz" };
831
+ * assert_not(str1 == str3);
832
+ * ```
833
+ */
834
+ bool operator==(const String &other) const {
835
+ if (size() != other.size())
836
+ return false;
837
+ return memcmp(c_str(), other.c_str(), sizeof(char) * m_length) == 0;
838
+ }
839
+
840
+ bool operator!=(const String &other) const {
841
+ return !operator==(other);
842
+ }
843
+
844
+ /**
845
+ * Returns true if this and the given C string are equivalent.
846
+ *
847
+ * ```
848
+ * auto str = String { "abc" };
849
+ * auto cstr1 = "abc";
850
+ * assert(str == cstr1);
851
+ * auto cstr2 = "xyz";
852
+ * assert_not(str == cstr2);
853
+ * ```
854
+ */
855
+ bool operator==(const char *other) const {
856
+ assert(other);
857
+ if (size() != strlen(other))
858
+ return false;
859
+ return memcmp(c_str(), other, sizeof(char) * m_length) == 0;
860
+ }
861
+
862
+ bool operator!=(const char *other) const {
863
+ return !(*this == other);
864
+ }
865
+
866
+ /**
867
+ * Returns true if this is alphanumerically greater than the given String.
868
+ *
869
+ * ```
870
+ * auto str1 = String { "def" };
871
+ * auto str2 = String { "abc" };
872
+ * assert(str1 > str2);
873
+ * assert_not(str2 > str1);
874
+ * ```
875
+ */
876
+ bool operator>(const String &other) const {
877
+ // FIXME: cannot use strcmp here
878
+ return strcmp(c_str(), other.c_str()) > 0;
879
+ }
880
+
881
+ /**
882
+ * Returns true if this is alphanumerically less than the given String.
883
+ *
884
+ * ```
885
+ * auto str1 = String { "abc" };
886
+ * auto str2 = String { "def" };
887
+ * assert(str1 < str2);
888
+ * assert_not(str2 < str1);
889
+ * ```
890
+ */
891
+ bool operator<(const String &other) const {
892
+ // FIXME: cannot use strcmp here
893
+ return strcmp(c_str(), other.c_str()) < 0;
894
+ }
895
+
896
+ /**
897
+ * Returns -1, 0, or 1 by comparing this String to the given String.
898
+ * -1 is returned if this String is alphanumerically less than the other one.
899
+ * 0 is returned if they are equivalent.
900
+ * 1 is returned if this String is alphanumerically greater than the other one.
901
+ *
902
+ * ```
903
+ * auto str1 = String { "def" };
904
+ * auto str2 = String { "abc" };
905
+ * assert_eq(1, str1.cmp(str2));
906
+ * assert_eq(-1, str2.cmp(str1));
907
+ * auto str3 = String { "abc" };
908
+ * assert_eq(0, str2.cmp(str3));
909
+ * ```
910
+ */
911
+ int cmp(const String &other) const {
912
+ if (m_length == 0) {
913
+ if (other.m_length == 0)
914
+ return 0;
915
+ return -1;
916
+ }
917
+ size_t i;
918
+ for (i = 0; i < std::min(m_length, other.m_length); ++i) {
919
+ auto c1 = (unsigned char)(*this)[i], c2 = (unsigned char)other[i];
920
+ if (c1 < c2)
921
+ return -1;
922
+ else if (c1 > c2)
923
+ return 1;
924
+ }
925
+ // "x" (len 1) <=> "xx" (len 2)
926
+ // 1 - 2 = -1
927
+ return m_length - other.m_length;
928
+ }
929
+
930
+ /**
931
+ * Finds the given String inside this one and returns its starting index.
932
+ * If not found, return -1.
933
+ *
934
+ * ```
935
+ * auto str1 = String { "hello world" };
936
+ * auto str2 = String { "lo" };
937
+ * assert_eq(3, str1.find(str2));
938
+ * auto str3 = String { "xx" };
939
+ * assert_eq(-1, str1.find(str3));
940
+ * ```
941
+ */
942
+ ssize_t find(const String &needle) const {
943
+ if (m_length < needle.size() || needle.is_empty())
944
+ return -1;
945
+ assert(m_str);
946
+ size_t max_index = m_length - needle.size();
947
+ size_t byte_count = sizeof(char) * needle.size();
948
+ for (size_t index = 0; index <= max_index; ++index) {
949
+ if (memcmp(m_str + index, needle.c_str(), byte_count) == 0)
950
+ return index;
951
+ }
952
+ return -1;
953
+ }
954
+
955
+ /**
956
+ * Finds the given character inside this String and returns its starting index.
957
+ * If not found, return -1.
958
+ *
959
+ * ```
960
+ * auto str = String { "hello world" };
961
+ * assert_eq(6, str.find('w'));
962
+ * assert_eq(-1, str.find('x'));
963
+ * ```
964
+ */
965
+ ssize_t find(const char c) const {
966
+ for (size_t i = 0; i < m_length; ++i) {
967
+ if (c == m_str[i]) return i;
968
+ }
969
+ return -1;
970
+ }
971
+
972
+ /**
973
+ * Truncates this String to the specified length.
974
+ *
975
+ * ```
976
+ * auto str = String { "abcdef" };
977
+ * str.truncate(3);
978
+ * assert_str_eq("abc", str);
979
+ * ```
980
+ *
981
+ * This method aborts if the given length is longer
982
+ * than the String currently is.
983
+ *
984
+ * ```should_abort
985
+ * auto str = String { "abc" };
986
+ * str.truncate(4);
987
+ * ```
988
+ */
989
+ void truncate(size_t length) {
990
+ assert(length <= m_length);
991
+ if (length == 0) {
992
+ delete[] m_str;
993
+ m_str = nullptr;
994
+ m_length = 0;
995
+ m_capacity = 0;
996
+ } else {
997
+ m_str[length] = 0;
998
+ m_length = length;
999
+ }
1000
+ }
1001
+
1002
+ /**
1003
+ * Truncates this String to a length of zero.
1004
+ *
1005
+ * ```
1006
+ * auto str = String { "abcdef" };
1007
+ * str.clear();
1008
+ * assert_eq(0, str.size());
1009
+ * ```
1010
+ */
1011
+ void clear() { truncate(0); }
1012
+
1013
+ /**
1014
+ * Removes one character from the end of the String,
1015
+ * if this String is not already empty.
1016
+ *
1017
+ * ```
1018
+ * auto str = String { "ab" };
1019
+ * str.chomp();
1020
+ * assert_str_eq("a", str);
1021
+ * str.chomp();
1022
+ * assert_str_eq("", str);
1023
+ * str.chomp();
1024
+ * assert_str_eq("", str);
1025
+ * ```
1026
+ */
1027
+ void chomp() {
1028
+ if (m_length == 0) return;
1029
+ truncate(m_length - 1);
1030
+ }
1031
+
1032
+ /**
1033
+ * Removes any trailing whitespace, including tabs and newlines,
1034
+ * from the end of the String.
1035
+ *
1036
+ * ```
1037
+ * auto str = String { "a \t\n " };
1038
+ * str.strip_trailing_whitespace();
1039
+ * assert_str_eq("a", str);
1040
+ * ```
1041
+ */
1042
+ void strip_trailing_whitespace() {
1043
+ while (m_length > 0) {
1044
+ switch (m_str[m_length - 1]) {
1045
+ case ' ':
1046
+ case '\t':
1047
+ case '\n':
1048
+ case '\r':
1049
+ chomp();
1050
+ break;
1051
+ default:
1052
+ return;
1053
+ }
1054
+ }
1055
+ }
1056
+
1057
+ /**
1058
+ * Removes any trailing spaces from the end of the String.
1059
+ *
1060
+ * ```
1061
+ * auto str = String { "a\n " };
1062
+ * str.strip_trailing_spaces();
1063
+ * assert_str_eq("a\n", str);
1064
+ * ```
1065
+ */
1066
+ void strip_trailing_spaces() {
1067
+ while (m_length > 0) {
1068
+ if (m_str[m_length - 1] == ' ')
1069
+ chomp();
1070
+ else
1071
+ return;
1072
+ }
1073
+ }
1074
+
1075
+ /**
1076
+ * Removes all occurrences of the given character
1077
+ * from the String.
1078
+ *
1079
+ * ```
1080
+ * auto str = String { "abcabac" };
1081
+ * str.remove('a');
1082
+ * assert_str_eq("bcbc", str);
1083
+ * ```
1084
+ */
1085
+ void remove(char character) {
1086
+ size_t i;
1087
+ assert(m_str);
1088
+ for (i = 0; i < m_length; ++i) {
1089
+ if (m_str[i] == character) {
1090
+ for (size_t j = i; j < m_length; ++j)
1091
+ m_str[j] = m_str[j + 1];
1092
+
1093
+ --m_length;
1094
+ --i;
1095
+ }
1096
+ }
1097
+ m_str[m_length] = '\0';
1098
+ }
1099
+
1100
+ /**
1101
+ * Returns true if the String has a length of zero.
1102
+ *
1103
+ * ```
1104
+ * auto str1 = String { "abc" };
1105
+ * auto str2 = String { "" };
1106
+ * assert_not(str1.is_empty());
1107
+ * assert(str2.is_empty());
1108
+ * ```
1109
+ */
1110
+ bool is_empty() const { return m_length == 0; }
1111
+
1112
+ /**
1113
+ * Returns a new String that is the result of incrementing
1114
+ * the last character of this String. If the last character
1115
+ * is z/Z/9, then the next-to-last character is incremented
1116
+ * (or a new one is prepended) and the last character is reset.
1117
+ *
1118
+ * ```
1119
+ * assert_str_eq("b", String("a").successive());
1120
+ * assert_str_eq("az", String("ay").successive());
1121
+ * assert_str_eq("ba", String("az").successive());
1122
+ * assert_str_eq("aaa", String("zz").successive());
1123
+ * assert_str_eq("AAA", String("ZZ").successive());
1124
+ * assert_str_eq("1", String("0").successive());
1125
+ * assert_str_eq("100", String("99").successive());
1126
+ * assert_str_eq("d000", String("c999").successive());
1127
+ * ```
1128
+ */
1129
+ String successive() {
1130
+ auto result = String { *this };
1131
+ assert(m_length > 0);
1132
+ size_t index = size() - 1;
1133
+ char last_char = m_str[index];
1134
+ if (last_char == 'z') {
1135
+ result.increment_successive_char('a', 'z', 'a');
1136
+ } else if (last_char == 'Z') {
1137
+ result.increment_successive_char('A', 'Z', 'A');
1138
+ } else if (last_char == '9') {
1139
+ result.increment_successive_char('0', '9', '1');
1140
+ } else {
1141
+ result.m_str[index]++;
1142
+ }
1143
+ return result;
1144
+ }
1145
+
1146
+ /**
1147
+ * Returns a new String by appending the given arguments according
1148
+ * to the given format. This is a safer version of String::sprintf
1149
+ * that does not rely on format specifiers matching the argument type.
1150
+ *
1151
+ * ```
1152
+ * auto cstr = "hello";
1153
+ * unsigned char c = 'w'; // must specify signed or unsigned char
1154
+ * int num = 999;
1155
+ * auto str = String::format("{} {}orld {}", cstr, c, num);
1156
+ * assert_str_eq("hello world 999", str);
1157
+ * ```
1158
+ */
1159
+ template <typename... Args>
1160
+ static String format(const char *fmt, Args... args) {
1161
+ String out {};
1162
+ format(out, fmt, args...);
1163
+ return out;
1164
+ }
1165
+
1166
+ static void format(String &out, const char *fmt) {
1167
+ for (const char *c = fmt; *c != 0; c++) {
1168
+ out.append_char(*c);
1169
+ }
1170
+ }
1171
+
1172
+ template <typename T, typename... Args>
1173
+ static void format(String &out, const char *fmt, T first, Args... rest) {
1174
+ for (const char *c = fmt; *c != 0; c++) {
1175
+ if (*c == '{' && *(c + 1) == '}') {
1176
+ c++;
1177
+ out.append(first);
1178
+ format(out, c + 1, rest...);
1179
+ return;
1180
+ } else {
1181
+ out.append_char(*c);
1182
+ }
1183
+ }
1184
+ }
1185
+
1186
+ /**
1187
+ * Returns a new String where every character is converted to uppercase.
1188
+ *
1189
+ * ```
1190
+ * auto str = String("hElLo");
1191
+ * assert_str_eq("HELLO", str.uppercase());
1192
+ * ```
1193
+ */
1194
+ String uppercase() const {
1195
+ auto new_str = String(this);
1196
+ for (size_t i = 0; i < new_str.m_length; ++i) {
1197
+ new_str.m_str[i] = toupper(new_str.m_str[i]);
1198
+ }
1199
+ return new_str;
1200
+ }
1201
+
1202
+ /**
1203
+ * Returns a new String where every character is converted to lowercase.
1204
+ *
1205
+ * ```
1206
+ * auto str = String("hElLo");
1207
+ * assert_str_eq("hello", str.lowercase());
1208
+ * ```
1209
+ */
1210
+ String lowercase() const {
1211
+ auto new_str = String(this);
1212
+ for (size_t i = 0; i < new_str.m_length; ++i) {
1213
+ new_str.m_str[i] = tolower(new_str.m_str[i]);
1214
+ }
1215
+ return new_str;
1216
+ }
1217
+
1218
+ /**
1219
+ * Returns true if this String ends with the given String.
1220
+ *
1221
+ * ```
1222
+ * auto str = String("hello world");
1223
+ * assert(str.ends_with("world"));
1224
+ * assert_not(str.ends_with("xxx"));
1225
+ * ```
1226
+ */
1227
+ bool ends_with(const String &needle) {
1228
+ if (m_length < needle.m_length)
1229
+ return false;
1230
+ return memcmp(m_str + m_length - needle.m_length, needle.m_str, needle.m_length) == 0;
1231
+ }
1232
+
1233
+ /**
1234
+ * Returns hash value of this String.
1235
+ * This uses the 'djb2' hash algorithm by Dan Bernstein.
1236
+ *
1237
+ * ```
1238
+ * auto str = String("hello");
1239
+ * assert_eq(210714636441, str.djb2_hash());
1240
+ * ```
1241
+ */
1242
+ size_t djb2_hash() const {
1243
+ size_t hash = 5381;
1244
+ int c;
1245
+ for (size_t i = 0; i < m_length; ++i) {
1246
+ c = (*this)[i];
1247
+ hash = ((hash << 5) + hash) + c;
1248
+ }
1249
+ return hash;
1250
+ }
1251
+
1252
+ /**
1253
+ * Prints the full string with printf(), character by character.
1254
+ * This method will print the full String, even if null characters
1255
+ * are encountered.
1256
+ *
1257
+ * ```
1258
+ * auto str = String("foo\0bar");
1259
+ * str.print();
1260
+ * ```
1261
+ */
1262
+ void print() const {
1263
+ for (size_t i = 0; i < m_length; ++i) {
1264
+ printf("%c", (*this)[i]);
1265
+ }
1266
+ printf("\n");
1267
+ }
1268
+
1269
+ /**
1270
+ * Returns true if the string contains UTF-8-encoded
1271
+ * characters that seem to be valid, multibyte or not.
1272
+ *
1273
+ * An ASCII string "foo" would return true, because it's
1274
+ * also a valid UTF-8-encoded string. It can be
1275
+ * represented with UTF-8.
1276
+ *
1277
+ * NOTE: This is not fool-proof. There's a lot of
1278
+ * checks we aren't doing.
1279
+ *
1280
+ * ```
1281
+ * assert_eq(true, String("abc").contains_seemingly_valid_utf8_encoded_characters());
1282
+ * assert_eq(true, String("🍄").contains_seemingly_valid_utf8_encoded_characters());
1283
+ * assert_eq(false, String("\xC3").contains_seemingly_valid_utf8_encoded_characters());
1284
+ * ```
1285
+ */
1286
+ bool contains_seemingly_valid_utf8_encoded_characters() const {
1287
+ int index = 0;
1288
+ char buf[5];
1289
+ do {
1290
+ index = next_utf8_char(index, buf);
1291
+ } while (index > 0);
1292
+ return index == 0;
1293
+ }
1294
+
1295
+ /**
1296
+ * Returns true if the string contains multibyte
1297
+ * UTF-8-encoded characters that seem to be valid.
1298
+ *
1299
+ * NOTE: This is not fool-proof. There's a lot of
1300
+ * checks we aren't doing.
1301
+ *
1302
+ * ```
1303
+ * assert_eq(false, String("abc").contains_utf8_encoded_multibyte_characters());
1304
+ * assert_eq(true, String("🍄").contains_utf8_encoded_multibyte_characters());
1305
+ * assert_eq(false, String("\xC3").contains_utf8_encoded_multibyte_characters());
1306
+ * ```
1307
+ */
1308
+ bool contains_utf8_encoded_multibyte_characters() const {
1309
+ int index_was = 0;
1310
+ int index = 0;
1311
+ char buf[5];
1312
+ bool multibyte = false;
1313
+ do {
1314
+ index = next_utf8_char(index, buf);
1315
+ if (index > index_was + 1)
1316
+ multibyte = true;
1317
+ index_was = index;
1318
+ } while (index > 0);
1319
+ return index == 0 && multibyte;
1320
+ }
1321
+
1322
+ /**
1323
+ * Takes an integer for the starting index and a
1324
+ * buffer char*. Consumes the proper number of
1325
+ * bytes, appending to the buffer C string to build
1326
+ * a single UTF-8-encoded character.
1327
+ *
1328
+ * Be sure to pass a pointer to a buffer that is
1329
+ * at least 5 characters in size (one is used for
1330
+ * the null terminator).
1331
+ *
1332
+ * Returns the new index, a positive integer,
1333
+ * on success.
1334
+ *
1335
+ * Returns zero (0) if the end of the string has
1336
+ * been reached.
1337
+ *
1338
+ * Returns a negative integer if the character
1339
+ * is invalid.
1340
+ *
1341
+ * (Note: this method only does rudimentary checking
1342
+ * for a valid character, i.e. does it have enough
1343
+ * bytes to satisfy the encoding.)
1344
+ *
1345
+ * ```
1346
+ * auto str = String("ab☺🍄");
1347
+ * int index = 0;
1348
+ *
1349
+ * char buf[5];
1350
+ * index = str.next_utf8_char(index, buf);
1351
+ * assert_eq(1, index);
1352
+ * assert_str_eq("a", String(buf));
1353
+ *
1354
+ * buf[0] = '\0';
1355
+ * index = str.next_utf8_char(index, buf);
1356
+ * assert_eq(2, index);
1357
+ * assert_str_eq("b", String(buf));
1358
+ *
1359
+ * buf[0] = '\0';
1360
+ * index = str.next_utf8_char(index, buf);
1361
+ * assert_eq(5, index);
1362
+ * assert_str_eq("☺", String(buf));
1363
+ *
1364
+ * buf[0] = '\0';
1365
+ * index = str.next_utf8_char(index, buf);
1366
+ * assert_eq(9, index);
1367
+ * assert_str_eq("🍄", String(buf));
1368
+ *
1369
+ * buf[0] = '\0';
1370
+ * index = str.next_utf8_char(index, buf);
1371
+ * assert_eq(0, index);
1372
+ * assert_str_eq("", String(buf));
1373
+ * ```
1374
+ */
1375
+ int next_utf8_char(int index, char *buffer) const {
1376
+ if (!m_str)
1377
+ return 0;
1378
+ assert(m_length < INT_MAX);
1379
+ assert(index >= 0);
1380
+ if ((size_t)index >= m_length)
1381
+ return 0;
1382
+ buffer[0] = m_str[index];
1383
+ if (((unsigned char)buffer[0] >> 3) == 30) { // 11110xxx, 4 bytes
1384
+ if ((size_t)index + 3 >= m_length) return -1;
1385
+ buffer[1] = m_str[++index];
1386
+ buffer[2] = m_str[++index];
1387
+ buffer[3] = m_str[++index];
1388
+ buffer[4] = 0;
1389
+ } else if (((unsigned char)buffer[0] >> 4) == 14) { // 1110xxxx, 3 bytes
1390
+ if ((size_t)index + 2 >= m_length) return -1;
1391
+ buffer[1] = m_str[++index];
1392
+ buffer[2] = m_str[++index];
1393
+ buffer[3] = 0;
1394
+ } else if (((unsigned char)buffer[0] >> 5) == 6) { // 110xxxxx, 2 bytes
1395
+ if ((size_t)index + 1 >= m_length) return -1;
1396
+ buffer[1] = m_str[++index];
1397
+ buffer[2] = 0;
1398
+ } else {
1399
+ buffer[1] = 0;
1400
+ }
1401
+ return index + 1;
1402
+ }
1403
+
1404
+ private:
1405
+ void grow(size_t new_capacity) {
1406
+ assert(new_capacity >= m_length);
1407
+ auto old_str = m_str;
1408
+ m_str = new char[new_capacity + 1];
1409
+ if (old_str)
1410
+ memcpy(m_str, old_str, sizeof(char) * (m_capacity + 1));
1411
+ else
1412
+ m_str[0] = '\0';
1413
+ delete[] old_str;
1414
+ m_capacity = new_capacity;
1415
+ }
1416
+
1417
+ void grow_at_least(size_t min_capacity) {
1418
+ if (m_capacity >= min_capacity) return;
1419
+ if (m_capacity > 0 && min_capacity <= m_capacity * STRING_GROW_FACTOR) {
1420
+ grow(m_capacity * STRING_GROW_FACTOR);
1421
+ } else {
1422
+ grow(min_capacity);
1423
+ }
1424
+ }
1425
+
1426
+ void increment_successive_char(char first_char_in_range, char last_char_in_range, char prepend_char_to_grow) {
1427
+ assert(m_length > 0);
1428
+ ssize_t index = m_length - 1;
1429
+ char last_char = m_str[index];
1430
+ while (last_char == last_char_in_range) {
1431
+ m_str[index] = first_char_in_range;
1432
+ if ((--index) < 0)
1433
+ break;
1434
+ last_char = m_str[index];
1435
+ }
1436
+ if (index == -1) {
1437
+ this->prepend_char(prepend_char_to_grow);
1438
+ } else {
1439
+ m_str[index]++;
1440
+ }
1441
+ }
1442
+
1443
// Character buffer; null when nothing is allocated (e.g. after truncate(0)).
char *m_str = nullptr;
// Number of characters currently stored, not counting the terminator.
size_t m_length = 0;
// Characters the buffer can hold; grow() allocates one extra byte beyond
// this for the terminator.
size_t m_capacity = 0;
1446
+ };
1447
+ }