natalie_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/Dockerfile +26 -0
- data/Gemfile +10 -0
- data/LICENSE +21 -0
- data/README.md +55 -0
- data/Rakefile +242 -0
- data/ext/natalie_parser/extconf.rb +9 -0
- data/ext/natalie_parser/mri_creator.hpp +139 -0
- data/ext/natalie_parser/natalie_parser.cpp +144 -0
- data/include/natalie_parser/creator/debug_creator.hpp +113 -0
- data/include/natalie_parser/creator.hpp +108 -0
- data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
- data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
- data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
- data/include/natalie_parser/lexer.hpp +135 -0
- data/include/natalie_parser/node/alias_node.hpp +35 -0
- data/include/natalie_parser/node/arg_node.hpp +74 -0
- data/include/natalie_parser/node/array_node.hpp +34 -0
- data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
- data/include/natalie_parser/node/assignment_node.hpp +34 -0
- data/include/natalie_parser/node/back_ref_node.hpp +28 -0
- data/include/natalie_parser/node/begin_block_node.hpp +25 -0
- data/include/natalie_parser/node/begin_node.hpp +52 -0
- data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
- data/include/natalie_parser/node/bignum_node.hpp +37 -0
- data/include/natalie_parser/node/block_node.hpp +55 -0
- data/include/natalie_parser/node/block_pass_node.hpp +33 -0
- data/include/natalie_parser/node/break_node.hpp +32 -0
- data/include/natalie_parser/node/call_node.hpp +85 -0
- data/include/natalie_parser/node/case_in_node.hpp +40 -0
- data/include/natalie_parser/node/case_node.hpp +52 -0
- data/include/natalie_parser/node/case_when_node.hpp +43 -0
- data/include/natalie_parser/node/class_node.hpp +39 -0
- data/include/natalie_parser/node/colon2_node.hpp +44 -0
- data/include/natalie_parser/node/colon3_node.hpp +34 -0
- data/include/natalie_parser/node/constant_node.hpp +26 -0
- data/include/natalie_parser/node/def_node.hpp +55 -0
- data/include/natalie_parser/node/defined_node.hpp +33 -0
- data/include/natalie_parser/node/encoding_node.hpp +26 -0
- data/include/natalie_parser/node/end_block_node.hpp +25 -0
- data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
- data/include/natalie_parser/node/false_node.hpp +23 -0
- data/include/natalie_parser/node/fixnum_node.hpp +36 -0
- data/include/natalie_parser/node/float_node.hpp +36 -0
- data/include/natalie_parser/node/hash_node.hpp +34 -0
- data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
- data/include/natalie_parser/node/identifier_node.hpp +123 -0
- data/include/natalie_parser/node/if_node.hpp +43 -0
- data/include/natalie_parser/node/infix_op_node.hpp +46 -0
- data/include/natalie_parser/node/interpolated_node.hpp +33 -0
- data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
- data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
- data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
- data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
- data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
- data/include/natalie_parser/node/iter_node.hpp +45 -0
- data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
- data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
- data/include/natalie_parser/node/logical_and_node.hpp +40 -0
- data/include/natalie_parser/node/logical_or_node.hpp +40 -0
- data/include/natalie_parser/node/match_node.hpp +38 -0
- data/include/natalie_parser/node/module_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
- data/include/natalie_parser/node/next_node.hpp +37 -0
- data/include/natalie_parser/node/nil_node.hpp +23 -0
- data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
- data/include/natalie_parser/node/node.hpp +155 -0
- data/include/natalie_parser/node/node_with_args.hpp +47 -0
- data/include/natalie_parser/node/not_match_node.hpp +35 -0
- data/include/natalie_parser/node/not_node.hpp +37 -0
- data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
- data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
- data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
- data/include/natalie_parser/node/op_assign_node.hpp +47 -0
- data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
- data/include/natalie_parser/node/pin_node.hpp +33 -0
- data/include/natalie_parser/node/range_node.hpp +52 -0
- data/include/natalie_parser/node/redo_node.hpp +20 -0
- data/include/natalie_parser/node/regexp_node.hpp +36 -0
- data/include/natalie_parser/node/retry_node.hpp +20 -0
- data/include/natalie_parser/node/return_node.hpp +34 -0
- data/include/natalie_parser/node/safe_call_node.hpp +31 -0
- data/include/natalie_parser/node/sclass_node.hpp +37 -0
- data/include/natalie_parser/node/self_node.hpp +23 -0
- data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
- data/include/natalie_parser/node/shell_node.hpp +32 -0
- data/include/natalie_parser/node/splat_node.hpp +39 -0
- data/include/natalie_parser/node/splat_value_node.hpp +32 -0
- data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
- data/include/natalie_parser/node/string_node.hpp +42 -0
- data/include/natalie_parser/node/super_node.hpp +44 -0
- data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
- data/include/natalie_parser/node/symbol_node.hpp +30 -0
- data/include/natalie_parser/node/to_array_node.hpp +33 -0
- data/include/natalie_parser/node/true_node.hpp +23 -0
- data/include/natalie_parser/node/unary_op_node.hpp +41 -0
- data/include/natalie_parser/node/undef_node.hpp +31 -0
- data/include/natalie_parser/node/until_node.hpp +21 -0
- data/include/natalie_parser/node/while_node.hpp +52 -0
- data/include/natalie_parser/node/yield_node.hpp +29 -0
- data/include/natalie_parser/node.hpp +89 -0
- data/include/natalie_parser/parser.hpp +218 -0
- data/include/natalie_parser/token.hpp +842 -0
- data/include/tm/defer.hpp +34 -0
- data/include/tm/hashmap.hpp +826 -0
- data/include/tm/macros.hpp +16 -0
- data/include/tm/optional.hpp +223 -0
- data/include/tm/owned_ptr.hpp +186 -0
- data/include/tm/recursion_guard.hpp +156 -0
- data/include/tm/shared_ptr.hpp +259 -0
- data/include/tm/string.hpp +1447 -0
- data/include/tm/tests.hpp +78 -0
- data/include/tm/vector.hpp +796 -0
- data/lib/natalie_parser/sexp.rb +36 -0
- data/lib/natalie_parser/version.rb +5 -0
- data/lib/natalie_parser.rb +3 -0
- data/natalie_parser.gemspec +23 -0
- data/src/lexer/interpolated_string_lexer.cpp +88 -0
- data/src/lexer/regexp_lexer.cpp +95 -0
- data/src/lexer/word_array_lexer.cpp +134 -0
- data/src/lexer.cpp +1703 -0
- data/src/node/alias_node.cpp +11 -0
- data/src/node/assignment_node.cpp +33 -0
- data/src/node/begin_node.cpp +29 -0
- data/src/node/begin_rescue_node.cpp +33 -0
- data/src/node/class_node.cpp +22 -0
- data/src/node/interpolated_regexp_node.cpp +19 -0
- data/src/node/interpolated_shell_node.cpp +25 -0
- data/src/node/interpolated_string_node.cpp +111 -0
- data/src/node/interpolated_symbol_node.cpp +25 -0
- data/src/node/match_node.cpp +14 -0
- data/src/node/module_node.cpp +21 -0
- data/src/node/multiple_assignment_node.cpp +37 -0
- data/src/node/node.cpp +10 -0
- data/src/node/node_with_args.cpp +35 -0
- data/src/node/op_assign_node.cpp +36 -0
- data/src/node/string_node.cpp +33 -0
- data/src/parser.cpp +2972 -0
- data/src/token.cpp +27 -0
- metadata +186 -0
@@ -0,0 +1,1447 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <algorithm>
|
4
|
+
#include <assert.h>
|
5
|
+
#include <ctype.h>
|
6
|
+
#include <limits.h>
|
7
|
+
#include <stdarg.h>
|
8
|
+
#include <stdio.h>
|
9
|
+
#include <string.h>
|
10
|
+
|
11
|
+
namespace TM {
|
12
|
+
|
13
|
+
class String {
|
14
|
+
public:
|
15
|
+
static constexpr int STRING_GROW_FACTOR = 2;
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Constructs an empty String.
|
19
|
+
*
|
20
|
+
* ```
|
21
|
+
* auto str = String();
|
22
|
+
* assert_eq(0, str.size());
|
23
|
+
* ```
|
24
|
+
*/
|
25
|
+
// Intentionally empty: this constructor allocates nothing itself.
// NOTE(review): relies on the data members' in-class initializers, which are
// declared outside this view — confirm they start as null / zero.
String() { }
|
26
|
+
|
27
|
+
/**
|
28
|
+
* Constructs a new String by copying the contents
|
29
|
+
* from an existing C string.
|
30
|
+
*
|
31
|
+
* ```
|
32
|
+
* auto cstr = "foo";
|
33
|
+
* auto str = String(cstr);
|
34
|
+
* assert_eq(3, str.size());
|
35
|
+
* assert_neq(str.c_str(), cstr);
|
36
|
+
* ```
|
37
|
+
*/
|
38
|
+
String(const char *str) {
    // Passing a null pointer is a caller bug; set_str() does the actual copy.
    assert(str != nullptr);
    set_str(str);
}
|
42
|
+
|
43
|
+
/**
|
44
|
+
* Constructs a new String by copying the contents
|
45
|
+
* from an existing C string with a given length.
|
46
|
+
*
|
47
|
+
* The given C string can contain null characters.
|
48
|
+
* The full given length gets copied regardless of nulls.
|
49
|
+
*
|
50
|
+
* ```
|
51
|
+
* auto cstr = "foo\0bar";
|
52
|
+
* auto str = String(cstr, 7);
|
53
|
+
* assert_eq(7, str.size());
|
54
|
+
* assert_eq('\0', str[3]);
|
55
|
+
* assert_eq('r', str[6]);
|
56
|
+
* ```
|
57
|
+
*/
|
58
|
+
String(const char *str, size_t length) {
    // Length-based copy: NUL bytes inside `str` are kept as data.
    assert(str != nullptr);
    set_str(str, length);
}
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Constructs a new String by copying the contents
|
65
|
+
* from an existing String.
|
66
|
+
*
|
67
|
+
* ```
|
68
|
+
* auto str1 = String { "foo" };
|
69
|
+
* auto str2 = String { str1 };
|
70
|
+
* assert_str_eq("foo", str2);
|
71
|
+
* ```
|
72
|
+
*/
|
73
|
+
String(const String &other) {
    // Copy by explicit size so NUL bytes inside `other` survive.
    const char *source = other.c_str();
    set_str(source, other.size());
}
|
76
|
+
|
77
|
+
/**
|
78
|
+
* Constructs a new String by copying the contents
|
79
|
+
* from an existing String pointer.
|
80
|
+
*
|
81
|
+
* ```
|
82
|
+
* auto str1 = String { "foo" };
|
83
|
+
* auto str2 = String { &str1 };
|
84
|
+
* assert_str_eq("foo", str2);
|
85
|
+
* ```
|
86
|
+
*/
|
87
|
+
String(const String *other) {
|
88
|
+
assert(other);
|
89
|
+
set_str(other->c_str(), other->size());
|
90
|
+
}
|
91
|
+
|
92
|
+
/**
|
93
|
+
* Constructs a new String with the single given character.
|
94
|
+
*
|
95
|
+
* ```
|
96
|
+
* auto str = String { 'x' };
|
97
|
+
* assert_str_eq("x", str);
|
98
|
+
* ```
|
99
|
+
*/
|
100
|
+
String(char c) {
    // Build a two-byte C string (character + terminator) and copy it.
    const char single[] = { c, '\0' };
    set_str(single);
}
|
104
|
+
|
105
|
+
/**
|
106
|
+
* Constructs a new String of the specified length
|
107
|
+
* by filling it with the given character.
|
108
|
+
*
|
109
|
+
* ```
|
110
|
+
* auto str = String { 10, 'x' };
|
111
|
+
* assert_str_eq("xxxxxxxxxx", str);
|
112
|
+
* ```
|
113
|
+
*/
|
114
|
+
String(size_t length, char c) {
|
115
|
+
char buf[length];
|
116
|
+
memset(buf, c, sizeof(char) * length);
|
117
|
+
set_str(buf, length);
|
118
|
+
}
|
119
|
+
|
120
|
+
/**
|
121
|
+
* Constructs a new String by converting the given number.
|
122
|
+
*
|
123
|
+
* ```
|
124
|
+
* auto str = String { (long long)10 };
|
125
|
+
* assert_str_eq("10", str);
|
126
|
+
* ```
|
127
|
+
*/
|
128
|
+
String(long long number) {
    // Three decimal digits per byte over-estimates the width; the extra two
    // bytes cover the sign and the terminator.
    char digits[sizeof(long long) * 3 + 2];
    snprintf(digits, sizeof(digits), "%lli", number);
    set_str(digits);
}
|
134
|
+
|
135
|
+
/**
|
136
|
+
* Constructs a new String by converting the given number.
|
137
|
+
*
|
138
|
+
* ```
|
139
|
+
* auto str = String { (int)10 };
|
140
|
+
* assert_str_eq("10", str);
|
141
|
+
* ```
|
142
|
+
*/
|
143
|
+
String(int number) {
    // Three decimal digits per byte over-estimates the width; the extra two
    // bytes cover the sign and the terminator.
    char digits[sizeof(int) * 3 + 2];
    snprintf(digits, sizeof(digits), "%d", number);
    set_str(digits);
}
|
149
|
+
|
150
|
+
/**
|
151
|
+
* Constructs a new String by converting the given
|
152
|
+
* double-precision float.
|
153
|
+
*
|
154
|
+
* ```
|
155
|
+
* auto str = String { 4.1 };
|
156
|
+
* assert_str_eq("4.1000", str);
|
157
|
+
* ```
|
158
|
+
*
|
159
|
+
* You can optionally specify the decimal precision.
|
160
|
+
*
|
161
|
+
* ```
|
162
|
+
* auto str = String { 4.1, 1 };
|
163
|
+
* assert_str_eq("4.1", str);
|
164
|
+
* ```
|
165
|
+
*/
|
166
|
+
String(double number, int precision = 4) {
    // First pass only measures the formatted width; second pass renders it.
    int width = snprintf(NULL, 0, "%.*f", precision, number);
    char rendered[width + 1];
    snprintf(rendered, width + 1, "%.*f", precision, number);
    set_str(rendered);
}
|
172
|
+
|
173
|
+
// Output styles accepted by String::hex().
enum class HexFormat {
    UppercaseAndPrefixed, // uppercase digits with a leading "0X"
    Uppercase, // uppercase digits, no prefix
    LowercaseAndPrefixed, // lowercase digits with a leading "0x"
    Lowercase, // lowercase digits, no prefix
};
|
179
|
+
|
180
|
+
/**
|
181
|
+
* Creates a new String by converting the given
|
182
|
+
* number to hexadecimal format. By default, the
|
183
|
+
* result will be uppercase and prefixed with "0X".
|
184
|
+
*
|
185
|
+
* ```
|
186
|
+
* auto str = String::hex(254);
|
187
|
+
* assert_str_eq("0XFE", str);
|
188
|
+
* ```
|
189
|
+
*
|
190
|
+
* You can optionally specify the format, one of:
|
191
|
+
* - HexFormat::UppercaseAndPrefixed
|
192
|
+
* - HexFormat::Uppercase
|
193
|
+
* - HexFormat::LowercaseAndPrefixed
|
194
|
+
* - HexFormat::Lowercase
|
195
|
+
*
|
196
|
+
* ```
|
197
|
+
* auto str = String::hex(254, String::HexFormat::Lowercase);
|
198
|
+
* assert_str_eq("fe", str);
|
199
|
+
* ```
|
200
|
+
*/
|
201
|
+
// Formats `number` as hexadecimal and returns it as a new String.
//
// `format` selects the digit case and whether a "0X"/"0x" prefix is added.
// Negative numbers are rendered as their unsigned bit pattern.
static String hex(long long number, HexFormat format = HexFormat::UppercaseAndPrefixed) {
    bool uppercase = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::Uppercase;
    bool prefixed = format == HexFormat::UppercaseAndPrefixed || format == HexFormat::LowercaseAndPrefixed;
    // %llX / %llx expect an unsigned long long argument, so convert
    // explicitly instead of passing the signed value through varargs.
    unsigned long long bits = static_cast<unsigned long long>(number);
    // Two hex digits per byte plus the terminator is always enough, so a
    // fixed-size buffer replaces the measure-then-format stack array.
    char digits[sizeof(bits) * 2 + 1];
    snprintf(digits, sizeof(digits), uppercase ? "%llX" : "%llx", bits);
    auto str = String(digits);
    if (prefixed)
        str.prepend(uppercase ? "0X" : "0x");
    return str;
}
|
213
|
+
|
214
|
+
// Releases the owned character buffer (deleting a null pointer is a no-op).
virtual ~String() { delete[] m_str; }
|
217
|
+
|
218
|
+
/**
|
219
|
+
* Replaces the String data by copying from another String.
|
220
|
+
*
|
221
|
+
* ```
|
222
|
+
* auto str1 = String { "foo" };
|
223
|
+
* auto str2 = String { str1 };
|
224
|
+
* assert_str_eq("foo", str2);
|
225
|
+
* ```
|
226
|
+
*/
|
227
|
+
String &operator=(const String &other) {
    // Copy by explicit size so NUL bytes inside `other` are preserved.
    const size_t byte_count = other.size();
    set_str(other.c_str(), byte_count);
    return *this;
}
|
231
|
+
|
232
|
+
/**
|
233
|
+
* Appends two Strings together and returns the result.
|
234
|
+
*
|
235
|
+
* ```
|
236
|
+
* auto str1 = String { "foo" };
|
237
|
+
* auto str2 = String { "bar" };
|
238
|
+
* assert_str_eq("foobar", str1 + str2);
|
239
|
+
*
|
240
|
+
* assert_str_eq("12", String("1") + "2");
|
241
|
+
* ```
|
242
|
+
*/
|
243
|
+
String operator+(const String &other) const {
    // Work on a copy so neither operand is modified.
    String combined(*this);
    combined.append(other);
    return combined;
}
|
248
|
+
|
249
|
+
/**
|
250
|
+
* Replaces the String data by copying from a C string.
|
251
|
+
*
|
252
|
+
* ```
|
253
|
+
* auto cstr = "foo";
|
254
|
+
* auto str = String { cstr };
|
255
|
+
* assert_str_eq("foo", str);
|
256
|
+
* ```
|
257
|
+
*/
|
258
|
+
// Replaces the contents with a copy of the NUL-terminated C string `other`.
//
// `other` must not be null. This is now asserted, matching set_str() and
// the C-string constructor; previously a null pointer was dereferenced.
// An empty input goes through truncate(0) instead of set_str().
// NOTE(review): truncate() is defined outside this view — presumably it
// keeps the existing allocation; confirm.
String &operator=(const char *other) {
    assert(other);
    if (other[0] == '\0')
        truncate(0);
    else
        set_str(other);
    return *this;
}
|
266
|
+
|
267
|
+
/**
|
268
|
+
* Returns the character at the specified index.
|
269
|
+
*
|
270
|
+
* ```
|
271
|
+
* auto str = String { "abc" };
|
272
|
+
* assert_eq('b', str.at(1));
|
273
|
+
* ```
|
274
|
+
*
|
275
|
+
* This method aborts if the given index is beyond the end of the String.
|
276
|
+
*
|
277
|
+
* ```should_abort
|
278
|
+
* auto str = String { "abc" };
|
279
|
+
* str.at(10);
|
280
|
+
* ```
|
281
|
+
*/
|
282
|
+
char at(size_t index) const {
|
283
|
+
assert(index < m_length);
|
284
|
+
return m_str[index];
|
285
|
+
}
|
286
|
+
|
287
|
+
/**
|
288
|
+
* Returns the character at the specified index.
|
289
|
+
*
|
290
|
+
* ```
|
291
|
+
* auto str = String { "abc" };
|
292
|
+
* assert_eq('b', str[1]);
|
293
|
+
* ```
|
294
|
+
*
|
295
|
+
* WARNING: This method does *not* check that the given
|
296
|
+
* index is within the bounds of the string data!
|
297
|
+
*/
|
298
|
+
char operator[](size_t index) const {
    // Unchecked read; the caller is responsible for the bounds.
    return *(m_str + index);
}
|
301
|
+
|
302
|
+
/**
|
303
|
+
* Returns a reference to the character at the specified index.
|
304
|
+
*
|
305
|
+
* ```
|
306
|
+
* auto str = String { "abc" };
|
307
|
+
* assert_eq('b', str[1]);
|
308
|
+
* ```
|
309
|
+
*
|
310
|
+
* This allows you to set the character at the given index.
|
311
|
+
*
|
312
|
+
* ```
|
313
|
+
* auto str = String { "abc" };
|
314
|
+
* str[1] = 'r';
|
315
|
+
* assert_eq('r', str[1]);
|
316
|
+
* ```
|
317
|
+
*
|
318
|
+
* WARNING: This method does *not* check that the given
|
319
|
+
* index is within the bounds of the string data!
|
320
|
+
*/
|
321
|
+
char &operator[](size_t index) {
    // Unchecked access; the returned reference allows writing the byte.
    return *(m_str + index);
}
|
324
|
+
|
325
|
+
/**
|
326
|
+
* Returns the last character in the string.
|
327
|
+
*
|
328
|
+
* ```
|
329
|
+
* auto str = String { "abc" };
|
330
|
+
* assert_eq('c', str.last_char());
|
331
|
+
* ```
|
332
|
+
*
|
333
|
+
* This method aborts if the String is zero-length.
|
334
|
+
*
|
335
|
+
* ```should_abort
|
336
|
+
* auto str = String { "" };
|
337
|
+
* str.last_char();
|
338
|
+
* ```
|
339
|
+
*/
|
340
|
+
char last_char() const {
|
341
|
+
assert(m_length > 0);
|
342
|
+
return m_str[m_length - 1];
|
343
|
+
}
|
344
|
+
|
345
|
+
/**
|
346
|
+
* Removes the last character from the string and returns it.
|
347
|
+
*
|
348
|
+
* ```
|
349
|
+
* auto str = String { "abc" };
|
350
|
+
* assert_eq('c', str.pop_char());
|
351
|
+
* assert_eq('b', str.pop_char());
|
352
|
+
* assert_eq(1, str.size());
|
353
|
+
* ```
|
354
|
+
*
|
355
|
+
* This method aborts if the String is zero-length.
|
356
|
+
*
|
357
|
+
* ```should_abort
|
358
|
+
* auto str = String { "" };
|
359
|
+
* str.pop_char();
|
360
|
+
* ```
|
361
|
+
*/
|
362
|
+
char pop_char() {
|
363
|
+
assert(m_length > 0);
|
364
|
+
return m_str[--m_length];
|
365
|
+
}
|
366
|
+
|
367
|
+
/**
|
368
|
+
* Returns a new copy of the String.
|
369
|
+
*
|
370
|
+
* ```
|
371
|
+
* auto str1 = String { "abc" };
|
372
|
+
* auto str2 = str1.clone();
|
373
|
+
* assert_str_eq("abc", str2);
|
374
|
+
* ```
|
375
|
+
*/
|
376
|
+
String clone() const {
    // The copy owns its own buffer.
    return String(*this);
}
|
377
|
+
|
378
|
+
/**
|
379
|
+
* Returns a String created from a substring of this one.
|
380
|
+
* Pass a start index and the desired length.
|
381
|
+
*
|
382
|
+
* ```
|
383
|
+
* auto str1 = String { "abc" };
|
384
|
+
* auto str2 = str1.substring(1, 2);
|
385
|
+
* assert_str_eq("bc", str2);
|
386
|
+
* ```
|
387
|
+
*
|
388
|
+
* This method aborts if the given start index is past the end.
|
389
|
+
*
|
390
|
+
* ```should_abort
|
391
|
+
* auto str = String { "abc" };
|
392
|
+
* str.substring(3, 1);
|
393
|
+
* ```
|
394
|
+
*
|
395
|
+
* ...and if the resulting end index (start + length) is past the end.
|
396
|
+
*
|
397
|
+
* ```should_abort
|
398
|
+
* auto str = String { "abc" };
|
399
|
+
* str.substring(1, 3);
|
400
|
+
* ```
|
401
|
+
*/
|
402
|
+
// Returns a new String holding `length` bytes starting at index `start`.
//
// Aborts (via assert) when `start` is not a valid index or when the
// requested range runs past the end of the String.
String substring(size_t start, size_t length) const {
    assert(start < m_length);
    // Compare against the bytes remaining after `start` rather than
    // computing start + length, which can wrap around for a very large
    // `length` and slip past the bounds check.
    assert(length <= m_length - start);
    return String(c_str() + start, length);
}
|
407
|
+
|
408
|
+
/**
|
409
|
+
* Returns a String created from a substring of this one.
|
410
|
+
* Pass a start index. (All characters to the end of the string
|
411
|
+
* will be included.)
|
412
|
+
*
|
413
|
+
* ```
|
414
|
+
* auto str1 = String { "abc" };
|
415
|
+
* auto str2 = str1.substring(1);
|
416
|
+
* assert_str_eq("bc", str2);
|
417
|
+
* ```
|
418
|
+
*
|
419
|
+
* This method aborts if the given start index is past the end.
|
420
|
+
*
|
421
|
+
* ```should_abort
|
422
|
+
* auto str = String { "abc" };
|
423
|
+
* str.substring(3);
|
424
|
+
* ```
|
425
|
+
*/
|
426
|
+
String substring(size_t start) const {
    // Everything from `start` to the end; the two-argument overload
    // performs the bounds checks.
    const size_t remaining = m_length - start;
    return substring(start, remaining);
}
|
429
|
+
|
430
|
+
/**
|
431
|
+
* Returns a C string pointer to the internal data.
|
432
|
+
*
|
433
|
+
* ```
|
434
|
+
* auto str = String { "abc" };
|
435
|
+
* auto cstr = str.c_str();
|
436
|
+
* assert_eq(0, strcmp(cstr, "abc"));
|
437
|
+
* ```
|
438
|
+
*/
|
439
|
+
const char *c_str() const {
    // A String without a buffer is presented as an empty literal, so the
    // result is never null.
    if (m_str)
        return m_str;
    return "";
}
|
440
|
+
|
441
|
+
/**
|
442
|
+
* Returns the number of bytes in the String.
|
443
|
+
*
|
444
|
+
* ```
|
445
|
+
* auto str = String { "🤖" }; // 4-byte emoji
|
446
|
+
* assert_eq(4, str.length());
|
447
|
+
* ```
|
448
|
+
*/
|
449
|
+
size_t length() const {
    // Byte count, not a count of Unicode characters.
    return m_length;
}
|
450
|
+
|
451
|
+
/**
|
452
|
+
* Returns the number of bytes in the String.
|
453
|
+
*
|
454
|
+
* ```
|
455
|
+
* auto str = String { "🤖" }; // 4-byte emoji
|
456
|
+
* assert_eq(4, str.size());
|
457
|
+
* ```
|
458
|
+
*/
|
459
|
+
size_t size() const {
    // Byte count, not a count of Unicode characters.
    return m_length;
}
|
460
|
+
|
461
|
+
/**
|
462
|
+
* Returns the number of bytes available in internal storage.
|
463
|
+
*
|
464
|
+
* ```
|
465
|
+
* auto str = String { "abc" };
|
466
|
+
* str.append_char('d');
|
467
|
+
* assert_eq(6, str.capacity()); // the capacity is doubled
|
468
|
+
* ```
|
469
|
+
*/
|
470
|
+
size_t capacity() const {
    // Current storage capacity in bytes, as tracked in m_capacity.
    return m_capacity;
}
|
471
|
+
|
472
|
+
/**
|
473
|
+
* Overwrites the String with the given C string.
|
474
|
+
*
|
475
|
+
* ```
|
476
|
+
* auto str = String { "abc" };
|
477
|
+
* str.set_str("xyz");
|
478
|
+
* assert_str_eq("xyz", str);
|
479
|
+
* ```
|
480
|
+
*/
|
481
|
+
void set_str(const char *str) {
    assert(str);
    char *previous = m_str;
    const size_t byte_count = strlen(str);
    m_length = byte_count;
    m_capacity = byte_count;
    m_str = new char[byte_count + 1];
    // Copy the terminator along with the text.
    memcpy(m_str, str, byte_count + 1);
    // Released last so that `str` may point into the old buffer.
    delete[] previous;
}
|
491
|
+
|
492
|
+
/**
|
493
|
+
* Overwrites the String with the given C string with
|
494
|
+
* specified length.
|
495
|
+
*
|
496
|
+
* The given C string can contain null characters.
|
497
|
+
* The full given length gets copied regardless of nulls.
|
498
|
+
*
|
499
|
+
* ```
|
500
|
+
* auto str = String { "abc" };
|
501
|
+
* str.set_str("def\0ghi", 7);
|
502
|
+
* assert_eq(7, str.size());
|
503
|
+
* assert_eq('d', str[0]);
|
504
|
+
* assert_eq('\0', str[3]);
|
505
|
+
* assert_eq('i', str[6]);
|
506
|
+
* ```
|
507
|
+
*/
|
508
|
+
void set_str(const char *str, size_t length) {
    assert(str);
    char *previous = m_str;
    char *fresh = new char[length + 1];
    m_str = fresh;
    // Copy exactly `length` bytes (NULs included), then terminate.
    memcpy(fresh, str, length);
    fresh[length] = 0;
    m_length = length;
    m_capacity = length;
    // Released last so that `str` may point into the old buffer.
    delete[] previous;
}
|
519
|
+
|
520
|
+
/**
|
521
|
+
* Inserts the given character at the front
|
522
|
+
* of the string, shifting all the other characters down by one.
|
523
|
+
*
|
524
|
+
* ```
|
525
|
+
* auto str = String { "23" };
|
526
|
+
* str.prepend_char('1');
|
527
|
+
* assert_str_eq("123", str);
|
528
|
+
* ```
|
529
|
+
*/
|
530
|
+
void prepend_char(char c) {
    const size_t grown_length = m_length + 1;
    grow_at_least(grown_length);
    // Shift the existing bytes and their terminator one slot to the right.
    memmove(m_str + 1, m_str, m_length + 1);
    *m_str = c;
    m_length = grown_length;
}
|
537
|
+
|
538
|
+
/**
|
539
|
+
* Converts the given number and prepends the resulting string.
|
540
|
+
*
|
541
|
+
* ```
|
542
|
+
* auto str = String { "abc" };
|
543
|
+
* str.prepend(123);
|
544
|
+
* assert_str_eq("123abc", str);
|
545
|
+
* ```
|
546
|
+
*/
|
547
|
+
void prepend(long long i) {
|
548
|
+
int length = snprintf(NULL, 0, "%lli", i);
|
549
|
+
char buf[length + 1];
|
550
|
+
snprintf(buf, length + 1, "%lli", i);
|
551
|
+
prepend(buf);
|
552
|
+
}
|
553
|
+
|
554
|
+
/**
|
555
|
+
* Prepends the given C string.
|
556
|
+
*
|
557
|
+
* ```
|
558
|
+
* auto str = String { "def" };
|
559
|
+
* str.prepend("abc");
|
560
|
+
* assert_str_eq("abcdef", str);
|
561
|
+
* ```
|
562
|
+
*/
|
563
|
+
void prepend(const char *str) {
|
564
|
+
if (!str) return;
|
565
|
+
size_t new_length = strlen(str);
|
566
|
+
if (new_length == 0) return;
|
567
|
+
char buf[m_length + 1];
|
568
|
+
memcpy(buf, c_str(), sizeof(char) * (m_length + 1));
|
569
|
+
set_str(str);
|
570
|
+
append(buf);
|
571
|
+
}
|
572
|
+
|
573
|
+
/**
|
574
|
+
* Prepends the given String.
|
575
|
+
*
|
576
|
+
* ```
|
577
|
+
* auto str1 = String { "def" };
|
578
|
+
* auto str2 = String { "abc" };
|
579
|
+
* str1.prepend(str2);
|
580
|
+
* assert_str_eq("abcdef", str1);
|
581
|
+
* ```
|
582
|
+
*/
|
583
|
+
void prepend(const String &str) {
|
584
|
+
size_t new_length = str.size();
|
585
|
+
if (new_length == 0) return;
|
586
|
+
char buf[new_length + m_length + 1];
|
587
|
+
memcpy(buf, str.c_str(), sizeof(char) * new_length);
|
588
|
+
memcpy(buf + new_length, c_str(), sizeof(char) * (m_length + 1));
|
589
|
+
set_str(buf);
|
590
|
+
}
|
591
|
+
|
592
|
+
/**
|
593
|
+
* Inserts at the specified index the character given.
|
594
|
+
*
|
595
|
+
* ```
|
596
|
+
* auto str = String { "xyz" };
|
597
|
+
* str.insert(0, '-');
|
598
|
+
* str.insert(2, '-');
|
599
|
+
* str.insert(4, '-');
|
600
|
+
* assert_str_eq("-x-y-z", str);
|
601
|
+
* ```
|
602
|
+
*
|
603
|
+
* This method aborts if the index is past the end.
|
604
|
+
*
|
605
|
+
* ```should_abort
|
606
|
+
* auto str = String { "xxx" };
|
607
|
+
* str.insert(3, '-');
|
608
|
+
* ```
|
609
|
+
*/
|
610
|
+
void insert(size_t index, char c) {
    assert(index < m_length);
    grow_at_least(m_length + 1);
    // Taken after growing, since the buffer may have moved.
    char *slot = m_str + index;
    // Shift the tail and its terminator up by one to open a gap.
    memmove(slot + 1, slot, m_length - index + 1);
    *slot = c;
    ++m_length;
}
|
618
|
+
|
619
|
+
/**
|
620
|
+
* Adds the given character at the end of the string.
|
621
|
+
*
|
622
|
+
* ```
|
623
|
+
* auto str = String { "ab" };
|
624
|
+
* str.append_char('c');
|
625
|
+
* assert_str_eq("abc", str);
|
626
|
+
* ```
|
627
|
+
*/
|
628
|
+
void append_char(char c) {
|
629
|
+
size_t total_length = m_length + 1;
|
630
|
+
grow_at_least(total_length);
|
631
|
+
m_str[total_length - 1] = c;
|
632
|
+
m_str[total_length] = 0;
|
633
|
+
m_length = total_length;
|
634
|
+
}
|
635
|
+
|
636
|
+
/**
|
637
|
+
* Adds the given signed character at the end of the string.
|
638
|
+
*
|
639
|
+
* ```
|
640
|
+
* auto str = String { "ab" };
|
641
|
+
* str.append((signed char)'c');
|
642
|
+
* assert_str_eq("abc", str);
|
643
|
+
* ```
|
644
|
+
*/
|
645
|
+
void append(signed char c) {
|
646
|
+
size_t total_length = m_length + 1;
|
647
|
+
grow_at_least(total_length);
|
648
|
+
m_str[total_length - 1] = c;
|
649
|
+
m_str[total_length] = 0;
|
650
|
+
m_length = total_length;
|
651
|
+
}
|
652
|
+
|
653
|
+
/**
|
654
|
+
* Adds the given unsigned character at the end of the string.
|
655
|
+
*
|
656
|
+
* ```
|
657
|
+
* auto str = String { "ab" };
|
658
|
+
* str.append((unsigned char)'c');
|
659
|
+
* assert_str_eq("abc", str);
|
660
|
+
* ```
|
661
|
+
*/
|
662
|
+
void append(unsigned char c) {
    // Delegates to the signed-char overload.
    const signed char as_signed = static_cast<signed char>(c);
    append(as_signed);
}
|
665
|
+
|
666
|
+
/**
|
667
|
+
* Converts the given number and appends the resulting string.
|
668
|
+
*
|
669
|
+
* ```
|
670
|
+
* auto str = String { "a" };
|
671
|
+
* str.append((size_t)123);
|
672
|
+
* assert_str_eq("a123", str);
|
673
|
+
* ```
|
674
|
+
*/
|
675
|
+
void append(size_t i) {
|
676
|
+
int length = snprintf(NULL, 0, "%zu", i);
|
677
|
+
char buf[length + 1];
|
678
|
+
snprintf(buf, length + 1, "%zu", i);
|
679
|
+
append(buf);
|
680
|
+
}
|
681
|
+
|
682
|
+
/**
|
683
|
+
* Converts the given number and appends the resulting string.
|
684
|
+
*
|
685
|
+
* ```
|
686
|
+
* auto str = String { "a" };
|
687
|
+
* str.append((long long)123);
|
688
|
+
* assert_str_eq("a123", str);
|
689
|
+
* ```
|
690
|
+
*/
|
691
|
+
void append(long long i) {
|
692
|
+
int length = snprintf(NULL, 0, "%lli", i);
|
693
|
+
char buf[length + 1];
|
694
|
+
snprintf(buf, length + 1, "%lli", i);
|
695
|
+
append(buf);
|
696
|
+
}
|
697
|
+
|
698
|
+
/**
|
699
|
+
* Converts the given number and appends the resulting string.
|
700
|
+
*
|
701
|
+
* ```
|
702
|
+
* auto str = String { "a" };
|
703
|
+
* str.append((int)123);
|
704
|
+
* assert_str_eq("a123", str);
|
705
|
+
* ```
|
706
|
+
*/
|
707
|
+
void append(int i) {
|
708
|
+
int length = snprintf(NULL, 0, "%i", i);
|
709
|
+
char buf[length + 1];
|
710
|
+
snprintf(buf, length + 1, "%i", i);
|
711
|
+
append(buf);
|
712
|
+
}
|
713
|
+
|
714
|
+
/**
|
715
|
+
* Appends the given C string.
|
716
|
+
*
|
717
|
+
* ```
|
718
|
+
* auto str = String { "a" };
|
719
|
+
* str.append("bc");
|
720
|
+
* assert_str_eq("abc", str);
|
721
|
+
* ```
|
722
|
+
*/
|
723
|
+
void append(const char *str) {
|
724
|
+
if (!str) return;
|
725
|
+
size_t length = strlen(str);
|
726
|
+
if (length == 0) return;
|
727
|
+
append(str, length);
|
728
|
+
}
|
729
|
+
|
730
|
+
/**
|
731
|
+
* Appends the given C string with specified length.
|
732
|
+
*
|
733
|
+
* The given C string can contain null characters.
|
734
|
+
* The full given length gets appended regardless of nulls.
|
735
|
+
*
|
736
|
+
* ```
|
737
|
+
* auto str = String { "x" };
|
738
|
+
* str.append("abc\0def", 7);
|
739
|
+
* assert_eq(8, str.size());
|
740
|
+
* assert_eq('x', str[0]);
|
741
|
+
* assert_eq('\0', str[4]);
|
742
|
+
* assert_eq('f', str[7]);
|
743
|
+
* ```
|
744
|
+
*/
|
745
|
+
void append(const char *str, size_t length) {
    // Nothing to do for a null pointer or a zero byte count.
    if (str == nullptr || length == 0)
        return;
    const size_t combined_length = m_length + length;
    grow_at_least(combined_length);
    // Copy exactly `length` bytes (NULs included) after the current data.
    memcpy(m_str + m_length, str, length);
    m_str[combined_length] = 0;
    m_length = combined_length;
}
|
754
|
+
|
755
|
+
/**
|
756
|
+
* Appends the given arguments, formatting using the specified format.
|
757
|
+
*
|
758
|
+
* This is roughly equivalent to constructing a C string using sprintf()
|
759
|
+
* and then appending the result to this String.
|
760
|
+
*
|
761
|
+
* ```
|
762
|
+
* auto str = String { "x" };
|
763
|
+
* str.append_sprintf("y%c%d", 'z', 1);
|
764
|
+
* assert_str_eq("xyz1", str);
|
765
|
+
* ```
|
766
|
+
*/
|
767
|
+
void append_sprintf(const char *format, ...) {
    // Collect the variadic arguments and let append_vsprintf() format them.
    va_list varargs;
    va_start(varargs, format);
    append_vsprintf(format, varargs);
    va_end(varargs);
}
|
773
|
+
|
774
|
+
/**
|
775
|
+
* Appends the given va_list args, formatting using the specified format.
|
776
|
+
*/
|
777
|
+
void append_vsprintf(const char *format, va_list args) {
|
778
|
+
va_list args_copy;
|
779
|
+
va_copy(args_copy, args);
|
780
|
+
int fmt_length = vsnprintf(nullptr, 0, format, args_copy);
|
781
|
+
va_end(args_copy);
|
782
|
+
char buf[fmt_length + 1];
|
783
|
+
vsnprintf(buf, fmt_length + 1, format, args);
|
784
|
+
append(buf);
|
785
|
+
}
|
786
|
+
|
787
|
+
/**
|
788
|
+
* Appends the given String.
|
789
|
+
*
|
790
|
+
* ```
|
791
|
+
* auto str1 = String { "x" };
|
792
|
+
* auto str2 = String { "yz" };
|
793
|
+
* str1.append(str2);
|
794
|
+
* assert_str_eq("xyz", str1);
|
795
|
+
* ```
|
796
|
+
*/
|
797
|
+
void append(const String &str) {
    const size_t extra = str.size();
    if (extra == 0)
        return;
    const size_t combined_length = m_length + extra;
    grow_at_least(combined_length);
    // Read str.c_str() only after growing, so appending a String to itself
    // copies from the current buffer.
    memcpy(m_str + m_length, str.c_str(), extra);
    m_length = combined_length;
    m_str[combined_length] = 0;
}
|
805
|
+
|
806
|
+
/**
|
807
|
+
* Repeatedly adds the given number of the given character.
|
808
|
+
*
|
809
|
+
* ```
|
810
|
+
* auto str = String { "x" };
|
811
|
+
* str.append(2, 'y');
|
812
|
+
* assert_str_eq("xyy", str);
|
813
|
+
* ```
|
814
|
+
*/
|
815
|
+
void append(size_t n, char c) {
    // Nothing to add. Returning early also protects an unallocated String
    // (m_str == nullptr): grow_at_least(0) would not allocate, and writing
    // the terminator below would dereference a null pointer.
    if (n == 0)
        return;

    size_t total_length = m_length + n;
    grow_at_least(total_length);
    // Fill the n new slots with `c`, then terminate the buffer.
    memset(m_str + m_length, c, sizeof(char) * n);
    m_length = total_length;
    m_str[m_length] = 0;
}
|
822
|
+
|
823
|
+
/**
|
824
|
+
* Returns true if this and the given String are equivalent.
|
825
|
+
*
|
826
|
+
* ```
|
827
|
+
* auto str1 = String { "abc" };
|
828
|
+
* auto str2 = String { "abc" };
|
829
|
+
* assert(str1 == str2);
|
830
|
+
* auto str3 = String { "xyz" };
|
831
|
+
* assert_not(str1 == str3);
|
832
|
+
* ```
|
833
|
+
*/
|
834
|
+
bool operator==(const String &other) const {
    // Strings of different length can never match; otherwise compare the
    // raw bytes over the full length.
    return size() == other.size()
        && memcmp(c_str(), other.c_str(), sizeof(char) * m_length) == 0;
}
|
839
|
+
|
840
|
+
bool operator!=(const String &other) const {
    // Defined as the exact negation of operator==(const String &).
    return !(*this == other);
}
|
843
|
+
|
844
|
+
/**
|
845
|
+
* Returns true if this and the given C string are equivalent.
|
846
|
+
*
|
847
|
+
* ```
|
848
|
+
* auto str = String { "abc" };
|
849
|
+
* auto cstr1 = "abc";
|
850
|
+
* assert(str == cstr1);
|
851
|
+
* auto cstr2 = "xyz";
|
852
|
+
* assert_not(str == cstr2);
|
853
|
+
* ```
|
854
|
+
*/
|
855
|
+
bool operator==(const char *other) const {
    assert(other);
    // Equal only when the lengths agree and every byte matches.
    const bool same_length = size() == strlen(other);
    return same_length && memcmp(c_str(), other, sizeof(char) * m_length) == 0;
}
|
861
|
+
|
862
|
+
bool operator!=(const char *other) const {
    // Defined as the exact negation of operator==(const char *).
    return !operator==(other);
}
|
865
|
+
|
866
|
+
/**
|
867
|
+
* Returns true if this is alphanumerically greater than the given String.
|
868
|
+
*
|
869
|
+
* ```
|
870
|
+
* auto str1 = String { "def" };
|
871
|
+
* auto str2 = String { "abc" };
|
872
|
+
* assert(str1 > str2);
|
873
|
+
* assert_not(str2 > str1);
|
874
|
+
* ```
|
875
|
+
*/
|
876
|
+
bool operator>(const String &other) const {
    // Use the length-aware cmp() rather than strcmp(): strcmp() stops at the
    // first NUL byte, so it would mis-order Strings with embedded NULs.
    return cmp(other) > 0;
}
|
880
|
+
|
881
|
+
/**
|
882
|
+
* Returns true if this is alphanumerically less than the given String.
|
883
|
+
*
|
884
|
+
* ```
|
885
|
+
* auto str1 = String { "abc" };
|
886
|
+
* auto str2 = String { "def" };
|
887
|
+
* assert(str1 < str2);
|
888
|
+
* assert_not(str2 < str1);
|
889
|
+
* ```
|
890
|
+
*/
|
891
|
+
bool operator<(const String &other) const {
    // Use the length-aware cmp() rather than strcmp(): strcmp() stops at the
    // first NUL byte, so it would mis-order Strings with embedded NULs.
    return cmp(other) < 0;
}
|
895
|
+
|
896
|
+
/**
|
897
|
+
* Returns -1, 0, or 1 by comparing this String to the given String.
|
898
|
+
* -1 is returned if this String is alphanumerically less than the other one.
|
899
|
+
* 0 is returned if they are equivalent.
|
900
|
+
* 1 is returned if this String is alphanumerically greater than the other one.
|
901
|
+
*
|
902
|
+
* ```
|
903
|
+
* auto str1 = String { "def" };
|
904
|
+
* auto str2 = String { "abc" };
|
905
|
+
* assert_eq(1, str1.cmp(str2));
|
906
|
+
* assert_eq(-1, str2.cmp(str1));
|
907
|
+
* auto str3 = String { "abc" };
|
908
|
+
* assert_eq(0, str2.cmp(str3));
|
909
|
+
* ```
|
910
|
+
*/
|
911
|
+
int cmp(const String &other) const {
|
912
|
+
if (m_length == 0) {
|
913
|
+
if (other.m_length == 0)
|
914
|
+
return 0;
|
915
|
+
return -1;
|
916
|
+
}
|
917
|
+
size_t i;
|
918
|
+
for (i = 0; i < std::min(m_length, other.m_length); ++i) {
|
919
|
+
auto c1 = (unsigned char)(*this)[i], c2 = (unsigned char)other[i];
|
920
|
+
if (c1 < c2)
|
921
|
+
return -1;
|
922
|
+
else if (c1 > c2)
|
923
|
+
return 1;
|
924
|
+
}
|
925
|
+
// "x" (len 1) <=> "xx" (len 2)
|
926
|
+
// 1 - 2 = -1
|
927
|
+
return m_length - other.m_length;
|
928
|
+
}
|
929
|
+
|
930
|
+
/**
|
931
|
+
* Finds the given String inside this one and return its starting index.
|
932
|
+
* If not found, return -1.
|
933
|
+
*
|
934
|
+
* ```
|
935
|
+
* auto str1 = String { "hello world" };
|
936
|
+
* auto str2 = String { "lo" };
|
937
|
+
* assert_eq(3, str1.find(str2));
|
938
|
+
* auto str3 = String { "xx" };
|
939
|
+
* assert_eq(-1, str1.find(str3));
|
940
|
+
* ```
|
941
|
+
*/
|
942
|
+
ssize_t find(const String &needle) const {
|
943
|
+
if (m_length < needle.size() || needle.is_empty())
|
944
|
+
return -1;
|
945
|
+
assert(m_str);
|
946
|
+
size_t max_index = m_length - needle.size();
|
947
|
+
size_t byte_count = sizeof(char) * needle.size();
|
948
|
+
for (size_t index = 0; index <= max_index; ++index) {
|
949
|
+
if (memcmp(m_str + index, needle.c_str(), byte_count) == 0)
|
950
|
+
return index;
|
951
|
+
}
|
952
|
+
return -1;
|
953
|
+
}
|
954
|
+
|
955
|
+
/**
|
956
|
+
* Finds the given character inside this String and returns its starting index.
|
957
|
+
* If not found, return -1.
|
958
|
+
*
|
959
|
+
* ```
|
960
|
+
* auto str = String { "hello world" };
|
961
|
+
* assert_eq(6, str.find('w'));
|
962
|
+
* assert_eq(-1, str.find('x'));
|
963
|
+
* ```
|
964
|
+
*/
|
965
|
+
ssize_t find(const char c) const {
|
966
|
+
for (size_t i = 0; i < m_length; ++i) {
|
967
|
+
if (c == m_str[i]) return i;
|
968
|
+
}
|
969
|
+
return -1;
|
970
|
+
}
|
971
|
+
|
972
|
+
/**
|
973
|
+
* Truncates this String to the specified length.
|
974
|
+
*
|
975
|
+
* ```
|
976
|
+
* auto str = String { "abcdef" };
|
977
|
+
* str.truncate(3);
|
978
|
+
* assert_str_eq("abc", str);
|
979
|
+
* ```
|
980
|
+
*
|
981
|
+
* This method aborts if the given length is longer
|
982
|
+
* than the String currently is.
|
983
|
+
*
|
984
|
+
* ```should_abort
|
985
|
+
* auto str = String { "abc" };
|
986
|
+
* str.truncate(4);
|
987
|
+
* ```
|
988
|
+
*/
|
989
|
+
void truncate(size_t length) {
    assert(length <= m_length);

    if (length > 0) {
        // Shorten in place: move the terminator, keep the allocation.
        m_length = length;
        m_str[m_length] = 0;
        return;
    }

    // Truncating to zero releases the buffer entirely.
    delete[] m_str;
    m_str = nullptr;
    m_capacity = 0;
    m_length = 0;
}
|
1001
|
+
|
1002
|
+
/**
|
1003
|
+
* Truncates this String to a length of zero.
|
1004
|
+
*
|
1005
|
+
* ```
|
1006
|
+
* auto str = String { "abcdef" };
|
1007
|
+
* str.clear();
|
1008
|
+
* assert_eq(0, str.size());
|
1009
|
+
* ```
|
1010
|
+
*/
|
1011
|
+
void clear() {
    // Clearing is simply truncation to length zero.
    truncate(0);
}
|
1012
|
+
|
1013
|
+
/**
|
1014
|
+
* Removes one character from the end of the String,
|
1015
|
+
* if this String is not already empty.
|
1016
|
+
*
|
1017
|
+
* ```
|
1018
|
+
* auto str = String { "ab" };
|
1019
|
+
* str.chomp();
|
1020
|
+
* assert_str_eq("a", str);
|
1021
|
+
* str.chomp();
|
1022
|
+
* assert_str_eq("", str);
|
1023
|
+
* str.chomp();
|
1024
|
+
* assert_str_eq("", str);
|
1025
|
+
* ```
|
1026
|
+
*/
|
1027
|
+
void chomp() {
    // Drop the final character, unless there is none to drop.
    if (m_length > 0)
        truncate(m_length - 1);
}
|
1031
|
+
|
1032
|
+
/**
|
1033
|
+
* Removes any trailing whitespace, including tabs and newlines,
|
1034
|
+
* from the end of the String.
|
1035
|
+
*
|
1036
|
+
* ```
|
1037
|
+
* auto str = String { "a \t\n " };
|
1038
|
+
* str.strip_trailing_whitespace();
|
1039
|
+
* assert_str_eq("a", str);
|
1040
|
+
* ```
|
1041
|
+
*/
|
1042
|
+
void strip_trailing_whitespace() {
|
1043
|
+
while (m_length > 0) {
|
1044
|
+
switch (m_str[m_length - 1]) {
|
1045
|
+
case ' ':
|
1046
|
+
case '\t':
|
1047
|
+
case '\n':
|
1048
|
+
case '\r':
|
1049
|
+
chomp();
|
1050
|
+
break;
|
1051
|
+
default:
|
1052
|
+
return;
|
1053
|
+
}
|
1054
|
+
}
|
1055
|
+
}
|
1056
|
+
|
1057
|
+
/**
|
1058
|
+
* Removes any trailing spaces from the end of the String.
|
1059
|
+
*
|
1060
|
+
* ```
|
1061
|
+
* auto str = String { "a\n " };
|
1062
|
+
* str.strip_trailing_spaces();
|
1063
|
+
* assert_str_eq("a\n", str);
|
1064
|
+
* ```
|
1065
|
+
*/
|
1066
|
+
void strip_trailing_spaces() {
|
1067
|
+
while (m_length > 0) {
|
1068
|
+
if (m_str[m_length - 1] == ' ')
|
1069
|
+
chomp();
|
1070
|
+
else
|
1071
|
+
return;
|
1072
|
+
}
|
1073
|
+
}
|
1074
|
+
|
1075
|
+
/**
|
1076
|
+
* Removes all occurrences of the given character
|
1077
|
+
* from the String.
|
1078
|
+
*
|
1079
|
+
* ```
|
1080
|
+
* auto str = String { "abcabac" };
|
1081
|
+
* str.remove('a');
|
1082
|
+
* assert_str_eq("bcbc", str);
|
1083
|
+
* ```
|
1084
|
+
*/
|
1085
|
+
void remove(char character) {
    // An unallocated String holds no characters, so there is nothing to
    // remove (and no buffer to terminate).
    if (!m_str)
        return;

    // Single-pass compaction: copy every character that is kept down to the
    // next free slot. `kept` never exceeds `i`, so reads stay ahead of writes.
    size_t kept = 0;
    for (size_t i = 0; i < m_length; ++i) {
        if (m_str[i] != character)
            m_str[kept++] = m_str[i];
    }
    m_length = kept;
    m_str[m_length] = '\0';
}
|
1099
|
+
|
1100
|
+
/**
|
1101
|
+
* Returns true if the String has a length of zero.
|
1102
|
+
*
|
1103
|
+
* ```
|
1104
|
+
* auto str1 = String { "abc" };
|
1105
|
+
* auto str2 = String { "" };
|
1106
|
+
* assert_not(str1.is_empty());
|
1107
|
+
* assert(str2.is_empty());
|
1108
|
+
* ```
|
1109
|
+
*/
|
1110
|
+
bool is_empty() const {
    // Emptiness is decided by the stored length alone.
    return m_length == 0;
}
|
1111
|
+
|
1112
|
+
/**
|
1113
|
+
* Returns a new String that is the result of incrementing
|
1114
|
+
* the last character of this String. If the last character
|
1115
|
+
* is z/Z/9, then the next-to-last character is incremented
|
1116
|
+
* (or a new one is prepended) and the last character is reset.
|
1117
|
+
*
|
1118
|
+
* ```
|
1119
|
+
* assert_str_eq("b", String("a").successive());
|
1120
|
+
* assert_str_eq("az", String("ay").successive());
|
1121
|
+
* assert_str_eq("ba", String("az").successive());
|
1122
|
+
* assert_str_eq("aaa", String("zz").successive());
|
1123
|
+
* assert_str_eq("AAA", String("ZZ").successive());
|
1124
|
+
* assert_str_eq("1", String("0").successive());
|
1125
|
+
* assert_str_eq("100", String("99").successive());
|
1126
|
+
* assert_str_eq("d000", String("c999").successive());
|
1127
|
+
* ```
|
1128
|
+
*/
|
1129
|
+
String successive() {
    auto result = String { *this };
    assert(m_length > 0);

    const size_t last = size() - 1;
    // A last character at the end of its range (z, Z, 9) rolls over and
    // carries into the preceding characters; anything else is just bumped.
    switch (m_str[last]) {
    case 'z':
        result.increment_successive_char('a', 'z', 'a');
        break;
    case 'Z':
        result.increment_successive_char('A', 'Z', 'A');
        break;
    case '9':
        result.increment_successive_char('0', '9', '1');
        break;
    default:
        result.m_str[last]++;
        break;
    }
    return result;
}
|
1145
|
+
|
1146
|
+
/**
|
1147
|
+
* Returns a new String by appending the given arguments according
|
1148
|
+
* to the given format. This is a safer version of String::sprintf
|
1149
|
+
* that does not rely on format specifiers matching the argument type.
|
1150
|
+
*
|
1151
|
+
* ```
|
1152
|
+
* auto cstr = "hello";
|
1153
|
+
* unsigned char c = 'w'; // must specify signed or unsigned char
|
1154
|
+
* int num = 999;
|
1155
|
+
* auto str = String::format("{} {}orld {}", cstr, c, num);
|
1156
|
+
* assert_str_eq("hello world 999", str);
|
1157
|
+
* ```
|
1158
|
+
*/
|
1159
|
+
template <typename... Args>
|
1160
|
+
static String format(const char *fmt, Args... args) {
|
1161
|
+
String out {};
|
1162
|
+
format(out, fmt, args...);
|
1163
|
+
return out;
|
1164
|
+
}
|
1165
|
+
|
1166
|
+
static void format(String &out, const char *fmt) {
    // Base case: no arguments remain, so the rest of the format text is
    // copied through verbatim.
    const char *cursor = fmt;
    while (*cursor != 0) {
        out.append_char(*cursor);
        ++cursor;
    }
}
|
1171
|
+
|
1172
|
+
template <typename T, typename... Args>
|
1173
|
+
static void format(String &out, const char *fmt, T first, Args... rest) {
|
1174
|
+
for (const char *c = fmt; *c != 0; c++) {
|
1175
|
+
if (*c == '{' && *(c + 1) == '}') {
|
1176
|
+
c++;
|
1177
|
+
out.append(first);
|
1178
|
+
format(out, c + 1, rest...);
|
1179
|
+
return;
|
1180
|
+
} else {
|
1181
|
+
out.append_char(*c);
|
1182
|
+
}
|
1183
|
+
}
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
/**
|
1187
|
+
* Returns a new String where every character is converted to uppercase.
|
1188
|
+
*
|
1189
|
+
* ```
|
1190
|
+
* auto str = String("hElLo");
|
1191
|
+
* assert_str_eq("HELLO", str.uppercase());
|
1192
|
+
* ```
|
1193
|
+
*/
|
1194
|
+
String uppercase() const {
    auto new_str = String(this);
    for (size_t i = 0; i < new_str.m_length; ++i) {
        // toupper() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for bytes >= 0x80, so convert first.
        new_str.m_str[i] = (char)toupper((unsigned char)new_str.m_str[i]);
    }
    return new_str;
}
|
1201
|
+
|
1202
|
+
/**
|
1203
|
+
* Returns a new String where every character is converted to lowercase.
|
1204
|
+
*
|
1205
|
+
* ```
|
1206
|
+
* auto str = String("hElLo");
|
1207
|
+
* assert_str_eq("hello", str.lowercase());
|
1208
|
+
* ```
|
1209
|
+
*/
|
1210
|
+
String lowercase() const {
    auto new_str = String(this);
    for (size_t i = 0; i < new_str.m_length; ++i) {
        // tolower() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for bytes >= 0x80, so convert first.
        new_str.m_str[i] = (char)tolower((unsigned char)new_str.m_str[i]);
    }
    return new_str;
}
|
1217
|
+
|
1218
|
+
/**
|
1219
|
+
* Returns true if this String ends with the given String.
|
1220
|
+
*
|
1221
|
+
* ```
|
1222
|
+
* auto str = String("hello world");
|
1223
|
+
* assert(str.ends_with("world"));
|
1224
|
+
* assert_not(str.ends_with("xxx"));
|
1225
|
+
* ```
|
1226
|
+
*/
|
1227
|
+
bool ends_with(const String &needle) {
|
1228
|
+
if (m_length < needle.m_length)
|
1229
|
+
return false;
|
1230
|
+
return memcmp(m_str + m_length - needle.m_length, needle.m_str, needle.m_length) == 0;
|
1231
|
+
}
|
1232
|
+
|
1233
|
+
/**
|
1234
|
+
* Returns hash value of this String.
|
1235
|
+
* This uses the 'djb2' hash algorithm by Dan Bernstein.
|
1236
|
+
*
|
1237
|
+
* ```
|
1238
|
+
* auto str = String("hello");
|
1239
|
+
* assert_eq(210714636441, str.djb2_hash());
|
1240
|
+
* ```
|
1241
|
+
*/
|
1242
|
+
size_t djb2_hash() const {
|
1243
|
+
size_t hash = 5381;
|
1244
|
+
int c;
|
1245
|
+
for (size_t i = 0; i < m_length; ++i) {
|
1246
|
+
c = (*this)[i];
|
1247
|
+
hash = ((hash << 5) + hash) + c;
|
1248
|
+
}
|
1249
|
+
return hash;
|
1250
|
+
}
|
1251
|
+
|
1252
|
+
/**
|
1253
|
+
* Prints the full string with printf(), character by character.
|
1254
|
+
* This method will print the full String, even if null characters
|
1255
|
+
* are encountered.
|
1256
|
+
*
|
1257
|
+
* ```
|
1258
|
+
* auto str = String("foo\0bar");
|
1259
|
+
* str.print();
|
1260
|
+
* ```
|
1261
|
+
*/
|
1262
|
+
void print() const {
|
1263
|
+
for (size_t i = 0; i < m_length; ++i) {
|
1264
|
+
printf("%c", (*this)[i]);
|
1265
|
+
}
|
1266
|
+
printf("\n");
|
1267
|
+
}
|
1268
|
+
|
1269
|
+
/**
|
1270
|
+
* Returns true if the string contains UTF-8-encoded
|
1271
|
+
* characters that seem to be valid, multibyte or not.
|
1272
|
+
*
|
1273
|
+
* An ASCII string "foo" would return true, because it's
|
1274
|
+
* also a valid UTF-8-encoded string. It can be
|
1275
|
+
* represented with UTF-8.
|
1276
|
+
*
|
1277
|
+
* NOTE: This is not fool-proof. There's a lot of
|
1278
|
+
* checks we aren't doing.
|
1279
|
+
*
|
1280
|
+
* ```
|
1281
|
+
* assert_eq(true, String("abc").contains_seemingly_valid_utf8_encoded_characters());
|
1282
|
+
* assert_eq(true, String("π").contains_seemingly_valid_utf8_encoded_characters());
|
1283
|
+
* assert_eq(false, String("\xC3").contains_seemingly_valid_utf8_encoded_characters());
|
1284
|
+
* ```
|
1285
|
+
*/
|
1286
|
+
bool contains_seemingly_valid_utf8_encoded_characters() const {
|
1287
|
+
int index = 0;
|
1288
|
+
char buf[5];
|
1289
|
+
do {
|
1290
|
+
index = next_utf8_char(index, buf);
|
1291
|
+
} while (index > 0);
|
1292
|
+
return index == 0;
|
1293
|
+
}
|
1294
|
+
|
1295
|
+
/**
|
1296
|
+
* Returns true if the string contains multibyte
|
1297
|
+
* UTF-8-encoded characters that seem to be valid.
|
1298
|
+
*
|
1299
|
+
* NOTE: This is not fool-proof. There's a lot of
|
1300
|
+
* checks we aren't doing.
|
1301
|
+
*
|
1302
|
+
* ```
|
1303
|
+
* assert_eq(false, String("abc").contains_utf8_encoded_multibyte_characters());
|
1304
|
+
* assert_eq(true, String("π").contains_utf8_encoded_multibyte_characters());
|
1305
|
+
* assert_eq(false, String("\xC3").contains_utf8_encoded_multibyte_characters());
|
1306
|
+
* ```
|
1307
|
+
*/
|
1308
|
+
bool contains_utf8_encoded_multibyte_characters() const {
|
1309
|
+
int index_was = 0;
|
1310
|
+
int index = 0;
|
1311
|
+
char buf[5];
|
1312
|
+
bool multibyte = false;
|
1313
|
+
do {
|
1314
|
+
index = next_utf8_char(index, buf);
|
1315
|
+
if (index > index_was + 1)
|
1316
|
+
multibyte = true;
|
1317
|
+
index_was = index;
|
1318
|
+
} while (index > 0);
|
1319
|
+
return index == 0 && multibyte;
|
1320
|
+
}
|
1321
|
+
|
1322
|
+
/**
|
1323
|
+
* Takes an integer for the starting index and a
|
1324
|
+
* buffer char*. Consumes the proper number of
|
1325
|
+
* bytes, appending to the buffer C string to build
|
1326
|
+
* a single UTF-8-encoded character.
|
1327
|
+
*
|
1328
|
+
* Be sure to pass a pointer to a buffer that is
|
1329
|
+
* at least 5 characters in size (one is used for
|
1330
|
+
* the null terminator).
|
1331
|
+
*
|
1332
|
+
* Returns the new index, a positive integer,
|
1333
|
+
* on success.
|
1334
|
+
*
|
1335
|
+
* Returns zero (0) if the end of the string has
|
1336
|
+
* been reached.
|
1337
|
+
*
|
1338
|
+
* Returns a negative integer if the character
|
1339
|
+
* is invalid.
|
1340
|
+
*
|
1341
|
+
* (Note: this method only does rudimentary checking
|
1342
|
+
* for a valid character, i.e. does it have enough
|
1343
|
+
* bytes to satisfy the encoding.)
|
1344
|
+
*
|
1345
|
+
* ```
|
1346
|
+
* auto str = String("abβΊπ");
|
1347
|
+
* int index = 0;
|
1348
|
+
*
|
1349
|
+
* char buf[5];
|
1350
|
+
* index = str.next_utf8_char(index, buf);
|
1351
|
+
* assert_eq(1, index);
|
1352
|
+
* assert_str_eq("a", String(buf));
|
1353
|
+
*
|
1354
|
+
* buf[0] = '\0';
|
1355
|
+
* index = str.next_utf8_char(index, buf);
|
1356
|
+
* assert_eq(2, index);
|
1357
|
+
* assert_str_eq("b", String(buf));
|
1358
|
+
*
|
1359
|
+
* buf[0] = '\0';
|
1360
|
+
* index = str.next_utf8_char(index, buf);
|
1361
|
+
* assert_eq(5, index);
|
1362
|
+
* assert_str_eq("βΊ", String(buf));
|
1363
|
+
*
|
1364
|
+
* buf[0] = '\0';
|
1365
|
+
* index = str.next_utf8_char(index, buf);
|
1366
|
+
* assert_eq(9, index);
|
1367
|
+
* assert_str_eq("π", String(buf));
|
1368
|
+
*
|
1369
|
+
* buf[0] = '\0';
|
1370
|
+
* index = str.next_utf8_char(index, buf);
|
1371
|
+
* assert_eq(0, index);
|
1372
|
+
* assert_str_eq("", String(buf));
|
1373
|
+
* ```
|
1374
|
+
*/
|
1375
|
+
int next_utf8_char(int index, char *buffer) const {
    if (!m_str)
        return 0;
    assert(m_length < INT_MAX);
    assert(index >= 0);
    if ((size_t)index >= m_length)
        return 0;

    buffer[0] = m_str[index];
    const unsigned char lead = (unsigned char)buffer[0];

    // The high bits of the lead byte announce how many bytes follow.
    int continuation_bytes = 0;
    if ((lead >> 3) == 30) // 11110xxx, 4 bytes
        continuation_bytes = 3;
    else if ((lead >> 4) == 14) // 1110xxxx, 3 bytes
        continuation_bytes = 2;
    else if ((lead >> 5) == 6) // 110xxxxx, 2 bytes
        continuation_bytes = 1;

    // Not enough bytes left to complete the character.
    if (continuation_bytes > 0 && (size_t)index + continuation_bytes >= m_length)
        return -1;

    for (int offset = 1; offset <= continuation_bytes; ++offset)
        buffer[offset] = m_str[index + offset];
    buffer[continuation_bytes + 1] = 0;

    return index + continuation_bytes + 1;
}
|
1403
|
+
|
1404
|
+
private:
|
1405
|
+
void grow(size_t new_capacity) {
|
1406
|
+
assert(new_capacity >= m_length);
|
1407
|
+
auto old_str = m_str;
|
1408
|
+
m_str = new char[new_capacity + 1];
|
1409
|
+
if (old_str)
|
1410
|
+
memcpy(m_str, old_str, sizeof(char) * (m_capacity + 1));
|
1411
|
+
else
|
1412
|
+
m_str[0] = '\0';
|
1413
|
+
delete[] old_str;
|
1414
|
+
m_capacity = new_capacity;
|
1415
|
+
}
|
1416
|
+
|
1417
|
+
void grow_at_least(size_t min_capacity) {
|
1418
|
+
if (m_capacity >= min_capacity) return;
|
1419
|
+
if (m_capacity > 0 && min_capacity <= m_capacity * STRING_GROW_FACTOR) {
|
1420
|
+
grow(m_capacity * STRING_GROW_FACTOR);
|
1421
|
+
} else {
|
1422
|
+
grow(min_capacity);
|
1423
|
+
}
|
1424
|
+
}
|
1425
|
+
|
1426
|
+
void increment_successive_char(char first_char_in_range, char last_char_in_range, char prepend_char_to_grow) {
|
1427
|
+
assert(m_length > 0);
|
1428
|
+
ssize_t index = m_length - 1;
|
1429
|
+
char last_char = m_str[index];
|
1430
|
+
while (last_char == last_char_in_range) {
|
1431
|
+
m_str[index] = first_char_in_range;
|
1432
|
+
if ((--index) < 0)
|
1433
|
+
break;
|
1434
|
+
last_char = m_str[index];
|
1435
|
+
}
|
1436
|
+
if (index == -1) {
|
1437
|
+
this->prepend_char(prepend_char_to_grow);
|
1438
|
+
} else {
|
1439
|
+
m_str[index]++;
|
1440
|
+
}
|
1441
|
+
}
|
1442
|
+
|
1443
|
+
char *m_str { nullptr };
|
1444
|
+
size_t m_length { 0 };
|
1445
|
+
size_t m_capacity { 0 };
|
1446
|
+
};
|
1447
|
+
}
|