cxxfilt 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  /* Demangler for the Rust programming language
2
- Copyright (C) 2016-2018 Free Software Foundation, Inc.
2
+ Copyright (C) 2016-2025 Free Software Foundation, Inc.
3
3
  Written by David Tolnay (dtolnay@gmail.com).
4
+ Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
4
5
 
5
6
  This file is part of the libiberty library.
6
7
  Libiberty is free software; you can redistribute it and/or
@@ -33,9 +34,11 @@ If not, see <http://www.gnu.org/licenses/>. */
33
34
 
34
35
  #include "safe-ctype.h"
35
36
 
37
+ #include <inttypes.h>
36
38
  #include <sys/types.h>
37
39
  #include <string.h>
38
40
  #include <stdio.h>
41
+ #include <stdlib.h>
39
42
 
40
43
  #ifdef HAVE_STRING_H
41
44
  #include <string.h>
@@ -48,301 +51,1554 @@ extern void *memset(void *s, int c, size_t n);
48
51
  #include <demangle.h>
49
52
  #include "libiberty.h"
50
53
 
54
+ struct rust_demangler
55
+ {
56
+ const char *sym;
57
+ size_t sym_len;
51
58
 
52
- /* Mangled Rust symbols look like this:
53
- _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
59
+ void *callback_opaque;
60
+ demangle_callbackref callback;
54
61
 
55
- The original symbol is:
56
- <std::sys::fd::FileDesc as core::ops::Drop>::drop
62
+ /* Position of the next character to read from the symbol. */
63
+ size_t next;
57
64
 
58
- The last component of the path is a 64-bit hash in lowercase hex,
59
- prefixed with "h". Rust does not have a global namespace between
60
- crates, an illusion which Rust maintains by using the hash to
61
- distinguish things that would otherwise have the same symbol.
65
+ /* Non-zero if any error occurred. */
66
+ int errored;
62
67
 
63
- Any path component not starting with a XID_Start character is
64
- prefixed with "_".
68
+ /* Non-zero if nothing should be printed. */
69
+ int skipping_printing;
65
70
 
66
- The following escape sequences are used:
71
+ /* Non-zero if printing should be verbose (e.g. include hashes). */
72
+ int verbose;
67
73
 
68
- "," => $C$
69
- "@" => $SP$
70
- "*" => $BP$
71
- "&" => $RF$
72
- "<" => $LT$
73
- ">" => $GT$
74
- "(" => $LP$
75
- ")" => $RP$
76
- " " => $u20$
77
- "\"" => $u22$
78
- "'" => $u27$
79
- "+" => $u2b$
80
- ";" => $u3b$
81
- "[" => $u5b$
82
- "]" => $u5d$
83
- "{" => $u7b$
84
- "}" => $u7d$
85
- "~" => $u7e$
74
+ /* Rust mangling version, with legacy mangling being -1. */
75
+ int version;
86
76
 
87
- A double ".." means "::" and a single "." means "-".
77
+ /* Recursion depth. */
78
+ unsigned int recursion;
79
+ /* Maximum number of times demangle_path may be called recursively. */
80
+ #define RUST_MAX_RECURSION_COUNT 1024
81
+ #define RUST_NO_RECURSION_LIMIT ((unsigned int) -1)
88
82
 
89
- The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ */
83
+ uint64_t bound_lifetime_depth;
84
+ };
90
85
 
91
- static const char *hash_prefix = "::h";
92
- static const size_t hash_prefix_len = 3;
93
- static const size_t hash_len = 16;
86
+ /* Parsing functions. */
94
87
 
95
- static int is_prefixed_hash (const char *start);
96
- static int looks_like_rust (const char *sym, size_t len);
97
- static int unescape (const char **in, char **out, const char *seq, char value);
88
+ static char
89
+ peek (const struct rust_demangler *rdm)
90
+ {
91
+ if (rdm->next < rdm->sym_len)
92
+ return rdm->sym[rdm->next];
93
+ return 0;
94
+ }
98
95
 
99
- /* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
96
+ static int
97
+ eat (struct rust_demangler *rdm, char c)
98
+ {
99
+ if (peek (rdm) == c)
100
+ {
101
+ rdm->next++;
102
+ return 1;
103
+ }
104
+ else
105
+ return 0;
106
+ }
100
107
 
101
- This function looks for the following indicators:
108
+ static char
109
+ next (struct rust_demangler *rdm)
110
+ {
111
+ char c = peek (rdm);
112
+ if (!c)
113
+ rdm->errored = 1;
114
+ else
115
+ rdm->next++;
116
+ return c;
117
+ }
102
118
 
103
- 1. The hash must consist of "h" followed by 16 lowercase hex digits.
119
+ static uint64_t
120
+ parse_integer_62 (struct rust_demangler *rdm)
121
+ {
122
+ char c;
123
+ uint64_t x;
104
124
 
105
- 2. As a sanity check, the hash must use between 5 and 15 of the 16
106
- possible hex digits. This is true of 99.9998% of hashes so once
107
- in your life you may see a false negative. The point is to
108
- notice path components that could be Rust hashes but are
109
- probably not, like "haaaaaaaaaaaaaaaa". In this case a false
110
- positive (non-Rust symbol has an important path component
111
- removed because it looks like a Rust hash) is worse than a false
112
- negative (the rare Rust symbol is not demangled) so this sets
113
- the balance in favor of false negatives.
125
+ if (eat (rdm, '_'))
126
+ return 0;
114
127
 
115
- 3. There must be no characters other than a-zA-Z0-9 and _.:$
128
+ x = 0;
129
+ while (!eat (rdm, '_') && !rdm->errored)
130
+ {
131
+ c = next (rdm);
132
+ x *= 62;
133
+ if (ISDIGIT (c))
134
+ x += c - '0';
135
+ else if (ISLOWER (c))
136
+ x += 10 + (c - 'a');
137
+ else if (ISUPPER (c))
138
+ x += 10 + 26 + (c - 'A');
139
+ else
140
+ {
141
+ rdm->errored = 1;
142
+ return 0;
143
+ }
144
+ }
145
+ return x + 1;
146
+ }
116
147
 
117
- 4. There must be no unrecognized $-sign sequences.
148
+ static uint64_t
149
+ parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
150
+ {
151
+ if (!eat (rdm, tag))
152
+ return 0;
153
+ return 1 + parse_integer_62 (rdm);
154
+ }
118
155
 
119
- 5. There must be no sequence of three or more dots in a row ("..."). */
156
+ static uint64_t
157
+ parse_disambiguator (struct rust_demangler *rdm)
158
+ {
159
+ return parse_opt_integer_62 (rdm, 's');
160
+ }
120
161
 
121
- int
122
- rust_is_mangled (const char *sym)
162
+ static size_t
163
+ parse_hex_nibbles (struct rust_demangler *rdm, uint64_t *value)
123
164
  {
124
- size_t len, len_without_hash;
165
+ char c;
166
+ size_t hex_len;
125
167
 
126
- if (!sym)
127
- return 0;
168
+ hex_len = 0;
169
+ *value = 0;
128
170
 
129
- len = strlen (sym);
130
- if (len <= hash_prefix_len + hash_len)
131
- /* Not long enough to contain "::h" + hash + something else */
132
- return 0;
171
+ while (!eat (rdm, '_'))
172
+ {
173
+ *value <<= 4;
133
174
 
134
- len_without_hash = len - (hash_prefix_len + hash_len);
135
- if (!is_prefixed_hash (sym + len_without_hash))
136
- return 0;
175
+ c = next (rdm);
176
+ if (ISDIGIT (c))
177
+ *value |= c - '0';
178
+ else if (c >= 'a' && c <= 'f')
179
+ *value |= 10 + (c - 'a');
180
+ else
181
+ {
182
+ rdm->errored = 1;
183
+ return 0;
184
+ }
185
+ hex_len++;
186
+ }
187
+
188
+ return hex_len;
189
+ }
190
+
191
+ struct rust_mangled_ident
192
+ {
193
+ /* ASCII part of the identifier. */
194
+ const char *ascii;
195
+ size_t ascii_len;
196
+
197
+ /* Punycode insertion codes for Unicode codepoints, if any. */
198
+ const char *punycode;
199
+ size_t punycode_len;
200
+ };
201
+
202
+ static struct rust_mangled_ident
203
+ parse_ident (struct rust_demangler *rdm)
204
+ {
205
+ char c;
206
+ size_t start, len;
207
+ int is_punycode = 0;
208
+ struct rust_mangled_ident ident;
209
+
210
+ ident.ascii = NULL;
211
+ ident.ascii_len = 0;
212
+ ident.punycode = NULL;
213
+ ident.punycode_len = 0;
214
+
215
+ if (rdm->version != -1)
216
+ is_punycode = eat (rdm, 'u');
217
+
218
+ c = next (rdm);
219
+ if (!ISDIGIT (c))
220
+ {
221
+ rdm->errored = 1;
222
+ return ident;
223
+ }
224
+ len = c - '0';
225
+
226
+ if (c != '0')
227
+ while (ISDIGIT (peek (rdm)))
228
+ len = len * 10 + (next (rdm) - '0');
229
+
230
+ /* Skip past the optional `_` separator (v0). */
231
+ if (rdm->version != -1)
232
+ eat (rdm, '_');
137
233
 
138
- return looks_like_rust (sym, len_without_hash);
234
+ start = rdm->next;
235
+ rdm->next += len;
236
+ /* Check for overflows. */
237
+ if ((start > rdm->next) || (rdm->next > rdm->sym_len))
238
+ {
239
+ rdm->errored = 1;
240
+ return ident;
241
+ }
242
+
243
+ ident.ascii = rdm->sym + start;
244
+ ident.ascii_len = len;
245
+
246
+ if (is_punycode)
247
+ {
248
+ ident.punycode_len = 0;
249
+ while (ident.ascii_len > 0)
250
+ {
251
+ ident.ascii_len--;
252
+
253
+ /* The last '_' is a separator between ascii & punycode. */
254
+ if (ident.ascii[ident.ascii_len] == '_')
255
+ break;
256
+
257
+ ident.punycode_len++;
258
+ }
259
+ if (!ident.punycode_len)
260
+ {
261
+ rdm->errored = 1;
262
+ return ident;
263
+ }
264
+ ident.punycode = ident.ascii + (len - ident.punycode_len);
265
+ }
266
+
267
+ if (ident.ascii_len == 0)
268
+ ident.ascii = NULL;
269
+
270
+ return ident;
139
271
  }
140
272
 
141
- /* A hash is the prefix "::h" followed by 16 lowercase hex digits. The
142
- hex digits must comprise between 5 and 15 (inclusive) distinct
143
- digits. */
273
+ /* Printing functions. */
274
+
275
+ static void
276
+ print_str (struct rust_demangler *rdm, const char *data, size_t len)
277
+ {
278
+ if (!rdm->errored && !rdm->skipping_printing)
279
+ rdm->callback (data, len, rdm->callback_opaque);
280
+ }
144
281
 
282
+ #define PRINT(s) print_str (rdm, s, strlen (s))
283
+
284
+ static void
285
+ print_uint64 (struct rust_demangler *rdm, uint64_t x)
286
+ {
287
+ char s[21];
288
+ snprintf (s, 21, "%" PRIu64, x);
289
+ PRINT (s);
290
+ }
291
+
292
+ static void
293
+ print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
294
+ {
295
+ char s[17];
296
+ snprintf (s, 17, "%" PRIx64, x);
297
+ PRINT (s);
298
+ }
299
+
300
+ /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
145
301
  static int
146
- is_prefixed_hash (const char *str)
302
+ decode_lower_hex_nibble (char nibble)
147
303
  {
148
- const char *end;
149
- char seen[16];
150
- size_t i;
151
- int count;
304
+ if ('0' <= nibble && nibble <= '9')
305
+ return nibble - '0';
306
+ if ('a' <= nibble && nibble <= 'f')
307
+ return 0xa + (nibble - 'a');
308
+ return -1;
309
+ }
152
310
 
153
- if (strncmp (str, hash_prefix, hash_prefix_len))
311
+ /* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
312
+ static char
313
+ decode_legacy_escape (const char *e, size_t len, size_t *out_len)
314
+ {
315
+ char c = 0;
316
+ size_t escape_len = 0;
317
+ int lo_nibble = -1, hi_nibble = -1;
318
+
319
+ if (len < 3 || e[0] != '$')
154
320
  return 0;
155
- str += hash_prefix_len;
156
-
157
- memset (seen, 0, sizeof(seen));
158
- for (end = str + hash_len; str < end; str++)
159
- if (*str >= '0' && *str <= '9')
160
- seen[*str - '0'] = 1;
161
- else if (*str >= 'a' && *str <= 'f')
162
- seen[*str - 'a' + 10] = 1;
163
- else
164
- return 0;
165
321
 
166
- /* Count how many distinct digits seen */
167
- count = 0;
168
- for (i = 0; i < 16; i++)
169
- if (seen[i])
170
- count++;
322
+ e++;
323
+ len--;
324
+
325
+ if (e[0] == 'C')
326
+ {
327
+ escape_len = 1;
328
+
329
+ c = ',';
330
+ }
331
+ else if (len > 2)
332
+ {
333
+ escape_len = 2;
334
+
335
+ if (e[0] == 'S' && e[1] == 'P')
336
+ c = '@';
337
+ else if (e[0] == 'B' && e[1] == 'P')
338
+ c = '*';
339
+ else if (e[0] == 'R' && e[1] == 'F')
340
+ c = '&';
341
+ else if (e[0] == 'L' && e[1] == 'T')
342
+ c = '<';
343
+ else if (e[0] == 'G' && e[1] == 'T')
344
+ c = '>';
345
+ else if (e[0] == 'L' && e[1] == 'P')
346
+ c = '(';
347
+ else if (e[0] == 'R' && e[1] == 'P')
348
+ c = ')';
349
+ else if (e[0] == 'u' && len > 3)
350
+ {
351
+ escape_len = 3;
171
352
 
172
- return count >= 5 && count <= 15;
353
+ hi_nibble = decode_lower_hex_nibble (e[1]);
354
+ if (hi_nibble < 0)
355
+ return 0;
356
+ lo_nibble = decode_lower_hex_nibble (e[2]);
357
+ if (lo_nibble < 0)
358
+ return 0;
359
+
360
+ /* Only allow non-control ASCII characters. */
361
+ if (hi_nibble > 7)
362
+ return 0;
363
+ c = (hi_nibble << 4) | lo_nibble;
364
+ if (c < 0x20)
365
+ return 0;
366
+ }
367
+ }
368
+
369
+ if (!c || len <= escape_len || e[escape_len] != '$')
370
+ return 0;
371
+
372
+ *out_len = 2 + escape_len;
373
+ return c;
173
374
  }
174
375
 
175
- static int
176
- looks_like_rust (const char *str, size_t len)
177
- {
178
- const char *end = str + len;
179
-
180
- while (str < end)
181
- switch (*str)
182
- {
183
- case '$':
184
- if (!strncmp (str, "$C$", 3))
185
- str += 3;
186
- else if (!strncmp (str, "$SP$", 4)
187
- || !strncmp (str, "$BP$", 4)
188
- || !strncmp (str, "$RF$", 4)
189
- || !strncmp (str, "$LT$", 4)
190
- || !strncmp (str, "$GT$", 4)
191
- || !strncmp (str, "$LP$", 4)
192
- || !strncmp (str, "$RP$", 4))
193
- str += 4;
194
- else if (!strncmp (str, "$u20$", 5)
195
- || !strncmp (str, "$u22$", 5)
196
- || !strncmp (str, "$u27$", 5)
197
- || !strncmp (str, "$u2b$", 5)
198
- || !strncmp (str, "$u3b$", 5)
199
- || !strncmp (str, "$u5b$", 5)
200
- || !strncmp (str, "$u5d$", 5)
201
- || !strncmp (str, "$u7b$", 5)
202
- || !strncmp (str, "$u7d$", 5)
203
- || !strncmp (str, "$u7e$", 5))
204
- str += 5;
205
- else
206
- return 0;
207
- break;
208
- case '.':
209
- /* Do not allow three or more consecutive dots */
210
- if (!strncmp (str, "...", 3))
211
- return 0;
212
- /* Fall through */
213
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
214
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
215
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
216
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
217
- case 'y': case 'z':
218
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
219
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
220
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
221
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
222
- case 'Y': case 'Z':
223
- case '0': case '1': case '2': case '3': case '4': case '5':
224
- case '6': case '7': case '8': case '9':
225
- case '_':
226
- case ':':
227
- str++;
228
- break;
229
- default:
230
- return 0;
231
- }
232
-
233
- return 1;
234
- }
235
-
236
- /*
237
- INPUT: sym: symbol for which rust_is_mangled(sym) returned 1.
238
-
239
- The input is demangled in-place because the mangled name is always
240
- longer than the demangled one. */
241
-
242
- void
243
- rust_demangle_sym (char *sym)
244
- {
245
- const char *in;
246
- char *out;
247
- const char *end;
248
-
249
- if (!sym)
376
+ static void
377
+ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
378
+ {
379
+ char unescaped;
380
+ uint8_t *out, *p, d;
381
+ size_t len, cap, punycode_pos, j;
382
+ /* Punycode parameters and state. */
383
+ uint32_t c;
384
+ size_t base, t_min, t_max, skew, damp, bias, i;
385
+ size_t delta, w, k, t;
386
+
387
+ if (rdm->errored || rdm->skipping_printing)
250
388
  return;
251
389
 
252
- in = sym;
253
- out = sym;
254
- end = sym + strlen (sym) - (hash_prefix_len + hash_len);
255
-
256
- while (in < end)
257
- switch (*in)
258
- {
259
- case '$':
260
- if (!(unescape (&in, &out, "$C$", ',')
261
- || unescape (&in, &out, "$SP$", '@')
262
- || unescape (&in, &out, "$BP$", '*')
263
- || unescape (&in, &out, "$RF$", '&')
264
- || unescape (&in, &out, "$LT$", '<')
265
- || unescape (&in, &out, "$GT$", '>')
266
- || unescape (&in, &out, "$LP$", '(')
267
- || unescape (&in, &out, "$RP$", ')')
268
- || unescape (&in, &out, "$u20$", ' ')
269
- || unescape (&in, &out, "$u22$", '\"')
270
- || unescape (&in, &out, "$u27$", '\'')
271
- || unescape (&in, &out, "$u2b$", '+')
272
- || unescape (&in, &out, "$u3b$", ';')
273
- || unescape (&in, &out, "$u5b$", '[')
274
- || unescape (&in, &out, "$u5d$", ']')
275
- || unescape (&in, &out, "$u7b$", '{')
276
- || unescape (&in, &out, "$u7d$", '}')
277
- || unescape (&in, &out, "$u7e$", '~'))) {
278
- /* unexpected escape sequence, not looks_like_rust. */
279
- goto fail;
390
+ if (rdm->version == -1)
391
+ {
392
+ /* Ignore leading underscores preceding escape sequences.
393
+ The mangler inserts an underscore to make sure the
394
+ identifier begins with a XID_Start character. */
395
+ if (ident.ascii_len >= 2 && ident.ascii[0] == '_'
396
+ && ident.ascii[1] == '$')
397
+ {
398
+ ident.ascii++;
399
+ ident.ascii_len--;
400
+ }
401
+
402
+ while (ident.ascii_len > 0)
403
+ {
404
+ /* Handle legacy escape sequences ("$...$", ".." or "."). */
405
+ if (ident.ascii[0] == '$')
406
+ {
407
+ unescaped
408
+ = decode_legacy_escape (ident.ascii, ident.ascii_len, &len);
409
+ if (unescaped)
410
+ print_str (rdm, &unescaped, 1);
411
+ else
412
+ {
413
+ /* Unexpected escape sequence, print the rest verbatim. */
414
+ print_str (rdm, ident.ascii, ident.ascii_len);
415
+ return;
416
+ }
417
+ }
418
+ else if (ident.ascii[0] == '.')
419
+ {
420
+ if (ident.ascii_len >= 2 && ident.ascii[1] == '.')
421
+ {
422
+ /* ".." becomes "::" */
423
+ PRINT ("::");
424
+ len = 2;
425
+ }
426
+ else
427
+ {
428
+ PRINT (".");
429
+ len = 1;
430
+ }
431
+ }
432
+ else
433
+ {
434
+ /* Print everything before the next escape sequence, at once. */
435
+ for (len = 0; len < ident.ascii_len; len++)
436
+ if (ident.ascii[len] == '$' || ident.ascii[len] == '.')
437
+ break;
438
+
439
+ print_str (rdm, ident.ascii, len);
440
+ }
441
+
442
+ ident.ascii += len;
443
+ ident.ascii_len -= len;
444
+ }
445
+
446
+ return;
447
+ }
448
+
449
+ if (!ident.punycode)
450
+ {
451
+ print_str (rdm, ident.ascii, ident.ascii_len);
452
+ return;
453
+ }
454
+
455
+ len = 0;
456
+ cap = 4;
457
+ while (cap < ident.ascii_len)
458
+ {
459
+ cap *= 2;
460
+ /* Check for overflows. */
461
+ if ((cap * 4) / 4 != cap)
462
+ {
463
+ rdm->errored = 1;
464
+ return;
465
+ }
466
+ }
467
+
468
+ /* Store the output codepoints as groups of 4 UTF-8 bytes. */
469
+ out = (uint8_t *)malloc (cap * 4);
470
+ if (!out)
471
+ {
472
+ rdm->errored = 1;
473
+ return;
474
+ }
475
+
476
+ /* Populate initial output from ASCII fragment. */
477
+ for (len = 0; len < ident.ascii_len; len++)
478
+ {
479
+ p = out + 4 * len;
480
+ p[0] = 0;
481
+ p[1] = 0;
482
+ p[2] = 0;
483
+ p[3] = ident.ascii[len];
484
+ }
485
+
486
+ /* Punycode parameters and initial state. */
487
+ base = 36;
488
+ t_min = 1;
489
+ t_max = 26;
490
+ skew = 38;
491
+ damp = 700;
492
+ bias = 72;
493
+ i = 0;
494
+ c = 0x80;
495
+
496
+ punycode_pos = 0;
497
+ while (punycode_pos < ident.punycode_len)
498
+ {
499
+ /* Read one delta value. */
500
+ delta = 0;
501
+ w = 1;
502
+ k = 0;
503
+ do
504
+ {
505
+ k += base;
506
+ t = k < bias ? 0 : (k - bias);
507
+ if (t < t_min)
508
+ t = t_min;
509
+ if (t > t_max)
510
+ t = t_max;
511
+
512
+ if (punycode_pos >= ident.punycode_len)
513
+ goto cleanup;
514
+ d = ident.punycode[punycode_pos++];
515
+
516
+ if (ISLOWER (d))
517
+ d = d - 'a';
518
+ else if (ISDIGIT (d))
519
+ d = 26 + (d - '0');
520
+ else
521
+ {
522
+ rdm->errored = 1;
523
+ goto cleanup;
524
+ }
525
+
526
+ delta += d * w;
527
+ w *= base - t;
528
+ }
529
+ while (d >= t);
530
+
531
+ /* Compute the new insert position and character. */
532
+ len++;
533
+ i += delta;
534
+ c += i / len;
535
+ i %= len;
536
+
537
+ /* Ensure enough space is available. */
538
+ if (cap < len)
539
+ {
540
+ cap *= 2;
541
+ /* Check for overflows. */
542
+ if ((cap * 4) / 4 != cap || cap < len)
543
+ {
544
+ rdm->errored = 1;
545
+ goto cleanup;
546
+ }
547
+ }
548
+ p = (uint8_t *)realloc (out, cap * 4);
549
+ if (!p)
550
+ {
551
+ rdm->errored = 1;
552
+ goto cleanup;
553
+ }
554
+ out = p;
555
+
556
+ /* Move the characters after the insert position. */
557
+ p = out + i * 4;
558
+ memmove (p + 4, p, (len - i - 1) * 4);
559
+
560
+ /* Insert the new character, as UTF-8 bytes. */
561
+ p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
562
+ p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
563
+ p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
564
+ p[3] = 0x80 | (c & 0x3f);
565
+
566
+ /* If there are no more deltas, decoding is complete. */
567
+ if (punycode_pos == ident.punycode_len)
568
+ break;
569
+
570
+ i++;
571
+
572
+ /* Perform bias adaptation. */
573
+ delta /= damp;
574
+ damp = 2;
575
+
576
+ delta += delta / len;
577
+ k = 0;
578
+ while (delta > ((base - t_min) * t_max) / 2)
579
+ {
580
+ delta /= base - t_min;
581
+ k += base;
582
+ }
583
+ bias = k + ((base - t_min + 1) * delta) / (delta + skew);
584
+ }
585
+
586
+ /* Remove all the 0 bytes to leave behind an UTF-8 string. */
587
+ for (i = 0, j = 0; i < len * 4; i++)
588
+ if (out[i] != 0)
589
+ out[j++] = out[i];
590
+
591
+ print_str (rdm, (const char *)out, j);
592
+
593
+ cleanup:
594
+ free (out);
595
+ }
596
+
597
+ /* Print the lifetime according to the previously decoded index.
598
+ An index of `0` always refers to `'_`, but starting with `1`,
599
+ indices refer to late-bound lifetimes introduced by a binder. */
600
+ static void
601
+ print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
602
+ {
603
+ char c;
604
+ uint64_t depth;
605
+
606
+ PRINT ("'");
607
+ if (lt == 0)
608
+ {
609
+ PRINT ("_");
610
+ return;
611
+ }
612
+
613
+ depth = rdm->bound_lifetime_depth - lt;
614
+ /* Try to print lifetimes alphabetically first. */
615
+ if (depth < 26)
616
+ {
617
+ c = 'a' + depth;
618
+ print_str (rdm, &c, 1);
619
+ }
620
+ else
621
+ {
622
+ /* Use `'_123` after running out of letters. */
623
+ PRINT ("_");
624
+ print_uint64 (rdm, depth);
625
+ }
626
+ }
627
+
628
+ /* Demangling functions. */
629
+
630
+ static void demangle_binder (struct rust_demangler *rdm);
631
+ static void demangle_path (struct rust_demangler *rdm, int in_value);
632
+ static void demangle_generic_arg (struct rust_demangler *rdm);
633
+ static void demangle_type (struct rust_demangler *rdm);
634
+ static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
635
+ static void demangle_dyn_trait (struct rust_demangler *rdm);
636
+ static void demangle_const (struct rust_demangler *rdm);
637
+ static void demangle_const_uint (struct rust_demangler *rdm);
638
+ static void demangle_const_int (struct rust_demangler *rdm);
639
+ static void demangle_const_bool (struct rust_demangler *rdm);
640
+ static void demangle_const_char (struct rust_demangler *rdm);
641
+
642
+ /* Optionally enter a binder ('G') for late-bound lifetimes,
643
+ printing e.g. `for<'a, 'b> `, and make those lifetimes visible
644
+ to the caller (via depth level, which the caller should reset). */
645
+ static void
646
+ demangle_binder (struct rust_demangler *rdm)
647
+ {
648
+ uint64_t i, bound_lifetimes;
649
+
650
+ if (rdm->errored)
651
+ return;
652
+
653
+ bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
654
+ if (bound_lifetimes > 0)
655
+ {
656
+ PRINT ("for<");
657
+ for (i = 0; i < bound_lifetimes; i++)
658
+ {
659
+ if (i > 0)
660
+ PRINT (", ");
661
+ rdm->bound_lifetime_depth++;
662
+ print_lifetime_from_index (rdm, 1);
663
+ }
664
+ PRINT ("> ");
665
+ }
666
+ }
667
+
668
+ static void
669
+ demangle_path (struct rust_demangler *rdm, int in_value)
670
+ {
671
+ char tag, ns;
672
+ int was_skipping_printing;
673
+ size_t i, backref, old_next;
674
+ uint64_t dis;
675
+ struct rust_mangled_ident name;
676
+
677
+ if (rdm->errored)
678
+ return;
679
+
680
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
681
+ {
682
+ ++ rdm->recursion;
683
+ if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
684
+ /* FIXME: There ought to be a way to report
685
+ that the recursion limit has been reached. */
686
+ goto fail_return;
687
+ }
688
+
689
+ switch (tag = next (rdm))
690
+ {
691
+ case 'C':
692
+ dis = parse_disambiguator (rdm);
693
+ name = parse_ident (rdm);
694
+
695
+ print_ident (rdm, name);
696
+ if (rdm->verbose)
697
+ {
698
+ PRINT ("[");
699
+ print_uint64_hex (rdm, dis);
700
+ PRINT ("]");
701
+ }
702
+ break;
703
+ case 'N':
704
+ ns = next (rdm);
705
+ if (!ISLOWER (ns) && !ISUPPER (ns))
706
+ goto fail_return;
707
+
708
+ demangle_path (rdm, in_value);
709
+
710
+ dis = parse_disambiguator (rdm);
711
+ name = parse_ident (rdm);
712
+
713
+ if (ISUPPER (ns))
714
+ {
715
+ /* Special namespaces, like closures and shims. */
716
+ PRINT ("::{");
717
+ switch (ns)
718
+ {
719
+ case 'C':
720
+ PRINT ("closure");
721
+ break;
722
+ case 'S':
723
+ PRINT ("shim");
724
+ break;
725
+ default:
726
+ print_str (rdm, &ns, 1);
727
+ }
728
+ if (name.ascii || name.punycode)
729
+ {
730
+ PRINT (":");
731
+ print_ident (rdm, name);
732
+ }
733
+ PRINT ("#");
734
+ print_uint64 (rdm, dis);
735
+ PRINT ("}");
736
+ }
737
+ else
738
+ {
739
+ /* Implementation-specific/unspecified namespaces. */
740
+
741
+ if (name.ascii || name.punycode)
742
+ {
743
+ PRINT ("::");
744
+ print_ident (rdm, name);
745
+ }
746
+ }
747
+ break;
748
+ case 'M':
749
+ case 'X':
750
+ /* Ignore the `impl`'s own path.*/
751
+ parse_disambiguator (rdm);
752
+ was_skipping_printing = rdm->skipping_printing;
753
+ rdm->skipping_printing = 1;
754
+ demangle_path (rdm, in_value);
755
+ rdm->skipping_printing = was_skipping_printing;
756
+ /* fallthrough */
757
+ case 'Y':
758
+ PRINT ("<");
759
+ demangle_type (rdm);
760
+ if (tag != 'M')
761
+ {
762
+ PRINT (" as ");
763
+ demangle_path (rdm, 0);
764
+ }
765
+ PRINT (">");
766
+ break;
767
+ case 'I':
768
+ demangle_path (rdm, in_value);
769
+ if (in_value)
770
+ PRINT ("::");
771
+ PRINT ("<");
772
+ for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
773
+ {
774
+ if (i > 0)
775
+ PRINT (", ");
776
+ demangle_generic_arg (rdm);
777
+ }
778
+ PRINT (">");
779
+ break;
780
+ case 'B':
781
+ backref = parse_integer_62 (rdm);
782
+ if (!rdm->skipping_printing)
783
+ {
784
+ old_next = rdm->next;
785
+ rdm->next = backref;
786
+ demangle_path (rdm, in_value);
787
+ rdm->next = old_next;
788
+ }
789
+ break;
790
+ default:
791
+ goto fail_return;
792
+ }
793
+ goto pass_return;
794
+
795
+ fail_return:
796
+ rdm->errored = 1;
797
+ pass_return:
798
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
799
+ -- rdm->recursion;
800
+ }
801
+
802
+ static void
803
+ demangle_generic_arg (struct rust_demangler *rdm)
804
+ {
805
+ uint64_t lt;
806
+ if (eat (rdm, 'L'))
807
+ {
808
+ lt = parse_integer_62 (rdm);
809
+ print_lifetime_from_index (rdm, lt);
810
+ }
811
+ else if (eat (rdm, 'K'))
812
+ demangle_const (rdm);
813
+ else
814
+ demangle_type (rdm);
815
+ }
816
+
817
+ static const char *
818
+ basic_type (char tag)
819
+ {
820
+ switch (tag)
821
+ {
822
+ case 'b':
823
+ return "bool";
824
+ case 'c':
825
+ return "char";
826
+ case 'e':
827
+ return "str";
828
+ case 'u':
829
+ return "()";
830
+ case 'a':
831
+ return "i8";
832
+ case 's':
833
+ return "i16";
834
+ case 'l':
835
+ return "i32";
836
+ case 'x':
837
+ return "i64";
838
+ case 'n':
839
+ return "i128";
840
+ case 'i':
841
+ return "isize";
842
+ case 'h':
843
+ return "u8";
844
+ case 't':
845
+ return "u16";
846
+ case 'm':
847
+ return "u32";
848
+ case 'y':
849
+ return "u64";
850
+ case 'o':
851
+ return "u128";
852
+ case 'j':
853
+ return "usize";
854
+ case 'f':
855
+ return "f32";
856
+ case 'd':
857
+ return "f64";
858
+ case 'z':
859
+ return "!";
860
+ case 'p':
861
+ return "_";
862
+ case 'v':
863
+ return "...";
864
+
865
+ default:
866
+ return NULL;
867
+ }
868
+ }
869
+
870
+ static void
871
+ demangle_type (struct rust_demangler *rdm)
872
+ {
873
+ char tag;
874
+ size_t i, old_next, backref;
875
+ uint64_t lt, old_bound_lifetime_depth;
876
+ const char *basic;
877
+ struct rust_mangled_ident abi;
878
+
879
+ if (rdm->errored)
880
+ return;
881
+
882
+ tag = next (rdm);
883
+
884
+ basic = basic_type (tag);
885
+ if (basic)
886
+ {
887
+ PRINT (basic);
888
+ return;
889
+ }
890
+
891
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
892
+ {
893
+ ++ rdm->recursion;
894
+ if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
895
+ /* FIXME: There ought to be a way to report
896
+ that the recursion limit has been reached. */
897
+ {
898
+ rdm->errored = 1;
899
+ -- rdm->recursion;
900
+ return;
901
+ }
902
+ }
903
+
904
+ switch (tag)
905
+ {
906
+ case 'R':
907
+ case 'Q':
908
+ PRINT ("&");
909
+ if (eat (rdm, 'L'))
910
+ {
911
+ lt = parse_integer_62 (rdm);
912
+ if (lt)
913
+ {
914
+ print_lifetime_from_index (rdm, lt);
915
+ PRINT (" ");
916
+ }
917
+ }
918
+ if (tag != 'R')
919
+ PRINT ("mut ");
920
+ demangle_type (rdm);
921
+ break;
922
+ case 'P':
923
+ case 'O':
924
+ PRINT ("*");
925
+ if (tag != 'P')
926
+ PRINT ("mut ");
927
+ else
928
+ PRINT ("const ");
929
+ demangle_type (rdm);
930
+ break;
931
+ case 'A':
932
+ case 'S':
933
+ PRINT ("[");
934
+ demangle_type (rdm);
935
+ if (tag == 'A')
936
+ {
937
+ PRINT ("; ");
938
+ demangle_const (rdm);
939
+ }
940
+ PRINT ("]");
941
+ break;
942
+ case 'T':
943
+ PRINT ("(");
944
+ for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
945
+ {
946
+ if (i > 0)
947
+ PRINT (", ");
948
+ demangle_type (rdm);
949
+ }
950
+ if (i == 1)
951
+ PRINT (",");
952
+ PRINT (")");
953
+ break;
954
+ case 'F':
955
+ old_bound_lifetime_depth = rdm->bound_lifetime_depth;
956
+ demangle_binder (rdm);
957
+
958
+ if (eat (rdm, 'U'))
959
+ PRINT ("unsafe ");
960
+
961
+ if (eat (rdm, 'K'))
962
+ {
963
+ if (eat (rdm, 'C'))
964
+ {
965
+ abi.ascii = "C";
966
+ abi.ascii_len = 1;
967
+ }
968
+ else
969
+ {
970
+ abi = parse_ident (rdm);
971
+ if (!abi.ascii || abi.punycode)
972
+ {
973
+ rdm->errored = 1;
974
+ goto restore;
975
+ }
976
+ }
977
+
978
+ PRINT ("extern \"");
979
+
980
+ /* If the ABI had any `-`, they were replaced with `_`,
981
+ so the parts between `_` have to be re-joined with `-`. */
982
+ for (i = 0; i < abi.ascii_len; i++)
983
+ {
984
+ if (abi.ascii[i] == '_')
985
+ {
986
+ print_str (rdm, abi.ascii, i);
987
+ PRINT ("-");
988
+ abi.ascii += i + 1;
989
+ abi.ascii_len -= i + 1;
990
+ i = 0;
991
+ }
992
+ }
993
+ print_str (rdm, abi.ascii, abi.ascii_len);
994
+
995
+ PRINT ("\" ");
996
+ }
997
+
998
+ PRINT ("fn(");
999
+ for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1000
+ {
1001
+ if (i > 0)
1002
+ PRINT (", ");
1003
+ demangle_type (rdm);
1004
+ }
1005
+ PRINT (")");
1006
+
1007
+ if (eat (rdm, 'u'))
1008
+ {
1009
+ /* Skip printing the return type if it's 'u', i.e. `()`. */
1010
+ }
1011
+ else
1012
+ {
1013
+ PRINT (" -> ");
1014
+ demangle_type (rdm);
1015
+ }
1016
+
1017
+ /* Restore `bound_lifetime_depth` to outside the binder. */
1018
+ restore:
1019
+ rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1020
+ break;
1021
+ case 'D':
1022
+ PRINT ("dyn ");
1023
+
1024
+ old_bound_lifetime_depth = rdm->bound_lifetime_depth;
1025
+ demangle_binder (rdm);
1026
+
1027
+ for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1028
+ {
1029
+ if (i > 0)
1030
+ PRINT (" + ");
1031
+ demangle_dyn_trait (rdm);
1032
+ }
1033
+
1034
+ /* Restore `bound_lifetime_depth` to outside the binder. */
1035
+ rdm->bound_lifetime_depth = old_bound_lifetime_depth;
1036
+
1037
+ if (!eat (rdm, 'L'))
1038
+ {
1039
+ rdm->errored = 1;
1040
+ return;
1041
+ }
1042
+ lt = parse_integer_62 (rdm);
1043
+ if (lt)
1044
+ {
1045
+ PRINT (" + ");
1046
+ print_lifetime_from_index (rdm, lt);
1047
+ }
1048
+ break;
1049
+ case 'B':
1050
+ backref = parse_integer_62 (rdm);
1051
+ if (!rdm->skipping_printing)
1052
+ {
1053
+ old_next = rdm->next;
1054
+ rdm->next = backref;
1055
+ demangle_type (rdm);
1056
+ rdm->next = old_next;
1057
+ }
1058
+ break;
1059
+ default:
1060
+ /* Go back to the tag, so `demangle_path` also sees it. */
1061
+ rdm->next--;
1062
+ demangle_path (rdm, 0);
1063
+ }
1064
+
1065
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1066
+ -- rdm->recursion;
1067
+ }
1068
+
1069
+ /* A trait in a trait object may have some "existential projections"
1070
+ (i.e. associated type bindings) after it, which should be printed
1071
+ in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
1072
+ To this end, this method will keep the `<...>` of an 'I' path
1073
+ open, by omitting the `>`, and return `Ok(true)` in that case. */
1074
+ static int
1075
+ demangle_path_maybe_open_generics (struct rust_demangler *rdm)
1076
+ {
1077
+ int open;
1078
+ size_t i, old_next, backref;
1079
+
1080
+ open = 0;
1081
+
1082
+ if (rdm->errored)
1083
+ return open;
1084
+
1085
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1086
+ {
1087
+ ++ rdm->recursion;
1088
+ if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1089
+ {
1090
+ /* FIXME: There ought to be a way to report
1091
+ that the recursion limit has been reached. */
1092
+ rdm->errored = 1;
1093
+ goto end_of_func;
280
1094
  }
281
- break;
282
- case '_':
283
- /* If this is the start of a path component and the next
284
- character is an escape sequence, ignore the underscore. The
285
- mangler inserts an underscore to make sure the path
286
- component begins with a XID_Start character. */
287
- if ((in == sym || in[-1] == ':') && in[1] == '$')
288
- in++;
289
- else
290
- *out++ = *in++;
291
- break;
292
- case '.':
293
- if (in[1] == '.')
294
- {
295
- /* ".." becomes "::" */
296
- *out++ = ':';
297
- *out++ = ':';
298
- in += 2;
299
- }
300
- else
301
- {
302
- /* "." becomes "-" */
303
- *out++ = '-';
304
- in++;
305
- }
306
- break;
307
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
308
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
309
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
310
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
311
- case 'y': case 'z':
312
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
313
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
314
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
315
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
316
- case 'Y': case 'Z':
317
- case '0': case '1': case '2': case '3': case '4': case '5':
318
- case '6': case '7': case '8': case '9':
319
- case ':':
320
- *out++ = *in++;
321
- break;
322
- default:
323
- /* unexpected character in symbol, not looks_like_rust. */
324
- goto fail;
325
- }
326
- goto done;
327
-
328
- fail:
329
- *out++ = '?'; /* This is pretty lame, but it's hard to do better. */
330
- done:
331
- *out = '\0';
1095
+ }
1096
+
1097
+ if (eat (rdm, 'B'))
1098
+ {
1099
+ backref = parse_integer_62 (rdm);
1100
+ if (!rdm->skipping_printing)
1101
+ {
1102
+ old_next = rdm->next;
1103
+ rdm->next = backref;
1104
+ open = demangle_path_maybe_open_generics (rdm);
1105
+ rdm->next = old_next;
1106
+ }
1107
+ }
1108
+ else if (eat (rdm, 'I'))
1109
+ {
1110
+ demangle_path (rdm, 0);
1111
+ PRINT ("<");
1112
+ open = 1;
1113
+ for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
1114
+ {
1115
+ if (i > 0)
1116
+ PRINT (", ");
1117
+ demangle_generic_arg (rdm);
1118
+ }
1119
+ }
1120
+ else
1121
+ demangle_path (rdm, 0);
1122
+
1123
+ end_of_func:
1124
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1125
+ -- rdm->recursion;
1126
+
1127
+ return open;
1128
+ }
1129
+
1130
+ static void
1131
+ demangle_dyn_trait (struct rust_demangler *rdm)
1132
+ {
1133
+ int open;
1134
+ struct rust_mangled_ident name;
1135
+
1136
+ if (rdm->errored)
1137
+ return;
1138
+
1139
+ open = demangle_path_maybe_open_generics (rdm);
1140
+
1141
+ while (eat (rdm, 'p'))
1142
+ {
1143
+ if (!open)
1144
+ PRINT ("<");
1145
+ else
1146
+ PRINT (", ");
1147
+ open = 1;
1148
+
1149
+ name = parse_ident (rdm);
1150
+ print_ident (rdm, name);
1151
+ PRINT (" = ");
1152
+ demangle_type (rdm);
1153
+ }
1154
+
1155
+ if (open)
1156
+ PRINT (">");
332
1157
  }
333
1158
 
1159
+ static void
1160
+ demangle_const (struct rust_demangler *rdm)
1161
+ {
1162
+ char ty_tag;
1163
+ size_t old_next, backref;
1164
+
1165
+ if (rdm->errored)
1166
+ return;
1167
+
1168
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1169
+ {
1170
+ ++ rdm->recursion;
1171
+ if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
1172
+ /* FIXME: There ought to be a way to report
1173
+ that the recursion limit has been reached. */
1174
+ goto fail_return;
1175
+ }
1176
+
1177
+ if (eat (rdm, 'B'))
1178
+ {
1179
+ backref = parse_integer_62 (rdm);
1180
+ if (!rdm->skipping_printing)
1181
+ {
1182
+ old_next = rdm->next;
1183
+ rdm->next = backref;
1184
+ demangle_const (rdm);
1185
+ rdm->next = old_next;
1186
+ }
1187
+ goto pass_return;
1188
+ }
1189
+
1190
+ ty_tag = next (rdm);
1191
+ switch (ty_tag)
1192
+ {
1193
+ /* Placeholder. */
1194
+ case 'p':
1195
+ PRINT ("_");
1196
+ goto pass_return;
1197
+
1198
+ /* Unsigned integer types. */
1199
+ case 'h':
1200
+ case 't':
1201
+ case 'm':
1202
+ case 'y':
1203
+ case 'o':
1204
+ case 'j':
1205
+ demangle_const_uint (rdm);
1206
+ break;
1207
+
1208
+ /* Signed integer types. */
1209
+ case 'a':
1210
+ case 's':
1211
+ case 'l':
1212
+ case 'x':
1213
+ case 'n':
1214
+ case 'i':
1215
+ demangle_const_int (rdm);
1216
+ break;
1217
+
1218
+ /* Boolean. */
1219
+ case 'b':
1220
+ demangle_const_bool (rdm);
1221
+ break;
1222
+
1223
+ /* Character. */
1224
+ case 'c':
1225
+ demangle_const_char (rdm);
1226
+ break;
1227
+
1228
+ default:
1229
+ goto fail_return;
1230
+ }
1231
+
1232
+ if (!rdm->errored && rdm->verbose)
1233
+ {
1234
+ PRINT (": ");
1235
+ PRINT (basic_type (ty_tag));
1236
+ }
1237
+ goto pass_return;
1238
+
1239
+ fail_return:
1240
+ rdm->errored = 1;
1241
+ pass_return:
1242
+ if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
1243
+ -- rdm->recursion;
1244
+ }
1245
+
1246
+ static void
1247
+ demangle_const_uint (struct rust_demangler *rdm)
1248
+ {
1249
+ size_t hex_len;
1250
+ uint64_t value;
1251
+
1252
+ if (rdm->errored)
1253
+ return;
1254
+
1255
+ hex_len = parse_hex_nibbles (rdm, &value);
1256
+
1257
+ if (hex_len > 16)
1258
+ {
1259
+ /* Print anything that doesn't fit in `uint64_t` verbatim. */
1260
+ PRINT ("0x");
1261
+ print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
1262
+ }
1263
+ else if (hex_len > 0)
1264
+ print_uint64 (rdm, value);
1265
+ else
1266
+ rdm->errored = 1;
1267
+ }
1268
+
1269
+ static void
1270
+ demangle_const_int (struct rust_demangler *rdm)
1271
+ {
1272
+ if (eat (rdm, 'n'))
1273
+ PRINT ("-");
1274
+ demangle_const_uint (rdm);
1275
+ }
1276
+
1277
+ static void
1278
+ demangle_const_bool (struct rust_demangler *rdm)
1279
+ {
1280
+ uint64_t value;
1281
+
1282
+ if (parse_hex_nibbles (rdm, &value) != 1)
1283
+ {
1284
+ rdm->errored = 1;
1285
+ return;
1286
+ }
1287
+
1288
+ if (value == 0)
1289
+ PRINT ("false");
1290
+ else if (value == 1)
1291
+ PRINT ("true");
1292
+ else
1293
+ rdm->errored = 1;
1294
+ }
1295
+
1296
+ static void
1297
+ demangle_const_char (struct rust_demangler *rdm)
1298
+ {
1299
+ size_t hex_len;
1300
+ uint64_t value;
1301
+
1302
+ hex_len = parse_hex_nibbles (rdm, &value);
1303
+
1304
+ if (hex_len == 0 || hex_len > 8)
1305
+ {
1306
+ rdm->errored = 1;
1307
+ return;
1308
+ }
1309
+
1310
+ /* Match Rust's character "debug" output as best as we can. */
1311
+ PRINT ("'");
1312
+ if (value == '\t')
1313
+ PRINT ("\\t");
1314
+ else if (value == '\r')
1315
+ PRINT ("\\r");
1316
+ else if (value == '\n')
1317
+ PRINT ("\\n");
1318
+ else if (value > ' ' && value < '~')
1319
+ {
1320
+ /* Rust also considers many non-ASCII codepoints to be printable, but
1321
+ that logic is not easily ported to C. */
1322
+ char c = value;
1323
+ print_str (rdm, &c, 1);
1324
+ }
1325
+ else
1326
+ {
1327
+ PRINT ("\\u{");
1328
+ print_uint64_hex (rdm, value);
1329
+ PRINT ("}");
1330
+ }
1331
+ PRINT ("'");
1332
+ }
1333
+
1334
+ /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
1335
+ The hex digits must contain at least 5 distinct digits. */
334
1336
  static int
335
- unescape (const char **in, char **out, const char *seq, char value)
1337
+ is_legacy_prefixed_hash (struct rust_mangled_ident ident)
336
1338
  {
337
- size_t len = strlen (seq);
1339
+ uint16_t seen;
1340
+ int nibble;
1341
+ size_t i, count;
1342
+
1343
+ if (ident.ascii_len != 17 || ident.ascii[0] != 'h')
1344
+ return 0;
1345
+
1346
+ seen = 0;
1347
+ for (i = 0; i < 16; i++)
1348
+ {
1349
+ nibble = decode_lower_hex_nibble (ident.ascii[1 + i]);
1350
+ if (nibble < 0)
1351
+ return 0;
1352
+ seen |= (uint16_t)1 << nibble;
1353
+ }
1354
+
1355
+ /* Count how many distinct digits were seen. */
1356
+ count = 0;
1357
+ while (seen)
1358
+ {
1359
+ if (seen & 1)
1360
+ count++;
1361
+ seen >>= 1;
1362
+ }
1363
+
1364
+ return count >= 5;
1365
+ }
1366
+
1367
+ int
1368
+ rust_demangle_callback (const char *mangled, int options,
1369
+ demangle_callbackref callback, void *opaque)
1370
+ {
1371
+ const char *p;
1372
+ struct rust_demangler rdm;
1373
+ struct rust_mangled_ident ident;
1374
+
1375
+ rdm.sym = mangled;
1376
+ rdm.sym_len = 0;
1377
+
1378
+ rdm.callback_opaque = opaque;
1379
+ rdm.callback = callback;
1380
+
1381
+ rdm.next = 0;
1382
+ rdm.errored = 0;
1383
+ rdm.skipping_printing = 0;
1384
+ rdm.verbose = (options & DMGL_VERBOSE) != 0;
1385
+ rdm.version = 0;
1386
+ rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
1387
+ rdm.bound_lifetime_depth = 0;
338
1388
 
339
- if (strncmp (*in, seq, len))
1389
+ /* Rust symbols always start with _R (v0) or _ZN (legacy). */
1390
+ if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
1391
+ rdm.sym += 2;
1392
+ else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
1393
+ {
1394
+ rdm.sym += 3;
1395
+ rdm.version = -1;
1396
+ }
1397
+ else
340
1398
  return 0;
341
1399
 
342
- **out = value;
1400
+ /* Paths (v0) always start with uppercase characters. */
1401
+ if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
1402
+ return 0;
1403
+
1404
+ /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
1405
+ for (p = rdm.sym; *p; p++)
1406
+ {
1407
+ /* Rust v0 symbols can have '.' suffixes, ignore those. */
1408
+ if (rdm.version == 0 && *p == '.')
1409
+ break;
1410
+
1411
+ rdm.sym_len++;
1412
+
1413
+ if (*p == '_' || ISALNUM (*p))
1414
+ continue;
1415
+
1416
+ /* Legacy Rust symbols can also contain [.:$] characters.
1417
+ Or @ in the .suffix (which will be skipped, see below). */
1418
+ if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
1419
+ || *p == '@'))
1420
+ continue;
1421
+
1422
+ return 0;
1423
+ }
1424
+
1425
+ /* Legacy Rust symbols need to be handled separately. */
1426
+ if (rdm.version == -1)
1427
+ {
1428
+ /* Legacy Rust symbols always end with E. But can be followed by a
1429
+ .suffix (which we want to ignore). */
1430
+ int dot_suffix = 1;
1431
+ while (rdm.sym_len > 0 &&
1432
+ !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
1433
+ {
1434
+ dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
1435
+ rdm.sym_len--;
1436
+ }
1437
+
1438
+ if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
1439
+ return 0;
1440
+ rdm.sym_len--;
1441
+
1442
+ /* Legacy Rust symbols also always end with a path segment
1443
+ that encodes a 16 hex digit hash, i.e. '17h[a-f0-9]{16}'.
1444
+ This early check, before any parse_ident calls, should
1445
+ quickly filter out most C++ symbols unrelated to Rust. */
1446
+ if (!(rdm.sym_len > 19
1447
+ && !memcmp (&rdm.sym[rdm.sym_len - 19], "17h", 3)))
1448
+ return 0;
1449
+
1450
+ do
1451
+ {
1452
+ ident = parse_ident (&rdm);
1453
+ if (rdm.errored || !ident.ascii)
1454
+ return 0;
1455
+ }
1456
+ while (rdm.next < rdm.sym_len);
1457
+
1458
+ /* The last path segment should be the hash. */
1459
+ if (!is_legacy_prefixed_hash (ident))
1460
+ return 0;
1461
+
1462
+ /* Reset the state for a second pass, to print the symbol. */
1463
+ rdm.next = 0;
1464
+ if (!rdm.verbose && rdm.sym_len > 19)
1465
+ {
1466
+ /* Hide the last segment, containing the hash, if not verbose. */
1467
+ rdm.sym_len -= 19;
1468
+ }
1469
+
1470
+ do
1471
+ {
1472
+ if (rdm.next > 0)
1473
+ print_str (&rdm, "::", 2);
1474
+
1475
+ ident = parse_ident (&rdm);
1476
+ print_ident (&rdm, ident);
1477
+ }
1478
+ while (rdm.next < rdm.sym_len);
1479
+ }
1480
+ else
1481
+ {
1482
+ demangle_path (&rdm, 1);
1483
+
1484
+ /* Skip instantiating crate. */
1485
+ if (!rdm.errored && rdm.next < rdm.sym_len)
1486
+ {
1487
+ rdm.skipping_printing = 1;
1488
+ demangle_path (&rdm, 0);
1489
+ }
1490
+
1491
+ /* It's an error to not reach the end. */
1492
+ rdm.errored |= rdm.next != rdm.sym_len;
1493
+ }
1494
+
1495
+ return !rdm.errored;
1496
+ }
1497
+
1498
+ /* Growable string buffers. */
1499
+ struct str_buf
1500
+ {
1501
+ char *ptr;
1502
+ size_t len;
1503
+ size_t cap;
1504
+ int errored;
1505
+ };
1506
+
1507
+ static void
1508
+ str_buf_reserve (struct str_buf *buf, size_t extra)
1509
+ {
1510
+ size_t available, min_new_cap, new_cap;
1511
+ char *new_ptr;
1512
+
1513
+ /* Allocation failed before. */
1514
+ if (buf->errored)
1515
+ return;
1516
+
1517
+ available = buf->cap - buf->len;
1518
+
1519
+ if (extra <= available)
1520
+ return;
1521
+
1522
+ min_new_cap = buf->cap + (extra - available);
1523
+
1524
+ /* Check for overflows. */
1525
+ if (min_new_cap < buf->cap)
1526
+ {
1527
+ buf->errored = 1;
1528
+ return;
1529
+ }
1530
+
1531
+ new_cap = buf->cap;
1532
+
1533
+ if (new_cap == 0)
1534
+ new_cap = 4;
1535
+
1536
+ /* Double capacity until sufficiently large. */
1537
+ while (new_cap < min_new_cap)
1538
+ {
1539
+ new_cap *= 2;
1540
+
1541
+ /* Check for overflows. */
1542
+ if (new_cap < buf->cap)
1543
+ {
1544
+ buf->errored = 1;
1545
+ return;
1546
+ }
1547
+ }
1548
+
1549
+ new_ptr = (char *)realloc (buf->ptr, new_cap);
1550
+ if (new_ptr == NULL)
1551
+ {
1552
+ free (buf->ptr);
1553
+ buf->ptr = NULL;
1554
+ buf->len = 0;
1555
+ buf->cap = 0;
1556
+ buf->errored = 1;
1557
+ }
1558
+ else
1559
+ {
1560
+ buf->ptr = new_ptr;
1561
+ buf->cap = new_cap;
1562
+ }
1563
+ }
1564
+
1565
+ static void
1566
+ str_buf_append (struct str_buf *buf, const char *data, size_t len)
1567
+ {
1568
+ str_buf_reserve (buf, len);
1569
+ if (buf->errored)
1570
+ return;
1571
+
1572
+ memcpy (buf->ptr + buf->len, data, len);
1573
+ buf->len += len;
1574
+ }
1575
+
1576
+ static void
1577
+ str_buf_demangle_callback (const char *data, size_t len, void *opaque)
1578
+ {
1579
+ str_buf_append ((struct str_buf *)opaque, data, len);
1580
+ }
1581
+
1582
+ char *
1583
+ rust_demangle (const char *mangled, int options)
1584
+ {
1585
+ struct str_buf out;
1586
+ int success;
1587
+
1588
+ out.ptr = NULL;
1589
+ out.len = 0;
1590
+ out.cap = 0;
1591
+ out.errored = 0;
1592
+
1593
+ success = rust_demangle_callback (mangled, options,
1594
+ str_buf_demangle_callback, &out);
343
1595
 
344
- *in += len;
345
- *out += 1;
1596
+ if (!success)
1597
+ {
1598
+ free (out.ptr);
1599
+ return NULL;
1600
+ }
346
1601
 
347
- return 1;
1602
+ str_buf_append (&out, "\0", 1);
1603
+ return out.ptr;
348
1604
  }