commonmarker 0.23.1 → 0.23.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

@@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
38
38
  cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
39
39
  return cmark_map_new(mem, footnote_free);
40
40
  }
41
+
42
+ // Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43
+ // unlink all of the footnote nodes before freeing their memory.
44
+ //
45
+ // Sometimes, two (unused) footnote nodes can end up referencing each other,
46
+ // which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47
+ // etc, can lead to a use-after-free error.
48
+ //
49
+ // Better to `unlink` every footnote node first, setting their next, prev, and
50
+ // parent pointers to NULL, and only then walk through and free them up.
51
+ void cmark_unlink_footnotes_map(cmark_map *map) {
52
+ cmark_map_entry *ref;
53
+ cmark_map_entry *next;
54
+
55
+ ref = map->refs;
56
+ while(ref) {
57
+ next = ref->next;
58
+ if (((cmark_footnote *)ref)->node) {
59
+ cmark_node_unlink(((cmark_footnote *)ref)->node);
60
+ }
61
+ ref = next;
62
+ }
63
+ }
@@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote;
18
18
  void cmark_footnote_create(cmark_map *map, cmark_node *node);
19
19
  cmark_map *cmark_footnote_map_new(cmark_mem *mem);
20
20
 
21
+ void cmark_unlink_footnotes_map(cmark_map *map);
22
+
21
23
  #ifdef __cplusplus
22
24
  }
23
25
  #endif
@@ -59,16 +59,30 @@ static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size
59
59
  cmark_strbuf_put(html, data, (bufsize_t)len);
60
60
  }
61
61
 
62
- static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html) {
62
+ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *html, cmark_node *node) {
63
63
  if (renderer->written_footnote_ix >= renderer->footnote_ix)
64
64
  return false;
65
65
  renderer->written_footnote_ix = renderer->footnote_ix;
66
66
 
67
- cmark_strbuf_puts(html, "<a href=\"#fnref");
68
- char n[32];
69
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
70
- cmark_strbuf_puts(html, n);
71
- cmark_strbuf_puts(html, "\" class=\"footnote-backref\">↩</a>");
67
+ cmark_strbuf_puts(html, "<a href=\"#fnref-");
68
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
69
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");
70
+
71
+ if (node->footnote.def_count > 1)
72
+ {
73
+ for(int i = 2; i <= node->footnote.def_count; i++) {
74
+ char n[32];
75
+ snprintf(n, sizeof(n), "%d", i);
76
+
77
+ cmark_strbuf_puts(html, " <a href=\"#fnref-");
78
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
79
+ cmark_strbuf_puts(html, "-");
80
+ cmark_strbuf_puts(html, n);
81
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
82
+ cmark_strbuf_puts(html, n);
83
+ cmark_strbuf_puts(html, "</sup></a>");
84
+ }
85
+ }
72
86
 
73
87
  return true;
74
88
  }
@@ -273,7 +287,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
273
287
  } else {
274
288
  if (parent->type == CMARK_NODE_FOOTNOTE_DEFINITION && node->next == NULL) {
275
289
  cmark_strbuf_putc(html, ' ');
276
- S_put_footnote_backref(renderer, html);
290
+ S_put_footnote_backref(renderer, html, parent);
277
291
  }
278
292
  cmark_strbuf_puts(html, "</p>\n");
279
293
  }
@@ -392,16 +406,15 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
392
406
  case CMARK_NODE_FOOTNOTE_DEFINITION:
393
407
  if (entering) {
394
408
  if (renderer->footnote_ix == 0) {
395
- cmark_strbuf_puts(html, "<section class=\"footnotes\">\n<ol>\n");
409
+ cmark_strbuf_puts(html, "<section class=\"footnotes\" data-footnotes>\n<ol>\n");
396
410
  }
397
411
  ++renderer->footnote_ix;
398
- cmark_strbuf_puts(html, "<li id=\"fn");
399
- char n[32];
400
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
401
- cmark_strbuf_puts(html, n);
412
+
413
+ cmark_strbuf_puts(html, "<li id=\"fn-");
414
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
402
415
  cmark_strbuf_puts(html, "\">\n");
403
416
  } else {
404
- if (S_put_footnote_backref(renderer, html)) {
417
+ if (S_put_footnote_backref(renderer, html, node)) {
405
418
  cmark_strbuf_putc(html, '\n');
406
419
  }
407
420
  cmark_strbuf_puts(html, "</li>\n");
@@ -410,12 +423,20 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
410
423
 
411
424
  case CMARK_NODE_FOOTNOTE_REFERENCE:
412
425
  if (entering) {
413
- cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn");
414
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
415
- cmark_strbuf_puts(html, "\" id=\"fnref");
416
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
417
- cmark_strbuf_puts(html, "\">");
418
- cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
426
+ cmark_strbuf_puts(html, "<sup class=\"footnote-ref\"><a href=\"#fn-");
427
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
428
+ cmark_strbuf_puts(html, "\" id=\"fnref-");
429
+ houdini_escape_href(html, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
430
+
431
+ if (node->footnote.ref_ix > 1) {
432
+ char n[32];
433
+ snprintf(n, sizeof(n), "%d", node->footnote.ref_ix);
434
+ cmark_strbuf_puts(html, "-");
435
+ cmark_strbuf_puts(html, n);
436
+ }
437
+
438
+ cmark_strbuf_puts(html, "\" data-footnote-ref>");
439
+ houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
419
440
  cmark_strbuf_puts(html, "</a></sup>");
420
441
  }
421
442
  break;
@@ -1137,19 +1137,77 @@ noMatch:
1137
1137
  // What if we're a footnote link?
1138
1138
  if (parser->options & CMARK_OPT_FOOTNOTES &&
1139
1139
  opener->inl_text->next &&
1140
- opener->inl_text->next->type == CMARK_NODE_TEXT &&
1141
- !opener->inl_text->next->next) {
1140
+ opener->inl_text->next->type == CMARK_NODE_TEXT) {
1141
+
1142
1142
  cmark_chunk *literal = &opener->inl_text->next->as.literal;
1143
- if (literal->len > 1 && literal->data[0] == '^') {
1144
- inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1145
- inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
1146
- inl->start_line = inl->end_line = subj->line;
1147
- inl->start_column = opener->inl_text->start_column;
1148
- inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1149
- cmark_node_insert_before(opener->inl_text, inl);
1150
- cmark_node_free(opener->inl_text->next);
1151
- cmark_node_free(opener->inl_text);
1143
+
1144
+ // look back to the opening '[', and skip ahead to the next character
1145
+ // if we're looking at a '[^' sequence, and there is other text or nodes
1146
+ // after the ^, let's call it a footnote reference.
1147
+ if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1148
+
1149
+ // Before we got this far, the `handle_close_bracket` function may have
1150
+ // advanced the current state beyond our footnote's actual closing
1151
+ // bracket, ie if it went looking for a `link_label`.
1152
+ // Let's just rewind the subject's position:
1153
+ subj->pos = initial_pos;
1154
+
1155
+ cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1156
+
1157
+ // the start and end of the footnote ref is the opening and closing brace
1158
+ // i.e. the subject's current position, and the opener's start_column
1159
+ int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1160
+ int fnref_start_column = opener->inl_text->start_column;
1161
+
1162
+ // any given node delineates a substring of the line being processed,
1163
+ // with the remainder of the line being pointed to through its 'literal'
1164
+ // struct member.
1165
+ // here, we copy the literal's pointer, moving it past the '^' character
1166
+ // for a length equal to the size of footnote reference text.
1167
+ // i.e. end_col minus start_col, minus the [ and the ^ characters
1168
+ //
1169
+ // this copies the footnote reference string, even if between the
1170
+ // `opener` and the subject's current position there are other nodes
1171
+ //
1172
+ // (first, check for underflows)
1173
+ if ((fnref_start_column + 2) <= fnref_end_column) {
1174
+ fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1175
+ } else {
1176
+ fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1177
+ }
1178
+
1179
+ fnref->start_line = fnref->end_line = subj->line;
1180
+ fnref->start_column = fnref_start_column;
1181
+ fnref->end_column = fnref_end_column;
1182
+
1183
+ // we then replace the opener with this new fnref node, the net effect
1184
+ // being replacing the opening '[' text node with a `^footnote-ref]` node.
1185
+ cmark_node_insert_before(opener->inl_text, fnref);
1186
+
1152
1187
  process_emphasis(parser, subj, opener->previous_delimiter);
1188
+ // sometimes, the footnote reference text gets parsed into multiple nodes
1189
+ // i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1190
+ // this happens, for example, with the autolink extension. when the autolinker
1191
+ // finds the 'w' character, it will split the text into multiple nodes
1192
+ // in hopes of being able to match a 'www.' substring.
1193
+ //
1194
+ // because this function is called one character at a time via the
1195
+ // `parse_inlines` function, and the current subj->pos is pointing at the
1196
+ // closing ] brace, and because we copy all the text between the [ ]
1197
+ // braces, we should be able to safely ignore and delete any nodes after
1198
+ // the opener->inl_text->next.
1199
+ //
1200
+ // therefore, here we walk thru the list and free them all up
1201
+ cmark_node *next_node;
1202
+ cmark_node *current_node = opener->inl_text->next;
1203
+ while(current_node) {
1204
+ next_node = current_node->next;
1205
+ cmark_node_free(current_node);
1206
+ current_node = next_node;
1207
+ }
1208
+
1209
+ cmark_node_free(opener->inl_text);
1210
+
1153
1211
  pop_bracket(subj);
1154
1212
  return NULL;
1155
1213
  }
@@ -76,6 +76,13 @@ struct cmark_node {
76
76
 
77
77
  cmark_syntax_extension *extension;
78
78
 
79
+ union {
80
+ int ref_ix;
81
+ int def_count;
82
+ } footnote;
83
+
84
+ cmark_node *parent_footnote_def;
85
+
79
86
  union {
80
87
  cmark_chunk literal;
81
88
  cmark_list list;
@@ -114,60 +114,87 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi
114
114
  static table_row *row_from_string(cmark_syntax_extension *self,
115
115
  cmark_parser *parser, unsigned char *string,
116
116
  int len) {
117
+ // Parses a single table row. It has the following form:
118
+ // `delim? table_cell (delim table_cell)* delim? newline`
119
+ // Note that cells are allowed to be empty.
120
+ //
121
+ // From the GitHub-flavored Markdown specification:
122
+ //
123
+ // > Each row consists of cells containing arbitrary text, in which inlines
124
+ // > are parsed, separated by pipes (|). A leading and trailing pipe is also
125
+ // > recommended for clarity of reading, and if there’s otherwise parsing
126
+ // > ambiguity.
127
+
117
128
  table_row *row = NULL;
118
129
  bufsize_t cell_matched = 1, pipe_matched = 1, offset;
119
- int cell_end_offset;
130
+ int expect_more_cells = 1;
131
+ int row_end_offset = 0;
120
132
 
121
133
  row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
122
134
  row->n_columns = 0;
123
135
  row->cells = NULL;
124
136
 
137
+ // Scan past the (optional) leading pipe.
125
138
  offset = scan_table_cell_end(string, len, 0);
126
139
 
127
140
  // Parse the cells of the row. Stop if we reach the end of the input, or if we
128
141
  // cannot detect any more cells.
129
- while (offset < len && (cell_matched || pipe_matched)) {
142
+ while (offset < len && expect_more_cells) {
130
143
  cell_matched = scan_table_cell(string, len, offset);
131
144
  pipe_matched = scan_table_cell_end(string, len, offset + cell_matched);
132
145
 
133
146
  if (cell_matched || pipe_matched) {
134
- cell_end_offset = offset + cell_matched - 1;
147
+ // We are guaranteed to have a cell, since (1) either we found some
148
+ // content and cell_matched, or (2) we found an empty cell followed by a
149
+ // pipe.
150
+ cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
151
+ cell_matched);
152
+ cmark_strbuf_trim(cell_buf);
153
+
154
+ node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
155
+ cell->buf = cell_buf;
156
+ cell->start_offset = offset;
157
+ cell->end_offset = offset + cell_matched - 1;
158
+
159
+ while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
160
+ --cell->start_offset;
161
+ ++cell->internal_offset;
162
+ }
163
+
164
+ row->n_columns += 1;
165
+ row->cells = cmark_llist_append(parser->mem, row->cells, cell);
166
+ }
167
+
168
+ offset += cell_matched + pipe_matched;
169
+
170
+ if (pipe_matched) {
171
+ expect_more_cells = 1;
172
+ } else {
173
+ // We've scanned the last cell. Check if we have reached the end of the row
174
+ row_end_offset = scan_table_row_end(string, len, offset);
175
+ offset += row_end_offset;
135
176
 
136
- if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') {
137
- row->paragraph_offset = cell_end_offset;
177
+ // If the end of the row is not the end of the input,
178
+ // the row is not a real row but potentially part of the paragraph
179
+ // preceding the table.
180
+ if (row_end_offset && offset != len) {
181
+ row->paragraph_offset = offset;
138
182
 
139
183
  cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell);
140
184
  row->cells = NULL;
141
185
  row->n_columns = 0;
142
- } else {
143
- cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
144
- cell_matched);
145
- cmark_strbuf_trim(cell_buf);
146
-
147
- node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
148
- cell->buf = cell_buf;
149
- cell->start_offset = offset;
150
- cell->end_offset = cell_end_offset;
151
-
152
- while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
153
- --cell->start_offset;
154
- ++cell->internal_offset;
155
- }
156
186
 
157
- row->n_columns += 1;
158
- row->cells = cmark_llist_append(parser->mem, row->cells, cell);
159
- }
160
- }
187
+ // Scan past the (optional) leading pipe.
188
+ offset += scan_table_cell_end(string, len, offset);
161
189
 
162
- offset += cell_matched + pipe_matched;
163
-
164
- if (!pipe_matched) {
165
- pipe_matched = scan_table_row_end(string, len, offset);
166
- offset += pipe_matched;
190
+ expect_more_cells = 1;
191
+ } else {
192
+ expect_more_cells = 0;
193
+ }
167
194
  }
168
195
  }
169
196
 
170
- if (offset != len || !row->n_columns) {
197
+ if (offset != len || row->n_columns == 0) {
171
198
  free_table_row(parser->mem, row);
172
199
  row = NULL;
173
200
  }
@@ -199,8 +226,6 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
199
226
  cmark_parser *parser,
200
227
  cmark_node *parent_container,
201
228
  unsigned char *input, int len) {
202
- bufsize_t matched =
203
- scan_table_start(input, len, cmark_parser_get_first_nonspace(parser));
204
229
  cmark_node *table_header;
205
230
  table_row *header_row = NULL;
206
231
  table_row *marker_row = NULL;
@@ -208,41 +233,37 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
208
233
  const char *parent_string;
209
234
  uint16_t i;
210
235
 
211
- if (!matched)
212
- return parent_container;
213
-
214
- parent_string = cmark_node_get_string_content(parent_container);
215
-
216
- cmark_arena_push();
217
-
218
- header_row = row_from_string(self, parser, (unsigned char *)parent_string,
219
- (int)strlen(parent_string));
220
-
221
- if (!header_row) {
222
- free_table_row(parser->mem, header_row);
223
- cmark_arena_pop();
236
+ if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
224
237
  return parent_container;
225
238
  }
226
239
 
240
+ // Since scan_table_start was successful, we must have a marker row.
227
241
  marker_row = row_from_string(self, parser,
228
242
  input + cmark_parser_get_first_nonspace(parser),
229
243
  len - cmark_parser_get_first_nonspace(parser));
230
-
231
244
  assert(marker_row);
232
245
 
233
- if (header_row->n_columns != marker_row->n_columns) {
234
- free_table_row(parser->mem, header_row);
246
+ cmark_arena_push();
247
+
248
+ // Check for a matching header row. We call `row_from_string` with the entire
249
+ // (potentially long) parent container as input, but this should be safe since
250
+ // `row_from_string` bails out early if it does not find a row.
251
+ parent_string = cmark_node_get_string_content(parent_container);
252
+ header_row = row_from_string(self, parser, (unsigned char *)parent_string,
253
+ (int)strlen(parent_string));
254
+ if (!header_row || header_row->n_columns != marker_row->n_columns) {
235
255
  free_table_row(parser->mem, marker_row);
256
+ free_table_row(parser->mem, header_row);
236
257
  cmark_arena_pop();
237
258
  return parent_container;
238
259
  }
239
260
 
240
261
  if (cmark_arena_pop()) {
262
+ marker_row = row_from_string(
263
+ self, parser, input + cmark_parser_get_first_nonspace(parser),
264
+ len - cmark_parser_get_first_nonspace(parser));
241
265
  header_row = row_from_string(self, parser, (unsigned char *)parent_string,
242
266
  (int)strlen(parent_string));
243
- marker_row = row_from_string(self, parser,
244
- input + cmark_parser_get_first_nonspace(parser),
245
- len - cmark_parser_get_first_nonspace(parser));
246
267
  }
247
268
 
248
269
  if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) {
@@ -257,9 +278,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
257
278
  }
258
279
 
259
280
  cmark_node_set_syntax_extension(parent_container, self);
260
-
261
281
  parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
262
-
263
282
  set_n_table_columns(parent_container, header_row->n_columns);
264
283
 
265
284
  uint8_t *alignments =
@@ -13,7 +13,7 @@ module CommonMarker
13
13
  SMART: (1 << 10),
14
14
  LIBERAL_HTML_TAG: (1 << 12),
15
15
  FOOTNOTES: (1 << 13),
16
- STRIKETHROUGH_DOUBLE_TILDE: (1 << 14),
16
+ STRIKETHROUGH_DOUBLE_TILDE: (1 << 14)
17
17
  }.freeze,
18
18
  render: {
19
19
  DEFAULT: 0,
@@ -28,7 +28,7 @@ module CommonMarker
28
28
  FOOTNOTES: (1 << 13),
29
29
  STRIKETHROUGH_DOUBLE_TILDE: (1 << 14),
30
30
  TABLE_PREFER_STYLE_ATTRIBUTES: (1 << 15),
31
- FULL_INFO_STRING: (1 << 16),
31
+ FULL_INFO_STRING: (1 << 16)
32
32
  }.freeze,
33
33
  format: %i[html xml commonmark plaintext].freeze
34
34
  }.freeze
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'commonmarker/node/inspect'
4
+
5
+ module CommonMarker
6
+ class RenderError < StandardError
7
+ PREAMBLE = 'There was an error rendering'
8
+ def initialize(error)
9
+ super("#{PREAMBLE}: #{error.class} #{error.message}")
10
+ end
11
+ end
12
+ end