markly 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/arena.c +9 -8
  5. data/ext/markly/autolink.c +217 -134
  6. data/ext/markly/blocks.c +27 -2
  7. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  8. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  9. data/ext/markly/cmark-gfm.h +18 -2
  10. data/ext/markly/cmark.c +3 -3
  11. data/ext/markly/commonmark.c +19 -34
  12. data/ext/markly/extconf.rb +8 -1
  13. data/ext/markly/html.c +22 -6
  14. data/ext/markly/inlines.c +148 -51
  15. data/ext/markly/latex.c +6 -4
  16. data/ext/markly/man.c +7 -11
  17. data/ext/markly/map.c +11 -4
  18. data/ext/markly/map.h +5 -2
  19. data/ext/markly/markly.c +582 -586
  20. data/ext/markly/markly.h +1 -1
  21. data/ext/markly/node.c +76 -10
  22. data/ext/markly/node.h +42 -1
  23. data/ext/markly/parser.h +1 -0
  24. data/ext/markly/plaintext.c +12 -29
  25. data/ext/markly/references.c +1 -0
  26. data/ext/markly/render.c +15 -7
  27. data/ext/markly/scanners.c +13916 -10380
  28. data/ext/markly/scanners.h +8 -0
  29. data/ext/markly/scanners.re +47 -8
  30. data/ext/markly/strikethrough.c +1 -1
  31. data/ext/markly/table.c +81 -31
  32. data/ext/markly/xml.c +2 -1
  33. data/lib/markly/flags.rb +16 -0
  34. data/lib/markly/node/inspect.rb +59 -53
  35. data/lib/markly/node.rb +125 -58
  36. data/lib/markly/renderer/generic.rb +129 -124
  37. data/lib/markly/renderer/html.rb +294 -275
  38. data/lib/markly/version.rb +7 -1
  39. data/lib/markly.rb +36 -30
  40. data/license.md +39 -0
  41. data/readme.md +36 -0
  42. data.tar.gz.sig +0 -0
  43. metadata +61 -29
  44. metadata.gz.sig +0 -0
  45. data/bin/markly +0 -94
  46. data/lib/markly/markly.bundle +0 -0
data/ext/markly/blocks.c CHANGED
@@ -8,6 +8,7 @@
8
8
  #include <stdlib.h>
9
9
  #include <assert.h>
10
10
  #include <stdio.h>
11
+ #include <limits.h>
11
12
 
12
13
  #include "cmark_ctype.h"
13
14
  #include "syntax_extension.h"
@@ -26,6 +27,14 @@
26
27
  #define CODE_INDENT 4
27
28
  #define TAB_STOP 4
28
29
 
30
+ /**
31
+ * Very deeply nested lists can cause quadratic performance issues.
32
+ * This constant is used in open_new_blocks() to limit the nesting
33
+ * depth. It is unlikely that a non-contrived markdown document will
34
+ * be nested this deeply.
35
+ */
36
+ #define MAX_LIST_DEPTH 100
37
+
29
38
  #ifndef MIN
30
39
  #define MIN(x, y) ((x < y) ? x : y)
31
40
  #endif
@@ -639,6 +648,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
639
648
  }
640
649
 
641
650
  finalize(parser, parser->root);
651
+
652
+ // Limit total size of extra content created from reference links to
653
+ // document size to avoid superlinear growth. Always allow 100KB.
654
+ if (parser->total_size > 100000)
655
+ parser->refmap->max_ref_size = parser->total_size;
656
+ else
657
+ parser->refmap->max_ref_size = 100000;
658
+
642
659
  process_inlines(parser, parser->refmap, parser->options);
643
660
  if (parser->options & CMARK_OPT_FOOTNOTES)
644
661
  process_footnotes(parser);
@@ -698,6 +715,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
698
715
  const unsigned char *end = buffer + len;
699
716
  static const uint8_t repl[] = {239, 191, 189};
700
717
 
718
+ if (len > UINT_MAX - parser->total_size)
719
+ parser->total_size = UINT_MAX;
720
+ else
721
+ parser->total_size += len;
722
+
701
723
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
702
724
  // skip NL if last buffer ended with CR ; see #117
703
725
  buffer++;
@@ -1105,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1105
1127
  bool has_content;
1106
1128
  int save_offset;
1107
1129
  int save_column;
1130
+ size_t depth = 0;
1108
1131
 
1109
1132
  while (cont_type != CMARK_NODE_CODE_BLOCK &&
1110
1133
  cont_type != CMARK_NODE_HTML_BLOCK) {
1111
-
1134
+ depth++;
1112
1135
  S_find_first_nonspace(parser, input);
1113
1136
  indented = parser->indent >= CODE_INDENT;
1114
1137
 
@@ -1197,12 +1220,13 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1197
1220
  parser->options & CMARK_OPT_FOOTNOTES &&
1198
1221
  (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1199
1222
  cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1200
- cmark_chunk_to_cstr(parser->mem, &c);
1201
1223
 
1202
1224
  while (c.data[c.len - 1] != ']')
1203
1225
  --c.len;
1204
1226
  --c.len;
1205
1227
 
1228
+ cmark_chunk_to_cstr(parser->mem, &c);
1229
+
1206
1230
  S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1207
1231
  *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1208
1232
  (*container)->as.literal = c;
@@ -1210,6 +1234,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1210
1234
  (*container)->internal_offset = matched;
1211
1235
  } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1212
1236
  parser->indent < 4 &&
1237
+ depth < MAX_LIST_DEPTH &&
1213
1238
  (matched = parse_list_marker(
1214
1239
  parser->mem, input, parser->first_nonspace,
1215
1240
  (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
@@ -6,45 +6,45 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm-extension_api.h"
9
- #include "cmark-gfm-extensions_export.h"
10
- #include "config.h" // for bool
9
+ #include "cmark-gfm_export.h"
10
+ #include <stdbool.h>
11
11
  #include <stdint.h>
12
12
 
13
- CMARK_GFM_EXTENSIONS_EXPORT
13
+ CMARK_GFM_EXPORT
14
14
  void cmark_gfm_core_extensions_ensure_registered(void);
15
15
 
16
- CMARK_GFM_EXTENSIONS_EXPORT
16
+ CMARK_GFM_EXPORT
17
17
  uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
18
18
 
19
19
  /** Sets the number of columns for the table, returning 1 on success and 0 on error.
20
20
  */
21
- CMARK_GFM_EXTENSIONS_EXPORT
21
+ CMARK_GFM_EXPORT
22
22
  int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
23
23
 
24
- CMARK_GFM_EXTENSIONS_EXPORT
24
+ CMARK_GFM_EXPORT
25
25
  uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
26
26
 
27
27
  /** Sets the alignments for the table, returning 1 on success and 0 on error.
28
28
  */
29
- CMARK_GFM_EXTENSIONS_EXPORT
29
+ CMARK_GFM_EXPORT
30
30
  int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
31
31
 
32
- CMARK_GFM_EXTENSIONS_EXPORT
32
+ CMARK_GFM_EXPORT
33
33
  int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
34
34
 
35
35
  /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
36
36
  */
37
- CMARK_GFM_EXTENSIONS_EXPORT
37
+ CMARK_GFM_EXPORT
38
38
  int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
39
39
 
40
- CMARK_GFM_EXTENSIONS_EXPORT
40
+ CMARK_GFM_EXPORT
41
41
  bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
42
42
  /* For backwards compatibility */
43
43
  #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
44
44
 
45
45
  /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
46
46
  */
47
- CMARK_GFM_EXTENSIONS_EXPORT
47
+ CMARK_GFM_EXPORT
48
48
  int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
49
49
 
50
50
  #ifdef __cplusplus
@@ -114,6 +114,7 @@ typedef struct delimiter {
114
114
  struct delimiter *previous;
115
115
  struct delimiter *next;
116
116
  cmark_node *inl_text;
117
+ bufsize_t position;
117
118
  bufsize_t length;
118
119
  unsigned char delim_char;
119
120
  int can_open;
@@ -111,13 +111,13 @@ typedef struct cmark_mem {
111
111
  * realloc and free.
112
112
  */
113
113
  CMARK_GFM_EXPORT
114
- cmark_mem *cmark_get_default_mem_allocator();
114
+ cmark_mem *cmark_get_default_mem_allocator(void);
115
115
 
116
116
  /** An arena allocator; uses system calloc to allocate large
117
117
  * slabs of memory. Memory in these slabs is not reused at all.
118
118
  */
119
119
  CMARK_GFM_EXPORT
120
- cmark_mem *cmark_get_arena_mem_allocator();
120
+ cmark_mem *cmark_get_arena_mem_allocator(void);
121
121
 
122
122
  /** Resets the arena allocator, quickly returning all used memory
123
123
  * to the operating system.
@@ -225,6 +225,11 @@ CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
225
225
  */
226
226
  CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
227
227
 
228
+ /** Returns the footnote reference of 'node', or NULL if 'node' doesn't have a
229
+ * footnote reference.
230
+ */
231
+ CMARK_GFM_EXPORT cmark_node *cmark_node_parent_footnote_def(cmark_node *node);
232
+
228
233
  /**
229
234
  * ## Iterator
230
235
  *
@@ -408,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
408
413
  */
409
414
  CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
410
415
 
416
+ /**
417
+ * Returns item index of 'node'. This is only used when rendering output
418
+ * formats such as commonmark, which need to output the index. It is not
419
+ * required for formats such as html or latex.
420
+ */
421
+ CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
422
+
423
+ /** Sets item index of 'node'. Returns 1 on success, 0 on failure.
424
+ */
425
+ CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
426
+
411
427
  /** Returns the info string from a fenced code block.
412
428
  */
413
429
  CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
data/ext/markly/cmark.c CHANGED
@@ -10,9 +10,9 @@
10
10
  cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
11
11
  cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE;
12
12
 
13
- int cmark_version() { return CMARK_GFM_VERSION; }
13
+ int cmark_version(void) { return CMARK_GFM_VERSION; }
14
14
 
15
- const char *cmark_version_string() { return CMARK_GFM_VERSION_STRING; }
15
+ const char *cmark_version_string(void) { return CMARK_GFM_VERSION_STRING; }
16
16
 
17
17
  static void *xcalloc(size_t nmem, size_t size) {
18
18
  void *ptr = calloc(nmem, size);
@@ -38,7 +38,7 @@ static void xfree(void *ptr) {
38
38
 
39
39
  cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};
40
40
 
41
- cmark_mem *cmark_get_default_mem_allocator() {
41
+ cmark_mem *cmark_get_default_mem_allocator(void) {
42
42
  return &CMARK_DEFAULT_MEM_ALLOCATOR;
43
43
  }
44
44
 
@@ -153,23 +153,8 @@ static bool is_autolink(cmark_node *node) {
153
153
  link_text->as.literal.len) == 0);
154
154
  }
155
155
 
156
- // if node is a block node, returns node.
157
- // otherwise returns first block-level node that is an ancestor of node.
158
- // if there is no block-level ancestor, returns NULL.
159
- static cmark_node *get_containing_block(cmark_node *node) {
160
- while (node) {
161
- if (CMARK_NODE_BLOCK_P(node)) {
162
- return node;
163
- } else {
164
- node = node->parent;
165
- }
166
- }
167
- return NULL;
168
- }
169
-
170
156
  static int S_render_node(cmark_renderer *renderer, cmark_node *node,
171
157
  cmark_event_type ev_type, int options) {
172
- cmark_node *tmp;
173
158
  int list_number;
174
159
  cmark_delim_type list_delim;
175
160
  int numticks;
@@ -180,7 +165,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
180
165
  char fencechar[2] = {'\0', '\0'};
181
166
  size_t info_len, code_len;
182
167
  char listmarker[LISTMARKER_SIZE];
183
- char *emph_delim;
168
+ const char *emph_delim;
184
169
  bool first_in_list_item;
185
170
  bufsize_t marker_width;
186
171
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
@@ -189,14 +174,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
189
174
  // Don't adjust tight list status until we've started the list.
190
175
  // Otherwise we lose the blank line between a paragraph and
191
176
  // a following list.
192
- if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
193
- tmp = get_containing_block(node);
194
- renderer->in_tight_list_item =
195
- tmp && // tmp might be NULL if there is no containing block
196
- ((tmp->type == CMARK_NODE_ITEM &&
197
- cmark_node_get_list_tight(tmp->parent)) ||
198
- (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
199
- cmark_node_get_list_tight(tmp->parent->parent)));
177
+ if (entering) {
178
+ if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
179
+ renderer->in_tight_list_item = node->parent->parent->as.list.tight;
180
+ }
181
+ } else {
182
+ if (node->type == CMARK_NODE_LIST) {
183
+ renderer->in_tight_list_item =
184
+ node->parent &&
185
+ node->parent->type == CMARK_NODE_ITEM &&
186
+ node->parent->parent->as.list.tight;
187
+ }
200
188
  }
201
189
 
202
190
  if (node->extension && node->extension->commonmark_render_func) {
@@ -234,13 +222,8 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
234
222
  if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
235
223
  marker_width = 4;
236
224
  } else {
237
- list_number = cmark_node_get_list_start(node->parent);
225
+ list_number = cmark_node_get_item_index(node);
238
226
  list_delim = cmark_node_get_list_delim(node->parent);
239
- tmp = node;
240
- while (tmp->prev) {
241
- tmp = tmp->prev;
242
- list_number += 1;
243
- }
244
227
  // we ensure a width of at least 4 so
245
228
  // we get nice transition from single digits
246
229
  // to double
@@ -405,10 +388,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
405
388
  break;
406
389
 
407
390
  case CMARK_NODE_STRONG:
408
- if (entering) {
409
- LIT("**");
410
- } else {
411
- LIT("**");
391
+ if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
392
+ if (entering) {
393
+ LIT("**");
394
+ } else {
395
+ LIT("**");
396
+ }
412
397
  }
413
398
  break;
414
399
 
@@ -1,6 +1,13 @@
1
+ #!/usr/bin/env ruby
1
2
  # frozen_string_literal: true
2
3
 
3
- # Loads mkmf which is used to make makefiles for Ruby extensions
4
+ # Released under the MIT License.
5
+ # Copyright, 2014, by John MacFarlane.
6
+ # Copyright, 2015-2019, by Garen Torikian.
7
+ # Copyright, 2016-2017, by Yuki Izumi.
8
+ # Copyright, 2017, by Ashe Connor.
9
+ # Copyright, 2020-2023, by Samuel Williams.
10
+
4
11
  require 'mkmf'
5
12
 
6
13
  $CFLAGS << " -O3 -std=c99"
data/ext/markly/html.c CHANGED
@@ -63,10 +63,16 @@ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *
63
63
  if (renderer->written_footnote_ix >= renderer->footnote_ix)
64
64
  return false;
65
65
  renderer->written_footnote_ix = renderer->footnote_ix;
66
+ char m[32];
67
+ snprintf(m, sizeof(m), "%d", renderer->written_footnote_ix);
66
68
 
67
69
  cmark_strbuf_puts(html, "<a href=\"#fnref-");
68
70
  houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
69
- cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");
71
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
72
+ cmark_strbuf_puts(html, m);
73
+ cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
74
+ cmark_strbuf_puts(html, m);
75
+ cmark_strbuf_puts(html, "\">↩</a>");
70
76
 
71
77
  if (node->footnote.def_count > 1)
72
78
  {
@@ -78,7 +84,15 @@ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *
78
84
  houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
79
85
  cmark_strbuf_puts(html, "-");
80
86
  cmark_strbuf_puts(html, n);
81
- cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
87
+ cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
88
+ cmark_strbuf_puts(html, m);
89
+ cmark_strbuf_puts(html, "-");
90
+ cmark_strbuf_puts(html, n);
91
+ cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
92
+ cmark_strbuf_puts(html, m);
93
+ cmark_strbuf_puts(html, "-");
94
+ cmark_strbuf_puts(html, n);
95
+ cmark_strbuf_puts(html, "\">↩<sup class=\"footnote-ref\">");
82
96
  cmark_strbuf_puts(html, n);
83
97
  cmark_strbuf_puts(html, "</sup></a>");
84
98
  }
@@ -350,10 +364,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
350
364
  break;
351
365
 
352
366
  case CMARK_NODE_STRONG:
353
- if (entering) {
354
- cmark_strbuf_puts(html, "<strong>");
355
- } else {
356
- cmark_strbuf_puts(html, "</strong>");
367
+ if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
368
+ if (entering) {
369
+ cmark_strbuf_puts(html, "<strong>");
370
+ } else {
371
+ cmark_strbuf_puts(html, "</strong>");
372
+ }
357
373
  }
358
374
  break;
359
375