markly 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/conduct.md +133 -0
  4. data/ext/markly/arena.c +9 -8
  5. data/ext/markly/autolink.c +217 -134
  6. data/ext/markly/blocks.c +40 -4
  7. data/ext/markly/cmark-gfm-core-extensions.h +11 -11
  8. data/ext/markly/cmark-gfm-extension_api.h +1 -0
  9. data/ext/markly/cmark-gfm.h +18 -2
  10. data/ext/markly/cmark-gfm_version.h +2 -2
  11. data/ext/markly/cmark.c +3 -3
  12. data/ext/markly/commonmark.c +33 -38
  13. data/ext/markly/ext_scanners.c +360 -640
  14. data/ext/markly/extconf.rb +8 -1
  15. data/ext/markly/footnotes.c +23 -0
  16. data/ext/markly/footnotes.h +2 -0
  17. data/ext/markly/html.c +60 -23
  18. data/ext/markly/inlines.c +216 -61
  19. data/ext/markly/latex.c +6 -4
  20. data/ext/markly/man.c +7 -11
  21. data/ext/markly/map.c +11 -4
  22. data/ext/markly/map.h +5 -2
  23. data/ext/markly/markly.c +582 -586
  24. data/ext/markly/markly.h +1 -1
  25. data/ext/markly/node.c +76 -10
  26. data/ext/markly/node.h +49 -1
  27. data/ext/markly/parser.h +1 -0
  28. data/ext/markly/plaintext.c +12 -29
  29. data/ext/markly/references.c +1 -0
  30. data/ext/markly/render.c +15 -7
  31. data/ext/markly/scanners.c +13916 -20242
  32. data/ext/markly/scanners.h +8 -0
  33. data/ext/markly/scanners.re +47 -8
  34. data/ext/markly/strikethrough.c +1 -1
  35. data/ext/markly/table.c +143 -74
  36. data/ext/markly/xml.c +2 -1
  37. data/lib/markly/flags.rb +16 -0
  38. data/lib/markly/node/inspect.rb +59 -53
  39. data/lib/markly/node.rb +125 -58
  40. data/lib/markly/renderer/generic.rb +136 -0
  41. data/lib/markly/renderer/html.rb +301 -0
  42. data/lib/markly/version.rb +7 -1
  43. data/lib/markly.rb +38 -32
  44. data/license.md +39 -0
  45. data/readme.md +36 -0
  46. data.tar.gz.sig +0 -0
  47. metadata +63 -31
  48. metadata.gz.sig +0 -0
  49. data/bin/markly +0 -94
  50. data/lib/markly/markly.so +0 -0
  51. data/lib/markly/renderer/html_renderer.rb +0 -281
  52. data/lib/markly/renderer.rb +0 -133
data/ext/markly/blocks.c CHANGED
@@ -8,6 +8,7 @@
8
8
  #include <stdlib.h>
9
9
  #include <assert.h>
10
10
  #include <stdio.h>
11
+ #include <limits.h>
11
12
 
12
13
  #include "cmark_ctype.h"
13
14
  #include "syntax_extension.h"
@@ -26,6 +27,14 @@
26
27
  #define CODE_INDENT 4
27
28
  #define TAB_STOP 4
28
29
 
30
+ /**
31
+ * Very deeply nested lists can cause quadratic performance issues.
32
+ * This constant is used in open_new_blocks() to limit the nesting
33
+ * depth. It is unlikely that a non-contrived markdown document will
34
+ * be nested this deeply.
35
+ */
36
+ #define MAX_LIST_DEPTH 100
37
+
29
38
  #ifndef MIN
30
39
  #define MIN(x, y) ((x < y) ? x : y)
31
40
  #endif
@@ -468,7 +477,6 @@ static void process_footnotes(cmark_parser *parser) {
468
477
  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
469
478
  cur = cmark_iter_get_node(iter);
470
479
  if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
471
- cmark_node_unlink(cur);
472
480
  cmark_footnote_create(map, cur);
473
481
  }
474
482
  }
@@ -485,6 +493,15 @@ static void process_footnotes(cmark_parser *parser) {
485
493
  if (!footnote->ix)
486
494
  footnote->ix = ++ix;
487
495
 
496
+ // store a reference to this footnote reference's footnote definition
497
+ // this is used by renderers when generating label ids
498
+ cur->parent_footnote_def = footnote->node;
499
+
500
+ // keep track of a) count of how many times this footnote def has been
501
+ // referenced, and b) which reference index this footnote ref is at.
502
+ // this is used by renderers when generating links and backreferences.
503
+ cur->footnote.ref_ix = ++footnote->node->footnote.def_count;
504
+
488
505
  char n[32];
489
506
  snprintf(n, sizeof(n), "%d", footnote->ix);
490
507
  cmark_chunk_free(parser->mem, &cur->as.literal);
@@ -515,13 +532,16 @@ static void process_footnotes(cmark_parser *parser) {
515
532
  qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
516
533
  for (unsigned int i = 0; i < map->size; ++i) {
517
534
  cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
518
- if (!footnote->ix)
535
+ if (!footnote->ix) {
536
+ cmark_node_unlink(footnote->node);
519
537
  continue;
538
+ }
520
539
  cmark_node_append_child(parser->root, footnote->node);
521
540
  footnote->node = NULL;
522
541
  }
523
542
  }
524
543
 
544
+ cmark_unlink_footnotes_map(map);
525
545
  cmark_map_free(map);
526
546
  }
527
547
 
@@ -628,6 +648,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
628
648
  }
629
649
 
630
650
  finalize(parser, parser->root);
651
+
652
+ // Limit total size of extra content created from reference links to
653
+ // document size to avoid superlinear growth. Always allow 100KB.
654
+ if (parser->total_size > 100000)
655
+ parser->refmap->max_ref_size = parser->total_size;
656
+ else
657
+ parser->refmap->max_ref_size = 100000;
658
+
631
659
  process_inlines(parser, parser->refmap, parser->options);
632
660
  if (parser->options & CMARK_OPT_FOOTNOTES)
633
661
  process_footnotes(parser);
@@ -687,6 +715,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
687
715
  const unsigned char *end = buffer + len;
688
716
  static const uint8_t repl[] = {239, 191, 189};
689
717
 
718
+ if (len > UINT_MAX - parser->total_size)
719
+ parser->total_size = UINT_MAX;
720
+ else
721
+ parser->total_size += len;
722
+
690
723
  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
691
724
  // skip NL if last buffer ended with CR ; see #117
692
725
  buffer++;
@@ -1094,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1094
1127
  bool has_content;
1095
1128
  int save_offset;
1096
1129
  int save_column;
1130
+ size_t depth = 0;
1097
1131
 
1098
1132
  while (cont_type != CMARK_NODE_CODE_BLOCK &&
1099
1133
  cont_type != CMARK_NODE_HTML_BLOCK) {
1100
-
1134
+ depth++;
1101
1135
  S_find_first_nonspace(parser, input);
1102
1136
  indented = parser->indent >= CODE_INDENT;
1103
1137
 
@@ -1186,12 +1220,13 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1186
1220
  parser->options & CMARK_OPT_FOOTNOTES &&
1187
1221
  (matched = scan_footnote_definition(input, parser->first_nonspace))) {
1188
1222
  cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1189
- cmark_chunk_to_cstr(parser->mem, &c);
1190
1223
 
1191
1224
  while (c.data[c.len - 1] != ']')
1192
1225
  --c.len;
1193
1226
  --c.len;
1194
1227
 
1228
+ cmark_chunk_to_cstr(parser->mem, &c);
1229
+
1195
1230
  S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
1196
1231
  *container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
1197
1232
  (*container)->as.literal = c;
@@ -1199,6 +1234,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1199
1234
  (*container)->internal_offset = matched;
1200
1235
  } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
1201
1236
  parser->indent < 4 &&
1237
+ depth < MAX_LIST_DEPTH &&
1202
1238
  (matched = parse_list_marker(
1203
1239
  parser->mem, input, parser->first_nonspace,
1204
1240
  (*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
@@ -6,45 +6,45 @@ extern "C" {
6
6
  #endif
7
7
 
8
8
  #include "cmark-gfm-extension_api.h"
9
- #include "cmark-gfm-extensions_export.h"
10
- #include "config.h" // for bool
9
+ #include "cmark-gfm_export.h"
10
+ #include <stdbool.h>
11
11
  #include <stdint.h>
12
12
 
13
- CMARK_GFM_EXTENSIONS_EXPORT
13
+ CMARK_GFM_EXPORT
14
14
  void cmark_gfm_core_extensions_ensure_registered(void);
15
15
 
16
- CMARK_GFM_EXTENSIONS_EXPORT
16
+ CMARK_GFM_EXPORT
17
17
  uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
18
18
 
19
19
  /** Sets the number of columns for the table, returning 1 on success and 0 on error.
20
20
  */
21
- CMARK_GFM_EXTENSIONS_EXPORT
21
+ CMARK_GFM_EXPORT
22
22
  int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
23
23
 
24
- CMARK_GFM_EXTENSIONS_EXPORT
24
+ CMARK_GFM_EXPORT
25
25
  uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
26
26
 
27
27
  /** Sets the alignments for the table, returning 1 on success and 0 on error.
28
28
  */
29
- CMARK_GFM_EXTENSIONS_EXPORT
29
+ CMARK_GFM_EXPORT
30
30
  int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
31
31
 
32
- CMARK_GFM_EXTENSIONS_EXPORT
32
+ CMARK_GFM_EXPORT
33
33
  int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
34
34
 
35
35
  /** Sets whether the node is a table header row, returning 1 on success and 0 on error.
36
36
  */
37
- CMARK_GFM_EXTENSIONS_EXPORT
37
+ CMARK_GFM_EXPORT
38
38
  int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
39
39
 
40
- CMARK_GFM_EXTENSIONS_EXPORT
40
+ CMARK_GFM_EXPORT
41
41
  bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
42
42
  /* For backwards compatibility */
43
43
  #define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
44
44
 
45
45
  /** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
46
46
  */
47
- CMARK_GFM_EXTENSIONS_EXPORT
47
+ CMARK_GFM_EXPORT
48
48
  int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
49
49
 
50
50
  #ifdef __cplusplus
@@ -114,6 +114,7 @@ typedef struct delimiter {
114
114
  struct delimiter *previous;
115
115
  struct delimiter *next;
116
116
  cmark_node *inl_text;
117
+ bufsize_t position;
117
118
  bufsize_t length;
118
119
  unsigned char delim_char;
119
120
  int can_open;
@@ -111,13 +111,13 @@ typedef struct cmark_mem {
111
111
  * realloc and free.
112
112
  */
113
113
  CMARK_GFM_EXPORT
114
- cmark_mem *cmark_get_default_mem_allocator();
114
+ cmark_mem *cmark_get_default_mem_allocator(void);
115
115
 
116
116
  /** An arena allocator; uses system calloc to allocate large
117
117
  * slabs of memory. Memory in these slabs is not reused at all.
118
118
  */
119
119
  CMARK_GFM_EXPORT
120
- cmark_mem *cmark_get_arena_mem_allocator();
120
+ cmark_mem *cmark_get_arena_mem_allocator(void);
121
121
 
122
122
  /** Resets the arena allocator, quickly returning all used memory
123
123
  * to the operating system.
@@ -225,6 +225,11 @@ CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
225
225
  */
226
226
  CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
227
227
 
228
+ /** Returns the footnote definition that the footnote reference 'node' refers to,
229
+ * or NULL if 'node' has no associated footnote definition.
230
+ */
231
+ CMARK_GFM_EXPORT cmark_node *cmark_node_parent_footnote_def(cmark_node *node);
232
+
228
233
  /**
229
234
  * ## Iterator
230
235
  *
@@ -408,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
408
413
  */
409
414
  CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
410
415
 
416
+ /**
417
+ * Returns item index of 'node'. This is only used when rendering output
418
+ * formats such as commonmark, which need to output the index. It is not
419
+ * required for formats such as html or latex.
420
+ */
421
+ CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
422
+
423
+ /** Sets item index of 'node'. Returns 1 on success, 0 on failure.
424
+ */
425
+ CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
426
+
411
427
  /** Returns the info string from a fenced code block.
412
428
  */
413
429
  CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
@@ -1,7 +1,7 @@
1
1
  #ifndef CMARK_GFM_VERSION_H
2
2
  #define CMARK_GFM_VERSION_H
3
3
 
4
- #define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 0)
5
- #define CMARK_GFM_VERSION_STRING "0.29.0.gfm.0"
4
+ #define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 2)
5
+ #define CMARK_GFM_VERSION_STRING "0.29.0.gfm.2"
6
6
 
7
7
  #endif
data/ext/markly/cmark.c CHANGED
@@ -10,9 +10,9 @@
10
10
  cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
11
11
  cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE;
12
12
 
13
- int cmark_version() { return CMARK_GFM_VERSION; }
13
+ int cmark_version(void) { return CMARK_GFM_VERSION; }
14
14
 
15
- const char *cmark_version_string() { return CMARK_GFM_VERSION_STRING; }
15
+ const char *cmark_version_string(void) { return CMARK_GFM_VERSION_STRING; }
16
16
 
17
17
  static void *xcalloc(size_t nmem, size_t size) {
18
18
  void *ptr = calloc(nmem, size);
@@ -38,7 +38,7 @@ static void xfree(void *ptr) {
38
38
 
39
39
  cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};
40
40
 
41
- cmark_mem *cmark_get_default_mem_allocator() {
41
+ cmark_mem *cmark_get_default_mem_allocator(void) {
42
42
  return &CMARK_DEFAULT_MEM_ALLOCATOR;
43
43
  }
44
44
 
@@ -153,23 +153,8 @@ static bool is_autolink(cmark_node *node) {
153
153
  link_text->as.literal.len) == 0);
154
154
  }
155
155
 
156
- // if node is a block node, returns node.
157
- // otherwise returns first block-level node that is an ancestor of node.
158
- // if there is no block-level ancestor, returns NULL.
159
- static cmark_node *get_containing_block(cmark_node *node) {
160
- while (node) {
161
- if (CMARK_NODE_BLOCK_P(node)) {
162
- return node;
163
- } else {
164
- node = node->parent;
165
- }
166
- }
167
- return NULL;
168
- }
169
-
170
156
  static int S_render_node(cmark_renderer *renderer, cmark_node *node,
171
157
  cmark_event_type ev_type, int options) {
172
- cmark_node *tmp;
173
158
  int list_number;
174
159
  cmark_delim_type list_delim;
175
160
  int numticks;
@@ -180,7 +165,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
180
165
  char fencechar[2] = {'\0', '\0'};
181
166
  size_t info_len, code_len;
182
167
  char listmarker[LISTMARKER_SIZE];
183
- char *emph_delim;
168
+ const char *emph_delim;
184
169
  bool first_in_list_item;
185
170
  bufsize_t marker_width;
186
171
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
@@ -189,14 +174,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
189
174
  // Don't adjust tight list status til we've started the list.
190
175
  // Otherwise we lose the blank line between a paragraph and
191
176
  // a following list.
192
- if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
193
- tmp = get_containing_block(node);
194
- renderer->in_tight_list_item =
195
- tmp && // tmp might be NULL if there is no containing block
196
- ((tmp->type == CMARK_NODE_ITEM &&
197
- cmark_node_get_list_tight(tmp->parent)) ||
198
- (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
199
- cmark_node_get_list_tight(tmp->parent->parent)));
177
+ if (entering) {
178
+ if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
179
+ renderer->in_tight_list_item = node->parent->parent->as.list.tight;
180
+ }
181
+ } else {
182
+ if (node->type == CMARK_NODE_LIST) {
183
+ renderer->in_tight_list_item =
184
+ node->parent &&
185
+ node->parent->type == CMARK_NODE_ITEM &&
186
+ node->parent->parent->as.list.tight;
187
+ }
200
188
  }
201
189
 
202
190
  if (node->extension && node->extension->commonmark_render_func) {
@@ -234,13 +222,8 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
234
222
  if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
235
223
  marker_width = 4;
236
224
  } else {
237
- list_number = cmark_node_get_list_start(node->parent);
225
+ list_number = cmark_node_get_item_index(node);
238
226
  list_delim = cmark_node_get_list_delim(node->parent);
239
- tmp = node;
240
- while (tmp->prev) {
241
- tmp = tmp->prev;
242
- list_number += 1;
243
- }
244
227
  // we ensure a width of at least 4 so
245
228
  // we get nice transition from single digits
246
229
  // to double
@@ -405,10 +388,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
405
388
  break;
406
389
 
407
390
  case CMARK_NODE_STRONG:
408
- if (entering) {
409
- LIT("**");
410
- } else {
411
- LIT("**");
391
+ if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
392
+ if (entering) {
393
+ LIT("**");
394
+ } else {
395
+ LIT("**");
396
+ }
412
397
  }
413
398
  break;
414
399
 
@@ -477,7 +462,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
477
462
  case CMARK_NODE_FOOTNOTE_REFERENCE:
478
463
  if (entering) {
479
464
  LIT("[^");
480
- OUT(cmark_chunk_to_cstr(renderer->mem, &node->as.literal), false, LITERAL);
465
+
466
+ char *footnote_label = renderer->mem->calloc(node->parent_footnote_def->as.literal.len + 1, sizeof(char));
467
+ memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);
468
+
469
+ OUT(footnote_label, false, LITERAL);
470
+ renderer->mem->free(footnote_label);
471
+
481
472
  LIT("]");
482
473
  }
483
474
  break;
@@ -486,9 +477,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
486
477
  if (entering) {
487
478
  renderer->footnote_ix += 1;
488
479
  LIT("[^");
489
- char n[32];
490
- snprintf(n, sizeof(n), "%d", renderer->footnote_ix);
491
- OUT(n, false, LITERAL);
480
+
481
+ char *footnote_label = renderer->mem->calloc(node->as.literal.len + 1, sizeof(char));
482
+ memmove(footnote_label, node->as.literal.data, node->as.literal.len);
483
+
484
+ OUT(footnote_label, false, LITERAL);
485
+ renderer->mem->free(footnote_label);
486
+
492
487
  LIT("]:\n");
493
488
 
494
489
  cmark_strbuf_puts(renderer->prefix, " ");