redcarpet 2.0.0b3 → 2.0.0b4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of redcarpet might be problematic. Click here for more details.

@@ -18,49 +18,58 @@
18
18
  */
19
19
 
20
20
  #include "markdown.h"
21
- #include "array.h"
21
+ #include "stack.h"
22
22
 
23
23
  #include <assert.h>
24
24
  #include <string.h>
25
- //#include <strings.h> /* for strncasecmp */
26
25
  #include <ctype.h>
27
26
  #include <stdio.h>
28
27
 
28
+ #define REF_TABLE_SIZE 8
29
+
29
30
  #define BUFFER_BLOCK 0
30
31
  #define BUFFER_SPAN 1
31
32
 
32
33
  #define MKD_LI_END 8 /* internal list flag */
33
34
 
35
+ #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
36
+ #define GPERF_DOWNCASE 1
37
+ #define GPERF_CASE_STRNCMP 1
38
+ #include "html_blocks.h"
39
+
34
40
  /***************
35
41
  * LOCAL TYPES *
36
42
  ***************/
37
43
 
38
- /* link_ref reference to a link */
44
+ /* link_ref: reference to a link */
39
45
  struct link_ref {
40
- struct buf *id;
46
+ unsigned int id;
47
+
41
48
  struct buf *link;
42
49
  struct buf *title;
50
+
51
+ struct link_ref *next;
43
52
  };
44
53
 
45
- /* char_trigger function pointer to render active chars */
54
+ /* char_trigger: function pointer to render active chars */
46
55
  /* returns the number of chars taken care of */
47
56
  /* data is the pointer of the beginning of the span */
48
57
  /* offset is the number of valid chars before data */
49
- struct render;
58
+ struct sd_markdown;
50
59
  typedef size_t
51
- (*char_trigger)(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
52
-
53
- static size_t char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
54
- static size_t char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
55
- static size_t char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
56
- static size_t char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
57
- static size_t char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
58
- static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
59
- static size_t char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
60
- static size_t char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
61
- static size_t char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
62
- static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
63
- static size_t char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
60
+ (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
61
+
62
+ static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
63
+ static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
64
+ static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
65
+ static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
66
+ static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
67
+ static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
68
+ static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
69
+ static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
70
+ static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
71
+ static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
72
+ static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
64
73
 
65
74
  enum markdown_char_t {
66
75
  MD_CHAR_NONE = 0,
@@ -93,84 +102,46 @@ static char_trigger markdown_char_ptrs[] = {
93
102
  };
94
103
 
95
104
  /* render • structure containing one particular render */
96
- struct render {
105
+ struct sd_markdown {
97
106
  struct sd_callbacks cb;
98
107
  void *opaque;
99
108
 
100
- struct array refs;
101
- char active_char[256];
102
- struct parray work_bufs[2];
109
+ struct link_ref *refs[REF_TABLE_SIZE];
110
+ uint8_t active_char[256];
111
+ struct stack work_bufs[2];
103
112
  unsigned int ext_flags;
104
113
  size_t max_nesting;
105
114
  };
106
115
 
107
- /* html_tag • structure for quick HTML tag search (inspired from discount) */
108
- struct html_tag {
109
- const char *text;
110
- size_t size;
111
- };
116
+ /***************************
117
+ * HELPER FUNCTIONS *
118
+ ***************************/
112
119
 
113
120
  static inline struct buf *
114
- rndr_newbuf(struct render *rndr, int type)
121
+ rndr_newbuf(struct sd_markdown *rndr, int type)
115
122
  {
116
123
  static const size_t buf_size[2] = {256, 64};
117
124
  struct buf *work = NULL;
118
- struct parray *queue = &rndr->work_bufs[type];
125
+ struct stack *pool = &rndr->work_bufs[type];
119
126
 
120
- if (queue->size < queue->asize) {
121
- work = queue->item[queue->size++];
127
+ if (pool->size < pool->asize &&
128
+ pool->item[pool->size] != NULL) {
129
+ work = pool->item[pool->size++];
122
130
  work->size = 0;
123
131
  } else {
124
132
  work = bufnew(buf_size[type]);
125
- parr_push(queue, work);
133
+ stack_push(pool, work);
126
134
  }
127
135
 
128
136
  return work;
129
137
  }
130
138
 
131
139
  static inline void
132
- rndr_popbuf(struct render *rndr, int type)
140
+ rndr_popbuf(struct sd_markdown *rndr, int type)
133
141
  {
134
142
  rndr->work_bufs[type].size--;
135
143
  }
136
144
 
137
- /********************
138
- * GLOBAL VARIABLES *
139
- ********************/
140
-
141
- /* block_tags • recognised block tags, sorted by cmp_html_tag */
142
- static struct html_tag block_tags[] = {
143
- /*0*/ { "p", 1 },
144
- { "dl", 2 },
145
- { "h1", 2 },
146
- { "h2", 2 },
147
- { "h3", 2 },
148
- { "h4", 2 },
149
- { "h5", 2 },
150
- { "h6", 2 },
151
- { "ol", 2 },
152
- { "ul", 2 },
153
- { "del", 3 }, /* 10 */
154
- { "div", 3 },
155
- { "ins", 3 }, /* 12 */
156
- { "pre", 3 },
157
- { "form", 4 },
158
- { "math", 4 },
159
- { "table", 5 },
160
- { "figure", 6 },
161
- { "iframe", 6 },
162
- { "script", 6 },
163
- { "fieldset", 8 },
164
- { "noscript", 8 },
165
- { "blockquote", 10 }
166
- };
167
-
168
- #define INS_TAG (block_tags + 12)
169
- #define DEL_TAG (block_tags + 10)
170
-
171
- /***************************
172
- * HELPER FUNCTIONS *
173
- ***************************/
174
145
  static void
175
146
  unscape_text(struct buf *ob, struct buf *src)
176
147
  {
@@ -191,54 +162,87 @@ unscape_text(struct buf *ob, struct buf *src)
191
162
  }
192
163
  }
193
164
 
194
- /* cmp_link_ref • comparison function for link_ref sorted arrays */
195
- static int
196
- cmp_link_ref(void *key, void *array_entry)
165
+ static unsigned int
166
+ hash_link_ref(const uint8_t *link_ref, size_t length)
197
167
  {
198
- struct link_ref *lr = array_entry;
199
- return bufcasecmp(key, lr->id);
168
+ size_t i;
169
+ unsigned int hash = 0;
170
+
171
+ for (i = 0; i < length; ++i)
172
+ hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
173
+
174
+ return hash;
200
175
  }
201
176
 
202
- /* cmp_link_ref_sort • comparison function for link_ref qsort */
203
- static int
204
- cmp_link_ref_sort(const void *a, const void *b)
177
+ static struct link_ref *
178
+ add_link_ref(
179
+ struct link_ref **references,
180
+ const uint8_t *name, size_t name_size)
205
181
  {
206
- const struct link_ref *lra = a;
207
- const struct link_ref *lrb = b;
208
- return bufcasecmp(lra->id, lrb->id);
182
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
183
+
184
+ if (!ref)
185
+ return NULL;
186
+
187
+ ref->id = hash_link_ref(name, name_size);
188
+ ref->next = references[ref->id % REF_TABLE_SIZE];
189
+
190
+ references[ref->id % REF_TABLE_SIZE] = ref;
191
+ return ref;
209
192
  }
210
193
 
211
- /* cmp_html_tag comparison function for bsearch() (stolen from discount) */
212
- static int
213
- cmp_html_tag(const void *a, const void *b)
194
+ static struct link_ref *
195
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
214
196
  {
215
- const struct html_tag *hta = a;
216
- const struct html_tag *htb = b;
217
- if (hta->size != htb->size) return (int)(hta->size - htb->size);
218
- return strncasecmp(hta->text, htb->text, hta->size);
219
- }
197
+ unsigned int hash = hash_link_ref(name, length);
198
+ struct link_ref *ref = NULL;
220
199
 
200
+ ref = references[hash % REF_TABLE_SIZE];
201
+
202
+ while (ref != NULL) {
203
+ if (ref->id == hash)
204
+ return ref;
205
+
206
+ ref = ref->next;
207
+ }
221
208
 
222
- /* find_block_tag • returns the current block tag */
223
- static struct html_tag *
224
- find_block_tag(char *data, size_t size)
209
+ return NULL;
210
+ }
211
+
212
+ static void
213
+ free_link_refs(struct link_ref **references)
225
214
  {
226
- size_t i = 0;
227
- struct html_tag key;
215
+ size_t i;
228
216
 
229
- /* looking for the word end */
230
- while (i < size && ((data[i] >= '0' && data[i] <= '9')
231
- || (data[i] >= 'A' && data[i] <= 'Z')
232
- || (data[i] >= 'a' && data[i] <= 'z')))
233
- i++;
234
- if (i >= size) return 0;
217
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
218
+ struct link_ref *r = references[i];
219
+ struct link_ref *next;
235
220
 
236
- /* binary search of the tag */
237
- key.text = data;
238
- key.size = i;
239
- return bsearch(&key, block_tags,
240
- sizeof block_tags / sizeof block_tags[0],
241
- sizeof block_tags[0], cmp_html_tag);
221
+ while (r) {
222
+ next = r->next;
223
+ bufrelease(r->link);
224
+ bufrelease(r->title);
225
+ free(r);
226
+ r = next;
227
+ }
228
+ }
229
+ }
230
+
231
+ /*
232
+ * Check whether a char is a Markdown space.
233
+ *
234
+ * Right now we only consider spaces the actual
235
+ * space and a newline: tabs and carriage returns
236
+ * are filtered out during the preprocessing phase.
237
+ *
238
+ * If we wanted to actually be UTF-8 compliant, we
239
+ * should instead extract an Unicode codepoint from
240
+ * this character and check for space properties.
241
+ */
242
+ static inline int
243
+ _isspace(int c)
244
+ {
245
+ return c == ' ' || c == '\n';
242
246
  }
243
247
 
244
248
  /****************************
@@ -248,7 +252,7 @@ find_block_tag(char *data, size_t size)
248
252
  /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
249
253
  /* this is less strict than the original markdown e-mail address matching */
250
254
  static size_t
251
- is_mail_autolink(char *data, size_t size)
255
+ is_mail_autolink(uint8_t *data, size_t size)
252
256
  {
253
257
  size_t i = 0, nb = 0;
254
258
 
@@ -279,7 +283,7 @@ is_mail_autolink(char *data, size_t size)
279
283
 
280
284
  /* tag_length • returns the length of the given tag, or 0 if it's not valid */
281
285
  static size_t
282
- tag_length(char *data, size_t size, enum mkd_autolink *autolink)
286
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
283
287
  {
284
288
  size_t i, j;
285
289
 
@@ -322,7 +326,8 @@ tag_length(char *data, size_t size, enum mkd_autolink *autolink)
322
326
  while (i < size) {
323
327
  if (data[i] == '\\') i += 2;
324
328
  else if (data[i] == '>' || data[i] == '\'' ||
325
- data[i] == '"' || isspace(data[i])) break;
329
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
330
+ break;
326
331
  else i++;
327
332
  }
328
333
 
@@ -340,19 +345,19 @@ tag_length(char *data, size_t size, enum mkd_autolink *autolink)
340
345
 
341
346
  /* parse_inline • parses inline markdown elements */
342
347
  static void
343
- parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
348
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
344
349
  {
345
350
  size_t i = 0, end = 0;
346
- char action = 0;
347
- struct buf work = { 0, 0, 0, 0, 0 };
351
+ uint8_t action = 0;
352
+ struct buf work = { 0, 0, 0, 0 };
348
353
 
349
354
  if (rndr->work_bufs[BUFFER_SPAN].size +
350
- rndr->work_bufs[BUFFER_BLOCK].size > (int)rndr->max_nesting)
355
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
351
356
  return;
352
357
 
353
358
  while (i < size) {
354
359
  /* copying inactive chars into the output */
355
- while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
360
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
356
361
  end++;
357
362
  }
358
363
 
@@ -371,16 +376,16 @@ parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
371
376
  end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
372
377
  if (!end) /* no action from the callback */
373
378
  end = i + 1;
374
- else {
379
+ else {
375
380
  i += end;
376
381
  end = i;
377
- }
382
+ }
378
383
  }
379
384
  }
380
385
 
381
- /* find_emph_char • looks for the next emph char, skipping other constructs */
386
+ /* find_emph_char • looks for the next emph char, skipping other constructs */
382
387
  static size_t
383
- find_emph_char(char *data, size_t size, char c)
388
+ find_emph_char(uint8_t *data, size_t size, uint8_t c)
384
389
  {
385
390
  size_t i = 1;
386
391
 
@@ -399,25 +404,33 @@ find_emph_char(char *data, size_t size, char c)
399
404
  i++; continue;
400
405
  }
401
406
 
402
- /* skipping a code span */
403
407
  if (data[i] == '`') {
408
+ size_t span_nb = 0, bt;
404
409
  size_t tmp_i = 0;
405
410
 
406
- i++;
407
- while (i < size && data[i] != '`') {
408
- if (!tmp_i && data[i] == c) tmp_i = i;
409
- i++;
411
+ /* counting the number of opening backticks */
412
+ while (i < size && data[i] == '`') {
413
+ i++; span_nb++;
410
414
  }
411
415
 
412
- if (i >= size)
413
- return tmp_i;
416
+ if (i >= size) return 0;
414
417
 
415
- i++;
418
+ /* finding the matching closing sequence */
419
+ bt = 0;
420
+ while (i < size && bt < span_nb) {
421
+ if (!tmp_i && data[i] == c) tmp_i = i;
422
+ if (data[i] == '`') bt++;
423
+ else bt = 0;
424
+ i++;
425
+ }
426
+
427
+ if (i >= size) return tmp_i;
428
+ i++;
416
429
  }
417
430
  /* skipping a link */
418
431
  else if (data[i] == '[') {
419
432
  size_t tmp_i = 0;
420
- char cc;
433
+ uint8_t cc;
421
434
 
422
435
  i++;
423
436
  while (i < size && data[i] != ']') {
@@ -426,18 +439,26 @@ find_emph_char(char *data, size_t size, char c)
426
439
  }
427
440
 
428
441
  i++;
429
- while (i < size && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
442
+ while (i < size && (data[i] == ' ' || data[i] == '\n'))
430
443
  i++;
431
444
 
432
445
  if (i >= size)
433
446
  return tmp_i;
434
447
 
435
- if (data[i] != '[' && data[i] != '(') { /* not a link*/
436
- if (tmp_i) return tmp_i;
437
- else continue;
448
+ switch (data[i]) {
449
+ case '[':
450
+ cc = ']'; break;
451
+
452
+ case '(':
453
+ cc = ')'; break;
454
+
455
+ default:
456
+ if (tmp_i)
457
+ return tmp_i;
458
+ else
459
+ continue;
438
460
  }
439
461
 
440
- cc = data[i];
441
462
  i++;
442
463
  while (i < size && data[i] != cc) {
443
464
  if (!tmp_i && data[i] == c) tmp_i = i;
@@ -457,7 +478,7 @@ find_emph_char(char *data, size_t size, char c)
457
478
  /* parse_emph1 • parsing single emphasis */
458
479
  /* closed by a symbol not preceded by whitespace and not followed by symbol */
459
480
  static size_t
460
- parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
481
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
461
482
  {
462
483
  size_t i = 0, len;
463
484
  struct buf *work = 0;
@@ -474,15 +495,10 @@ parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c
474
495
  i += len;
475
496
  if (i >= size) return 0;
476
497
 
477
- if (i + 1 < size && data[i + 1] == c) {
478
- i++;
479
- continue;
480
- }
481
-
482
- if (data[i] == c && !isspace(data[i - 1])) {
498
+ if (data[i] == c && !_isspace(data[i - 1])) {
483
499
 
484
500
  if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
485
- if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
501
+ if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
486
502
  continue;
487
503
  }
488
504
 
@@ -499,9 +515,9 @@ parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c
499
515
 
500
516
  /* parse_emph2 • parsing double emphasis */
501
517
  static size_t
502
- parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
518
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
503
519
  {
504
- int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
520
+ int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
505
521
  size_t i = 0, len;
506
522
  struct buf *work = 0;
507
523
  int r;
@@ -510,13 +526,13 @@ parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c
510
526
 
511
527
  if (!render_method)
512
528
  return 0;
513
-
529
+
514
530
  while (i < size) {
515
531
  len = find_emph_char(data + i, size - i, c);
516
532
  if (!len) return 0;
517
533
  i += len;
518
534
 
519
- if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !isspace(data[i - 1])) {
535
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
520
536
  work = rndr_newbuf(rndr, BUFFER_SPAN);
521
537
  parse_inline(work, rndr, data, i);
522
538
  r = render_method(ob, work, rndr->opaque);
@@ -531,7 +547,7 @@ parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c
531
547
  /* parse_emph3 • parsing triple emphasis */
532
548
  /* finds the first closing tag, and delegates to the other emph */
533
549
  static size_t
534
- parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c)
550
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
535
551
  {
536
552
  size_t i = 0, len;
537
553
  int r;
@@ -542,7 +558,7 @@ parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c
542
558
  i += len;
543
559
 
544
560
  /* skip whitespace preceded symbols */
545
- if (data[i] != c || isspace(data[i - 1]))
561
+ if (data[i] != c || _isspace(data[i - 1]))
546
562
  continue;
547
563
 
548
564
  if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
@@ -567,46 +583,46 @@ parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c
567
583
  else return len - 1;
568
584
  }
569
585
  }
570
- return 0;
586
+ return 0;
571
587
  }
572
588
 
573
589
  /* char_emphasis • single and double emphasis parsing */
574
590
  static size_t
575
- char_emphasis(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
591
+ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
576
592
  {
577
- char c = data[0];
593
+ uint8_t c = data[0];
578
594
  size_t ret;
579
595
 
580
596
  if (size > 2 && data[1] != c) {
581
597
  /* whitespace cannot follow an opening emphasis;
582
598
  * strikethrough only takes two characters '~~' */
583
- if (c == '~' || isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
599
+ if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
584
600
  return 0;
585
601
 
586
602
  return ret + 1;
587
603
  }
588
604
 
589
605
  if (size > 3 && data[1] == c && data[2] != c) {
590
- if (isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
606
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
591
607
  return 0;
592
608
 
593
609
  return ret + 2;
594
610
  }
595
611
 
596
612
  if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
597
- if (c == '~' || isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
613
+ if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
598
614
  return 0;
599
615
 
600
616
  return ret + 3;
601
617
  }
602
618
 
603
- return 0;
619
+ return 0;
604
620
  }
605
621
 
606
622
 
607
623
  /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
608
624
  static size_t
609
- char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
625
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
610
626
  {
611
627
  if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
612
628
  return 0;
@@ -621,7 +637,7 @@ char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, s
621
637
 
622
638
  /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
623
639
  static size_t
624
- char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
640
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
625
641
  {
626
642
  size_t end, nb = 0, i, f_begin, f_end;
627
643
 
@@ -641,16 +657,16 @@ char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, si
641
657
 
642
658
  /* trimming outside whitespaces */
643
659
  f_begin = nb;
644
- while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
660
+ while (f_begin < end && data[f_begin] == ' ')
645
661
  f_begin++;
646
662
 
647
663
  f_end = end - nb;
648
- while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
664
+ while (f_end > nb && data[f_end-1] == ' ')
649
665
  f_end--;
650
666
 
651
667
  /* real code span */
652
668
  if (f_begin < f_end) {
653
- struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
669
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
654
670
  if (!rndr->cb.codespan(ob, &work, rndr->opaque))
655
671
  end = 0;
656
672
  } else {
@@ -664,10 +680,10 @@ char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, si
664
680
 
665
681
  /* char_escape • '\\' backslash escape */
666
682
  static size_t
667
- char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
683
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
668
684
  {
669
685
  static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>";
670
- struct buf work = { 0, 0, 0, 0, 0 };
686
+ struct buf work = { 0, 0, 0, 0 };
671
687
 
672
688
  if (size > 1) {
673
689
  if (strchr(escape_chars, data[1]) == NULL)
@@ -687,10 +703,10 @@ char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size
687
703
  /* char_entity • '&' escaped when it doesn't belong to an entity */
688
704
  /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
689
705
  static size_t
690
- char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
706
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
691
707
  {
692
708
  size_t end = 1;
693
- struct buf work;
709
+ struct buf work = { 0, 0, 0, 0 };
694
710
 
695
711
  if (end < size && data[end] == '#')
696
712
  end++;
@@ -715,11 +731,11 @@ char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size
715
731
 
716
732
  /* char_langle_tag • '<' when tags or autolinks are allowed */
717
733
  static size_t
718
- char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
734
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
719
735
  {
720
736
  enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
721
737
  size_t end = tag_length(data, size, &altype);
722
- struct buf work = { data, end, 0, 0, 0 };
738
+ struct buf work = { data, end, 0, 0 };
723
739
  int ret = 0;
724
740
 
725
741
  if (end > 2) {
@@ -740,7 +756,7 @@ char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset,
740
756
  }
741
757
 
742
758
  static size_t
743
- char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
759
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
744
760
  {
745
761
  struct buf *link, *link_url;
746
762
  size_t link_len, rewind;
@@ -765,7 +781,7 @@ char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset
765
781
  }
766
782
 
767
783
  static size_t
768
- char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
784
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
769
785
  {
770
786
  struct buf *link;
771
787
  size_t link_len, rewind;
@@ -785,7 +801,7 @@ char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offs
785
801
  }
786
802
 
787
803
  static size_t
788
- char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
804
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
789
805
  {
790
806
  struct buf *link;
791
807
  size_t link_len, rewind;
@@ -806,7 +822,7 @@ char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset
806
822
 
807
823
  /* char_link • '[': parsing a link or an image */
808
824
  static size_t
809
- char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
825
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
810
826
  {
811
827
  int is_img = (offset && data[-1] == '!'), level;
812
828
  size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
@@ -847,7 +863,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
847
863
 
848
864
  /* skip any amount of whitespace or newline */
849
865
  /* (this is much more lax than the original markdown syntax) */
850
- while (i < size && isspace(data[i]))
866
+ while (i < size && _isspace(data[i]))
851
867
  i++;
852
868
 
853
869
  /* inline style link */
@@ -855,7 +871,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
855
871
  /* skipping initial whitespace */
856
872
  i++;
857
873
 
858
- while (i < size && isspace(data[i]))
874
+ while (i < size && _isspace(data[i]))
859
875
  i++;
860
876
 
861
877
  link_b = i;
@@ -885,7 +901,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
885
901
 
886
902
  /* skipping whitespaces after title */
887
903
  title_e = i - 1;
888
- while (title_e > title_b && isspace(data[title_e]))
904
+ while (title_e > title_b && _isspace(data[title_e]))
889
905
  title_e--;
890
906
 
891
907
  /* checking for closing quote presence */
@@ -896,7 +912,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
896
912
  }
897
913
 
898
914
  /* remove whitespace at the end of the link */
899
- while (link_e > link_b && isspace(data[link_e - 1]))
915
+ while (link_e > link_b && _isspace(data[link_e - 1]))
900
916
  link_e--;
901
917
 
902
918
  /* remove optional angle brackets around the link */
@@ -919,7 +935,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
919
935
 
920
936
  /* reference style link */
921
937
  else if (i < size && data[i] == '[') {
922
- struct buf id = { 0, 0, 0, 0, 0 };
938
+ struct buf id = { 0, 0, 0, 0 };
923
939
  struct link_ref *lr;
924
940
 
925
941
  /* looking for the id */
@@ -953,8 +969,9 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
953
969
  id.size = link_e - link_b;
954
970
  }
955
971
 
956
- lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
957
- if (!lr) goto cleanup;
972
+ lr = find_link_ref(rndr->refs, id.data, id.size);
973
+ if (!lr)
974
+ goto cleanup;
958
975
 
959
976
  /* keeping link and title from link_ref */
960
977
  link = lr->link;
@@ -964,7 +981,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
964
981
 
965
982
  /* shortcut reference style link */
966
983
  else {
967
- struct buf id = { 0, 0, 0, 0, 0 };
984
+ struct buf id = { 0, 0, 0, 0 };
968
985
  struct link_ref *lr;
969
986
 
970
987
  /* crafting the id */
@@ -987,8 +1004,9 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
987
1004
  }
988
1005
 
989
1006
  /* finding the link_ref */
990
- lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
991
- if (!lr) goto cleanup;
1007
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1008
+ if (!lr)
1009
+ goto cleanup;
992
1010
 
993
1011
  /* keeping link and title from link_ref */
994
1012
  link = lr->link;
@@ -1027,7 +1045,7 @@ cleanup:
1027
1045
  }
1028
1046
 
1029
1047
  static size_t
1030
- char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
1048
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1031
1049
  {
1032
1050
  size_t sup_start, sup_len;
1033
1051
  struct buf *sup;
@@ -1049,7 +1067,7 @@ char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset,
1049
1067
  } else {
1050
1068
  sup_start = sup_len = 1;
1051
1069
 
1052
- while (sup_len < size && !isspace(data[sup_len]))
1070
+ while (sup_len < size && !_isspace(data[sup_len]))
1053
1071
  sup_len++;
1054
1072
  }
1055
1073
 
@@ -1070,20 +1088,23 @@ char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset,
1070
1088
 
1071
1089
  /* is_empty • returns the line length when it is empty, 0 otherwise */
1072
1090
  static size_t
1073
- is_empty(char *data, size_t size)
1091
+ is_empty(uint8_t *data, size_t size)
1074
1092
  {
1075
1093
  size_t i;
1094
+
1076
1095
  for (i = 0; i < size && data[i] != '\n'; i++)
1077
- if (data[i] != ' ' && data[i] != '\t') return 0;
1096
+ if (data[i] != ' ')
1097
+ return 0;
1098
+
1078
1099
  return i + 1;
1079
1100
  }
1080
1101
 
1081
1102
  /* is_hrule • returns whether a line is a horizontal rule */
1082
1103
  static int
1083
- is_hrule(char *data, size_t size)
1104
+ is_hrule(uint8_t *data, size_t size)
1084
1105
  {
1085
1106
  size_t i = 0, n = 0;
1086
- char c;
1107
+ uint8_t c;
1087
1108
 
1088
1109
  /* skipping initial spaces */
1089
1110
  if (size < 3) return 0;
@@ -1091,7 +1112,7 @@ is_hrule(char *data, size_t size)
1091
1112
  if (data[1] == ' ') { i++;
1092
1113
  if (data[2] == ' ') { i++; } } }
1093
1114
 
1094
- /* looking at the hrule char */
1115
+ /* looking at the hrule char */
1095
1116
  if (i + 2 >= size
1096
1117
  || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
1097
1118
  return 0;
@@ -1100,19 +1121,21 @@ is_hrule(char *data, size_t size)
1100
1121
  /* the whole line must be the char or whitespace */
1101
1122
  while (i < size && data[i] != '\n') {
1102
1123
  if (data[i] == c) n++;
1103
- else if (data[i] != ' ' && data[i] != '\t')
1124
+ else if (data[i] != ' ')
1104
1125
  return 0;
1105
- i++; }
1126
+
1127
+ i++;
1128
+ }
1106
1129
 
1107
1130
  return n >= 3;
1108
1131
  }
1109
1132
 
1110
1133
  /* check if a line is a code fence; return its size if it is */
1111
1134
  static size_t
1112
- is_codefence(char *data, size_t size, struct buf *syntax)
1135
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1113
1136
  {
1114
1137
  size_t i = 0, n = 0;
1115
- char c;
1138
+ uint8_t c;
1116
1139
 
1117
1140
  /* skipping initial spaces */
1118
1141
  if (size < 3) return 0;
@@ -1120,13 +1143,13 @@ is_codefence(char *data, size_t size, struct buf *syntax)
1120
1143
  if (data[1] == ' ') { i++;
1121
1144
  if (data[2] == ' ') { i++; } } }
1122
1145
 
1123
- /* looking at the hrule char */
1146
+ /* looking at the code fence character */
1124
1147
  if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
1125
1148
  return 0;
1126
1149
 
1127
1150
  c = data[i];
1128
1151
 
1129
- /* the whole line must be the char or whitespace */
1152
+ /* the whole line must be the fence character or whitespace */
1130
1153
  while (i < size && data[i] == c) {
1131
1154
  n++; i++;
1132
1155
  }
@@ -1137,7 +1160,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
1137
1160
  if (syntax != NULL) {
1138
1161
  size_t syn = 0;
1139
1162
 
1140
- while (i < size && (data[i] == ' ' || data[i] == '\t'))
1163
+ while (i < size && data[i] == ' ')
1141
1164
  i++;
1142
1165
 
1143
1166
  syntax->data = data + i;
@@ -1154,16 +1177,16 @@ is_codefence(char *data, size_t size, struct buf *syntax)
1154
1177
 
1155
1178
  /* strip all whitespace at the beginning and the end
1156
1179
  * of the {} block */
1157
- while (syn > 0 && isspace(syntax->data[0])) {
1180
+ while (syn > 0 && _isspace(syntax->data[0])) {
1158
1181
  syntax->data++; syn--;
1159
1182
  }
1160
1183
 
1161
- while (syn > 0 && isspace(syntax->data[syn - 1]))
1184
+ while (syn > 0 && _isspace(syntax->data[syn - 1]))
1162
1185
  syn--;
1163
1186
 
1164
1187
  i++;
1165
1188
  } else {
1166
- while (i < size && !isspace(data[i])) {
1189
+ while (i < size && !_isspace(data[i])) {
1167
1190
  syn++; i++;
1168
1191
  }
1169
1192
  }
@@ -1172,7 +1195,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
1172
1195
  }
1173
1196
 
1174
1197
  while (i < size && data[i] != '\n') {
1175
- if (!isspace(data[i]))
1198
+ if (!_isspace(data[i]))
1176
1199
  return 0;
1177
1200
 
1178
1201
  i++;
@@ -1183,7 +1206,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
1183
1206
 
1184
1207
  /* is_atxheader • returns whether the line is a hash-prefixed header */
1185
1208
  static int
1186
- is_atxheader(struct render *rndr, char *data, size_t size)
1209
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1187
1210
  {
1188
1211
  if (data[0] != '#')
1189
1212
  return 0;
@@ -1194,7 +1217,7 @@ is_atxheader(struct render *rndr, char *data, size_t size)
1194
1217
  while (level < size && level < 6 && data[level] == '#')
1195
1218
  level++;
1196
1219
 
1197
- if (level < size && data[level] != ' ' && data[level] != '\t')
1220
+ if (level < size && data[level] != ' ')
1198
1221
  return 0;
1199
1222
  }
1200
1223
 
@@ -1203,92 +1226,126 @@ is_atxheader(struct render *rndr, char *data, size_t size)
1203
1226
 
1204
1227
  /* is_headerline • returns whether the line is a setext-style hdr underline */
1205
1228
  static int
1206
- is_headerline(char *data, size_t size)
1229
+ is_headerline(uint8_t *data, size_t size)
1207
1230
  {
1208
1231
  size_t i = 0;
1209
1232
 
1210
1233
  /* test of level 1 header */
1211
1234
  if (data[i] == '=') {
1212
1235
  for (i = 1; i < size && data[i] == '='; i++);
1213
- while (i < size && (data[i] == ' ' || data[i] == '\t')) i++;
1236
+ while (i < size && data[i] == ' ') i++;
1214
1237
  return (i >= size || data[i] == '\n') ? 1 : 0; }
1215
1238
 
1216
1239
  /* test of level 2 header */
1217
1240
  if (data[i] == '-') {
1218
1241
  for (i = 1; i < size && data[i] == '-'; i++);
1219
- while (i < size && (data[i] == ' ' || data[i] == '\t')) i++;
1242
+ while (i < size && data[i] == ' ') i++;
1220
1243
  return (i >= size || data[i] == '\n') ? 2 : 0; }
1221
1244
 
1222
1245
  return 0;
1223
1246
  }
1224
1247
 
1248
+ static int
1249
+ is_next_headerline(uint8_t *data, size_t size)
1250
+ {
1251
+ size_t i = 0;
1252
+
1253
+ while (i < size && data[i] != '\n')
1254
+ i++;
1255
+
1256
+ if (++i >= size)
1257
+ return 0;
1258
+
1259
+ return is_headerline(data + i, size - i);
1260
+ }
1261
+
1225
1262
  /* prefix_quote • returns blockquote prefix length */
1226
1263
  static size_t
1227
- prefix_quote(char *data, size_t size)
1264
+ prefix_quote(uint8_t *data, size_t size)
1228
1265
  {
1229
1266
  size_t i = 0;
1230
1267
  if (i < size && data[i] == ' ') i++;
1231
1268
  if (i < size && data[i] == ' ') i++;
1232
1269
  if (i < size && data[i] == ' ') i++;
1270
+
1233
1271
  if (i < size && data[i] == '>') {
1234
- if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t'))
1272
+ if (i + 1 < size && data[i + 1] == ' ')
1235
1273
  return i + 2;
1236
- else return i + 1; }
1237
- else return 0;
1274
+
1275
+ return i + 1;
1276
+ }
1277
+
1278
+ return 0;
1238
1279
  }
1239
1280
 
1240
1281
  /* prefix_code • returns prefix length for block code */
1241
1282
  static size_t
1242
- prefix_code(char *data, size_t size)
1283
+ prefix_code(uint8_t *data, size_t size)
1243
1284
  {
1244
- if (size > 0 && data[0] == '\t') return 1;
1245
1285
  if (size > 3 && data[0] == ' ' && data[1] == ' '
1246
- && data[2] == ' ' && data[3] == ' ') return 4;
1286
+ && data[2] == ' ' && data[3] == ' ') return 4;
1287
+
1247
1288
  return 0;
1248
1289
  }
1249
1290
 
1250
1291
  /* prefix_oli • returns ordered list item prefix */
1251
1292
  static size_t
1252
- prefix_oli(char *data, size_t size)
1293
+ prefix_oli(uint8_t *data, size_t size)
1253
1294
  {
1254
1295
  size_t i = 0;
1296
+
1255
1297
  if (i < size && data[i] == ' ') i++;
1256
1298
  if (i < size && data[i] == ' ') i++;
1257
1299
  if (i < size && data[i] == ' ') i++;
1258
- if (i >= size || data[i] < '0' || data[i] > '9') return 0;
1259
- while (i < size && data[i] >= '0' && data[i] <= '9') i++;
1260
- if (i + 1 >= size || data[i] != '.'
1261
- || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0;
1300
+
1301
+ if (i >= size || data[i] < '0' || data[i] > '9')
1302
+ return 0;
1303
+
1304
+ while (i < size && data[i] >= '0' && data[i] <= '9')
1305
+ i++;
1306
+
1307
+ if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
1308
+ return 0;
1309
+
1310
+ if (is_next_headerline(data + i, size - i))
1311
+ return 0;
1312
+
1262
1313
  return i + 2;
1263
1314
  }
1264
1315
 
1265
1316
  /* prefix_uli • returns unordered list item prefix */
1266
1317
  static size_t
1267
- prefix_uli(char *data, size_t size)
1318
+ prefix_uli(uint8_t *data, size_t size)
1268
1319
  {
1269
1320
  size_t i = 0;
1321
+
1270
1322
  if (i < size && data[i] == ' ') i++;
1271
1323
  if (i < size && data[i] == ' ') i++;
1272
1324
  if (i < size && data[i] == ' ') i++;
1273
- if (i + 1 >= size
1274
- || (data[i] != '*' && data[i] != '+' && data[i] != '-')
1275
- || (data[i + 1] != ' ' && data[i + 1] != '\t'))
1325
+
1326
+ if (i + 1 >= size ||
1327
+ (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1328
+ data[i + 1] != ' ')
1329
+ return 0;
1330
+
1331
+ if (is_next_headerline(data + i, size - i))
1276
1332
  return 0;
1333
+
1277
1334
  return i + 2;
1278
1335
  }
1279
1336
 
1280
1337
 
1281
- /* parse_block • parsing of one block, returning next char to parse */
1282
- static void parse_block(struct buf *ob, struct render *rndr,
1283
- char *data, size_t size);
1338
+ /* parse_block • parses the blocks in data[0..size) into ob; returns nothing */
1339
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1340
+ uint8_t *data, size_t size);
1284
1341
 
1285
1342
 
1286
1343
  /* parse_blockquote • handles parsing of a blockquote fragment */
1287
1344
  static size_t
1288
- parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
1345
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1289
1346
  {
1290
1347
  size_t beg, end = 0, pre, work_size = 0;
1291
- char *work_data = 0;
1348
+ uint8_t *work_data = 0;
1292
1349
  struct buf *out = 0;
1293
1350
 
1294
1351
  out = rndr_newbuf(rndr, BUFFER_BLOCK);
@@ -1326,15 +1383,15 @@ parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
1326
1383
  }
1327
1384
 
1328
1385
  static size_t
1329
- parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render);
1386
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1330
1387
 
1331
1388
  /* parse_paragraph • handles parsing of a regular paragraph */
1332
1389
  static size_t
1333
- parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
1390
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1334
1391
  {
1335
1392
  size_t i = 0, end = 0;
1336
1393
  int level = 0;
1337
- struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
1394
+ struct buf work = { data, 0, 0, 0 };
1338
1395
 
1339
1396
  while (i < size) {
1340
1397
  for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
@@ -1410,11 +1467,11 @@ parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
1410
1467
 
1411
1468
  /* parse_fencedcode • handles parsing of a block-level code fragment */
1412
1469
  static size_t
1413
- parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1470
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1414
1471
  {
1415
1472
  size_t beg, end;
1416
1473
  struct buf *work = 0;
1417
- struct buf lang = { 0, 0, 0, 0, 0 };
1474
+ struct buf lang = { 0, 0, 0, 0 };
1418
1475
 
1419
1476
  beg = is_codefence(data, size, &lang);
1420
1477
  if (beg == 0) return 0;
@@ -1453,7 +1510,7 @@ parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1453
1510
  }
1454
1511
 
1455
1512
  static size_t
1456
- parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1513
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1457
1514
  {
1458
1515
  size_t beg, end, pre;
1459
1516
  struct buf *work = 0;
@@ -1496,7 +1553,7 @@ parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
1496
1553
  /* parse_listitem • parsing of a single list item */
1497
1554
  /* assuming initial prefix is already removed */
1498
1555
  static size_t
1499
- parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags)
1556
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1500
1557
  {
1501
1558
  struct buf *work = 0, *inter = 0;
1502
1559
  size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
@@ -1546,7 +1603,6 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
1546
1603
  i++;
1547
1604
 
1548
1605
  pre = i;
1549
- if (data[beg] == '\t') { i = 1; pre = 8; }
1550
1606
 
1551
1607
  /* checking for a new item */
1552
1608
  if ((prefix_uli(data + beg + i, end - beg - i) &&
@@ -1562,7 +1618,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
1562
1618
  sublist = work->size;
1563
1619
  }
1564
1620
  /* joining only indented stuff after empty lines */
1565
- else if (in_empty && i < 4 && data[beg] != '\t') {
1621
+ else if (in_empty && i < 4) {
1566
1622
  *flags |= MKD_LI_END;
1567
1623
  break;
1568
1624
  }
@@ -1586,7 +1642,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
1586
1642
  /* intermediate render of block li */
1587
1643
  if (sublist && sublist < work->size) {
1588
1644
  parse_block(inter, rndr, work->data, sublist);
1589
- parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1645
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1590
1646
  }
1591
1647
  else
1592
1648
  parse_block(inter, rndr, work->data, work->size);
@@ -1612,7 +1668,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
1612
1668
 
1613
1669
  /* parse_list • parsing ordered or unordered list block */
1614
1670
  static size_t
1615
- parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags)
1671
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1616
1672
  {
1617
1673
  struct buf *work = 0;
1618
1674
  size_t i = 0, j;
@@ -1635,7 +1691,7 @@ parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int fla
1635
1691
 
1636
1692
  /* parse_atxheader • parsing of atx-style headers */
1637
1693
  static size_t
1638
- parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1694
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1639
1695
  {
1640
1696
  size_t level = 0;
1641
1697
  size_t i, end, skip;
@@ -1643,7 +1699,7 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1643
1699
  while (level < size && level < 6 && data[level] == '#')
1644
1700
  level++;
1645
1701
 
1646
- for (i = level; i < size && (data[i] == ' ' || data[i] == '\t'); i++);
1702
+ for (i = level; i < size && data[i] == ' '; i++);
1647
1703
 
1648
1704
  for (end = i; end < size && data[end] != '\n'; end++);
1649
1705
  skip = end;
@@ -1651,7 +1707,7 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1651
1707
  while (end && data[end - 1] == '#')
1652
1708
  end--;
1653
1709
 
1654
- while (end && (data[end - 1] == ' ' || data[end - 1] == '\t'))
1710
+ while (end && data[end - 1] == ' ')
1655
1711
  end--;
1656
1712
 
1657
1713
  if (end > i) {
@@ -1672,20 +1728,18 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
1672
1728
  /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t]*\n */
1673
1729
  /* returns the length on match, 0 otherwise */
1674
1730
  static size_t
1675
- htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size)
1731
+ htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
1676
1732
  {
1677
1733
  size_t i, w;
1678
1734
 
1679
- /* assuming data[0] == '<' && data[1] == '/' already tested */
1680
-
1681
1735
  /* checking if tag is a match */
1682
- if (tag->size + 3 >= size
1683
- || strncasecmp(data + 2, tag->text, tag->size)
1684
- || data[tag->size + 2] != '>')
1736
+ if (tag_len + 3 >= size ||
1737
+ strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
1738
+ data[tag_len + 2] != '>')
1685
1739
  return 0;
1686
1740
 
1687
1741
  /* checking white lines */
1688
- i = tag->size + 3;
1742
+ i = tag_len + 3;
1689
1743
  w = 0;
1690
1744
  if (i < size && (w = is_empty(data + i, size - i)) == 0)
1691
1745
  return 0; /* non-blank after tag */
@@ -1706,16 +1760,25 @@ htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size
1706
1760
 
1707
1761
  /* parse_htmlblock • parsing of inline HTML block */
1708
1762
  static size_t
1709
- parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render)
1763
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1710
1764
  {
1711
1765
  size_t i, j = 0;
1712
- struct html_tag *curtag;
1766
+ const char *curtag = NULL;
1713
1767
  int found;
1714
- struct buf work = { data, 0, 0, 0, 0 };
1768
+ struct buf work = { data, 0, 0, 0 };
1715
1769
 
1716
1770
  /* identification of the opening tag */
1717
- if (size < 2 || data[0] != '<') return 0;
1718
- curtag = find_block_tag(data + 1, size - 1);
1771
+ if (size < 2 || data[0] != '<')
1772
+ return 0;
1773
+
1774
+ i = 1;
1775
+
1776
+ /* look for the closing `>` in the opening tag */
1777
+ while (i < size && data[i] != '>' && data[i] != ' ')
1778
+ i++;
1779
+
1780
+ if (i < size && data[i] == '>')
1781
+ curtag = find_block_tag((char *)data + 1, i - 1);
1719
1782
 
1720
1783
  /* handling of special cases */
1721
1784
  if (!curtag) {
@@ -1737,7 +1800,7 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
1737
1800
  if (do_render && rndr->cb.blockhtml)
1738
1801
  rndr->cb.blockhtml(ob, &work, rndr->opaque);
1739
1802
  return work.size;
1740
- }
1803
+ }
1741
1804
  }
1742
1805
 
1743
1806
  /* HR, which is the only self-closing block tag considered */
@@ -1755,7 +1818,7 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
1755
1818
  rndr->cb.blockhtml(ob, &work, rndr->opaque);
1756
1819
  return work.size;
1757
1820
  }
1758
- }
1821
+ }
1759
1822
  }
1760
1823
 
1761
1824
  /* no special case recognised */
@@ -1769,24 +1832,25 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
1769
1832
 
1770
1833
  /* if not found, trying a second pass looking for indented match */
1771
1834
  /* but not if tag is "ins" or "del" (following original Markdown.pl) */
1772
- if (curtag != INS_TAG && curtag != DEL_TAG) {
1835
+ if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1836
+ size_t tag_size = strlen(curtag);
1773
1837
  i = 1;
1774
1838
  while (i < size) {
1775
1839
  i++;
1776
1840
  while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
1777
1841
  i++;
1778
1842
 
1779
- if (i + 2 + curtag->size >= size)
1843
+ if (i + 2 + tag_size >= size)
1780
1844
  break;
1781
1845
 
1782
- j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
1846
+ j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
1783
1847
 
1784
1848
  if (j) {
1785
1849
  i += j - 1;
1786
1850
  found = 1;
1787
1851
  break;
1788
1852
  }
1789
- }
1853
+ }
1790
1854
  }
1791
1855
 
1792
1856
  if (!found) return 0;
@@ -1800,11 +1864,14 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
1800
1864
  }
1801
1865
 
1802
1866
  static void
1803
- parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, size_t columns, int *col_data)
1867
+ parse_table_row(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, size_t columns, int *col_data)
1804
1868
  {
1805
1869
  size_t i = 0, col;
1806
1870
  struct buf *row_work = 0;
1807
1871
 
1872
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
1873
+ return;
1874
+
1808
1875
  row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1809
1876
 
1810
1877
  if (i < size && data[i] == '|')
@@ -1816,7 +1883,7 @@ parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, si
1816
1883
 
1817
1884
  cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
1818
1885
 
1819
- while (i < size && isspace(data[i]))
1886
+ while (i < size && _isspace(data[i]))
1820
1887
  i++;
1821
1888
 
1822
1889
  cell_start = i;
@@ -1826,31 +1893,28 @@ parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, si
1826
1893
 
1827
1894
  cell_end = i - 1;
1828
1895
 
1829
- while (cell_end > cell_start && isspace(data[cell_end]))
1896
+ while (cell_end > cell_start && _isspace(data[cell_end]))
1830
1897
  cell_end--;
1831
1898
 
1832
1899
  parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
1833
- if (rndr->cb.table_cell)
1834
- rndr->cb.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->opaque);
1900
+ rndr->cb.table_cell(row_work, cell_work, col_data[col], rndr->opaque);
1835
1901
 
1836
1902
  rndr_popbuf(rndr, BUFFER_SPAN);
1837
1903
  i++;
1838
1904
  }
1839
1905
 
1840
1906
  for (; col < columns; ++col) {
1841
- struct buf empty_cell = {0, 0, 0, 0, 0};
1842
- if (rndr->cb.table_cell)
1843
- rndr->cb.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->opaque);
1907
+ struct buf empty_cell = { 0, 0, 0, 0 };
1908
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col], rndr->opaque);
1844
1909
  }
1845
1910
 
1846
- if (rndr->cb.table_row)
1847
- rndr->cb.table_row(ob, row_work, rndr->opaque);
1911
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
1848
1912
 
1849
1913
  rndr_popbuf(rndr, BUFFER_SPAN);
1850
1914
  }
1851
1915
 
1852
1916
  static size_t
1853
- parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size, size_t *columns, int **column_data)
1917
+ parse_table_header(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, size_t *columns, int **column_data)
1854
1918
  {
1855
1919
  int pipes;
1856
1920
  size_t i = 0, col, header_end, under_end;
@@ -1886,7 +1950,9 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
1886
1950
  for (col = 0; col < *columns && i < under_end; ++col) {
1887
1951
  size_t dashes = 0;
1888
1952
 
1889
- while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
1953
+ (*column_data)[col] |= MKD_TABLE_HEADER;
1954
+
1955
+ while (i < under_end && data[i] == ' ')
1890
1956
  i++;
1891
1957
 
1892
1958
  if (data[i] == ':') {
@@ -1903,7 +1969,7 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
1903
1969
  dashes++;
1904
1970
  }
1905
1971
 
1906
- while (i < under_end && (data[i] == ' ' || data[i] == '\t'))
1972
+ while (i < under_end && data[i] == ' ')
1907
1973
  i++;
1908
1974
 
1909
1975
  if (i < under_end && data[i] != '|')
@@ -1923,7 +1989,7 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
1923
1989
  }
1924
1990
 
1925
1991
  static size_t
1926
- parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
1992
+ parse_table(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1927
1993
  {
1928
1994
  size_t i;
1929
1995
 
@@ -1968,16 +2034,16 @@ parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
1968
2034
  return i;
1969
2035
  }
1970
2036
 
1971
- /* parse_block • parsing of one block, returning next char to parse */
2037
+ /* parse_block • parses the blocks in data[0..size) into ob; returns nothing */
1972
2038
  static void
1973
- parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
2039
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1974
2040
  {
1975
2041
  size_t beg, end, i;
1976
- char *txt_data;
2042
+ uint8_t *txt_data;
1977
2043
  beg = 0;
1978
2044
 
1979
2045
  if (rndr->work_bufs[BUFFER_SPAN].size +
1980
- rndr->work_bufs[BUFFER_BLOCK].size > (int)rndr->max_nesting)
2046
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
1981
2047
  return;
1982
2048
 
1983
2049
  while (beg < size) {
@@ -2037,7 +2103,7 @@ parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
2037
2103
 
2038
2104
  /* is_ref • returns whether a line is a reference or not */
2039
2105
  static int
2040
- is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
2106
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2041
2107
  {
2042
2108
  /* int n; */
2043
2109
  size_t i = 0;
@@ -2045,8 +2111,6 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
2045
2111
  size_t link_offset, link_end;
2046
2112
  size_t title_offset, title_end;
2047
2113
  size_t line_end;
2048
- struct link_ref *lr;
2049
- /* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
2050
2114
 
2051
2115
  /* up to 3 optional leading spaces */
2052
2116
  if (beg + 3 >= end) return 0;
@@ -2069,23 +2133,27 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
2069
2133
  i++;
2070
2134
  if (i >= end || data[i] != ':') return 0;
2071
2135
  i++;
2072
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
2136
+ while (i < end && data[i] == ' ') i++;
2073
2137
  if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2074
2138
  i++;
2075
2139
  if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2076
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
2140
+ while (i < end && data[i] == ' ') i++;
2077
2141
  if (i >= end) return 0;
2078
2142
 
2079
2143
  /* link: whitespace-free sequence, optionally between angle brackets */
2080
- if (data[i] == '<') i++;
2144
+ if (data[i] == '<')
2145
+ i++;
2146
+
2081
2147
  link_offset = i;
2082
- while (i < end && data[i] != ' ' && data[i] != '\t'
2083
- && data[i] != '\n' && data[i] != '\r') i++;
2148
+
2149
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2150
+ i++;
2151
+
2084
2152
  if (data[i - 1] == '>') link_end = i - 1;
2085
2153
  else link_end = i;
2086
2154
 
2087
2155
  /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2088
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
2156
+ while (i < end && data[i] == ' ') i++;
2089
2157
  if (i < end && data[i] != '\n' && data[i] != '\r'
2090
2158
  && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2091
2159
  return 0;
@@ -2098,7 +2166,7 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
2098
2166
  /* optional (space|tab)* spacer after a newline */
2099
2167
  if (line_end) {
2100
2168
  i = line_end + 1;
2101
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; }
2169
+ while (i < end && data[i] == ' ') i++; }
2102
2170
 
2103
2171
  /* optional title: any non-newline sequence enclosed in '"()
2104
2172
  alone on its line */
@@ -2114,31 +2182,38 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
2114
2182
  else title_end = i;
2115
2183
  /* stepping back */
2116
2184
  i -= 1;
2117
- while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
2185
+ while (i > title_offset && data[i] == ' ')
2118
2186
  i -= 1;
2119
2187
  if (i > title_offset
2120
2188
  && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2121
2189
  line_end = title_end;
2122
2190
  title_end = i; } }
2123
- if (!line_end) return 0; /* garbage after the link */
2191
+
2192
+ if (!line_end)
2193
+ return 0; /* garbage after the link */
2124
2194
 
2125
2195
  /* a valid ref has been found, filling-in return structures */
2126
- if (last) *last = line_end;
2127
- if (!refs) return 1;
2128
- lr = arr_item(refs, arr_newitem(refs));
2129
- lr->id = bufnew(id_end - id_offset);
2130
- bufput(lr->id, data + id_offset, id_end - id_offset);
2131
- lr->link = bufnew(link_end - link_offset);
2132
- bufput(lr->link, data + link_offset, link_end - link_offset);
2133
- if (title_end > title_offset) {
2134
- lr->title = bufnew(title_end - title_offset);
2135
- bufput(lr->title, data + title_offset,
2136
- title_end - title_offset); }
2137
- else lr->title = 0;
2138
- return 1;
2196
+ if (last)
2197
+ *last = line_end;
2198
+
2199
+ if (refs) {
2200
+ struct link_ref *ref;
2201
+
2202
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2203
+
2204
+ ref->link = bufnew(link_end - link_offset);
2205
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2206
+
2207
+ if (title_end > title_offset) {
2208
+ ref->title = bufnew(title_end - title_offset);
2209
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2210
+ }
2211
+ }
2212
+
2213
+ return 1;
2139
2214
  }
2140
2215
 
2141
- static void expand_tabs(struct buf *ob, const char *line, size_t size)
2216
+ static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
2142
2217
  {
2143
2218
  size_t i = 0, tab = 0;
2144
2219
 
@@ -2167,93 +2242,100 @@ static void expand_tabs(struct buf *ob, const char *line, size_t size)
2167
2242
  * EXPORTED FUNCTIONS *
2168
2243
  **********************/
2169
2244
 
2170
- /* markdown • parses the input buffer and renders it into the output buffer */
2171
- void
2172
- sd_markdown(struct buf *ob,
2173
- const struct buf *ib,
2245
+ struct sd_markdown *
2246
+ sd_markdown_new(
2174
2247
  unsigned int extensions,
2248
+ size_t max_nesting,
2175
2249
  const struct sd_callbacks *callbacks,
2176
- void *opaque) {
2177
-
2178
- static const float MARKDOWN_GROW_FACTOR = 1.4f;
2179
-
2180
- struct link_ref *lr;
2181
- struct buf *text;
2182
- size_t i, beg, end;
2183
- struct render rndr;
2184
-
2185
- /* filling the render structure */
2186
- if (!callbacks)
2187
- return;
2250
+ void *opaque)
2251
+ {
2252
+ struct sd_markdown *md = NULL;
2188
2253
 
2189
- text = bufnew(64);
2190
- if (!text)
2191
- return;
2254
+ assert(max_nesting > 0 && callbacks);
2192
2255
 
2193
- /* Preallocate enough space for our buffer to avoid expanding while copying */
2194
- bufgrow(text, ib->size);
2256
+ md = malloc(sizeof(struct sd_markdown));
2257
+ if (!md)
2258
+ return NULL;
2195
2259
 
2196
- memcpy(&rndr.cb, callbacks, sizeof(struct sd_callbacks));
2197
- arr_init(&rndr.refs, sizeof (struct link_ref));
2198
- parr_init(&rndr.work_bufs[BUFFER_BLOCK]);
2199
- parr_init(&rndr.work_bufs[BUFFER_SPAN]);
2260
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2200
2261
 
2201
- /* for (i = 0; i < 256; i++)
2202
- rndr.active_char[i] = 0; */
2262
+ stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2263
+ stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2203
2264
 
2204
- memset(rndr.active_char, 0x0, 256);
2265
+ memset(md->active_char, 0x0, 256);
2205
2266
 
2206
- if (rndr.cb.emphasis || rndr.cb.double_emphasis || rndr.cb.triple_emphasis) {
2207
- rndr.active_char['*'] = MD_CHAR_EMPHASIS;
2208
- rndr.active_char['_'] = MD_CHAR_EMPHASIS;
2267
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2268
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2269
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2209
2270
  if (extensions & MKDEXT_STRIKETHROUGH)
2210
- rndr.active_char['~'] = MD_CHAR_EMPHASIS;
2271
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2211
2272
  }
2212
2273
 
2213
- if (rndr.cb.codespan)
2214
- rndr.active_char['`'] = MD_CHAR_CODESPAN;
2274
+ if (md->cb.codespan)
2275
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2215
2276
 
2216
- if (rndr.cb.linebreak)
2217
- rndr.active_char['\n'] = MD_CHAR_LINEBREAK;
2277
+ if (md->cb.linebreak)
2278
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2218
2279
 
2219
- if (rndr.cb.image || rndr.cb.link)
2220
- rndr.active_char['['] = MD_CHAR_LINK;
2280
+ if (md->cb.image || md->cb.link)
2281
+ md->active_char['['] = MD_CHAR_LINK;
2221
2282
 
2222
- rndr.active_char['<'] = MD_CHAR_LANGLE;
2223
- rndr.active_char['\\'] = MD_CHAR_ESCAPE;
2224
- rndr.active_char['&'] = MD_CHAR_ENTITITY;
2283
+ md->active_char['<'] = MD_CHAR_LANGLE;
2284
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2285
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2225
2286
 
2226
2287
  if (extensions & MKDEXT_AUTOLINK) {
2227
- rndr.active_char[':'] = MD_CHAR_AUTOLINK_URL;
2228
- rndr.active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2229
- rndr.active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2288
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2289
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2290
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2230
2291
  }
2231
2292
 
2232
2293
  if (extensions & MKDEXT_SUPERSCRIPT)
2233
- rndr.active_char['^'] = MD_CHAR_SUPERSCRIPT;
2294
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2234
2295
 
2235
2296
  /* Extension data */
2236
- rndr.ext_flags = extensions;
2237
- rndr.opaque = opaque;
2238
- rndr.max_nesting = 16;
2297
+ md->ext_flags = extensions;
2298
+ md->opaque = opaque;
2299
+ md->max_nesting = max_nesting;
2300
+
2301
+ return md;
2302
+ }
2303
+
2304
+ void
2305
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2306
+ {
2307
+ static const float MARKDOWN_GROW_FACTOR = 1.4f;
2308
+
2309
+ struct buf *text;
2310
+ size_t beg, end;
2311
+
2312
+ text = bufnew(64);
2313
+ if (!text)
2314
+ return;
2315
+
2316
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2317
+ bufgrow(text, doc_size);
2318
+
2319
+ /* reset the references table */
2320
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2239
2321
 
2240
2322
  /* first pass: looking for references, copying everything else */
2241
2323
  beg = 0;
2242
- while (beg < ib->size) /* iterating over lines */
2243
- if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
2324
+ while (beg < doc_size) /* iterating over lines */
2325
+ if (is_ref(document, beg, doc_size, &end, md->refs))
2244
2326
  beg = end;
2245
2327
  else { /* skipping to the next line */
2246
2328
  end = beg;
2247
- while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
2329
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2248
2330
  end++;
2249
2331
 
2250
2332
  /* adding the line body if present */
2251
2333
  if (end > beg)
2252
- expand_tabs(text, ib->data + beg, end - beg);
2334
+ expand_tabs(text, document + beg, end - beg);
2253
2335
 
2254
- while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
2336
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2255
2337
  /* add one \n per newline */
2256
- if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
2338
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2257
2339
  bufputc(text, '\n');
2258
2340
  end++;
2259
2341
  }
@@ -2261,50 +2343,47 @@ sd_markdown(struct buf *ob,
2261
2343
  beg = end;
2262
2344
  }
2263
2345
 
2264
- /* sorting the reference array */
2265
- if (rndr.refs.size)
2266
- qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort);
2267
-
2268
2346
  /* pre-grow the output buffer to minimize allocations */
2269
2347
  bufgrow(ob, text->size * MARKDOWN_GROW_FACTOR);
2270
2348
 
2271
2349
  /* second pass: actual rendering */
2272
- if (rndr.cb.doc_header)
2273
- rndr.cb.doc_header(ob, rndr.opaque);
2350
+ if (md->cb.doc_header)
2351
+ md->cb.doc_header(ob, md->opaque);
2274
2352
 
2275
2353
  if (text->size) {
2276
2354
  /* adding a final newline if not already present */
2277
2355
  if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2278
2356
  bufputc(text, '\n');
2279
2357
 
2280
- parse_block(ob, &rndr, text->data, text->size);
2358
+ parse_block(ob, md, text->data, text->size);
2281
2359
  }
2282
2360
 
2283
- if (rndr.cb.doc_footer)
2284
- rndr.cb.doc_footer(ob, rndr.opaque);
2361
+ if (md->cb.doc_footer)
2362
+ md->cb.doc_footer(ob, md->opaque);
2285
2363
 
2286
2364
  /* clean-up */
2287
2365
  bufrelease(text);
2288
- lr = rndr.refs.base;
2289
- for (i = 0; i < (size_t)rndr.refs.size; i++) {
2290
- bufrelease(lr[i].id);
2291
- bufrelease(lr[i].link);
2292
- bufrelease(lr[i].title);
2293
- }
2366
+ free_link_refs(md->refs);
2294
2367
 
2295
- arr_free(&rndr.refs);
2368
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2369
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2370
+ }
2371
+
2372
+ void
2373
+ sd_markdown_free(struct sd_markdown *md)
2374
+ {
2375
+ size_t i;
2296
2376
 
2297
- assert(rndr.work_bufs[BUFFER_SPAN].size == 0);
2298
- assert(rndr.work_bufs[BUFFER_BLOCK].size == 0);
2377
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2378
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2299
2379
 
2300
- for (i = 0; i < (size_t)rndr.work_bufs[BUFFER_SPAN].asize; ++i)
2301
- bufrelease(rndr.work_bufs[BUFFER_SPAN].item[i]);
2380
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2381
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2302
2382
 
2303
- for (i = 0; i < (size_t)rndr.work_bufs[BUFFER_BLOCK].asize; ++i)
2304
- bufrelease(rndr.work_bufs[BUFFER_BLOCK].item[i]);
2383
+ stack_free(&md->work_bufs[BUFFER_SPAN]);
2384
+ stack_free(&md->work_bufs[BUFFER_BLOCK]);
2305
2385
 
2306
- parr_free(&rndr.work_bufs[BUFFER_SPAN]);
2307
- parr_free(&rndr.work_bufs[BUFFER_BLOCK]);
2386
+ free(md);
2308
2387
  }
2309
2388
 
2310
2389
  void