greenmat 3.2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/COPYING +14 -0
  3. data/Gemfile +9 -0
  4. data/README.md +36 -0
  5. data/Rakefile +62 -0
  6. data/bin/greenmat +7 -0
  7. data/ext/greenmat/autolink.c +296 -0
  8. data/ext/greenmat/autolink.h +49 -0
  9. data/ext/greenmat/buffer.c +196 -0
  10. data/ext/greenmat/buffer.h +83 -0
  11. data/ext/greenmat/extconf.rb +6 -0
  12. data/ext/greenmat/gm_markdown.c +161 -0
  13. data/ext/greenmat/gm_render.c +534 -0
  14. data/ext/greenmat/greenmat.h +30 -0
  15. data/ext/greenmat/houdini.h +29 -0
  16. data/ext/greenmat/houdini_href_e.c +108 -0
  17. data/ext/greenmat/houdini_html_e.c +83 -0
  18. data/ext/greenmat/html.c +826 -0
  19. data/ext/greenmat/html.h +84 -0
  20. data/ext/greenmat/html_blocks.h +229 -0
  21. data/ext/greenmat/html_smartypants.c +445 -0
  22. data/ext/greenmat/markdown.c +2912 -0
  23. data/ext/greenmat/markdown.h +138 -0
  24. data/ext/greenmat/stack.c +62 -0
  25. data/ext/greenmat/stack.h +26 -0
  26. data/greenmat.gemspec +72 -0
  27. data/lib/greenmat.rb +92 -0
  28. data/lib/greenmat/compat.rb +73 -0
  29. data/lib/greenmat/render_man.rb +65 -0
  30. data/lib/greenmat/render_strip.rb +48 -0
  31. data/test/benchmark.rb +24 -0
  32. data/test/custom_render_test.rb +28 -0
  33. data/test/greenmat_compat_test.rb +38 -0
  34. data/test/html5_test.rb +69 -0
  35. data/test/html_render_test.rb +241 -0
  36. data/test/html_toc_render_test.rb +76 -0
  37. data/test/markdown_test.rb +337 -0
  38. data/test/pathological_inputs_test.rb +34 -0
  39. data/test/safe_render_test.rb +36 -0
  40. data/test/smarty_html_test.rb +45 -0
  41. data/test/smarty_pants_test.rb +48 -0
  42. data/test/stripdown_render_test.rb +40 -0
  43. data/test/test_helper.rb +33 -0
  44. metadata +158 -0
@@ -0,0 +1,2912 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ * Copyright (c) 2011, Vicent Marti
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+
20
+ #include "markdown.h"
21
+ #include "stack.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <ctype.h>
26
+ #include <stdio.h>
27
+
28
+ #if defined(_WIN32)
29
+ #define strncasecmp _strnicmp
30
+ #endif
31
+
32
+ #define REF_TABLE_SIZE 8
33
+
34
+ #define BUFFER_BLOCK 0
35
+ #define BUFFER_SPAN 1
36
+
37
+ #define MKD_LI_END 8 /* internal list flag */
38
+
39
+ #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
40
+ #define GPERF_DOWNCASE 1
41
+ #define GPERF_CASE_STRNCMP 1
42
+ #include "html_blocks.h"
43
+
44
+ /***************
45
+ * LOCAL TYPES *
46
+ ***************/
47
+
48
/* link_ref: one link reference entry, chained per hash bucket */
struct link_ref {
	unsigned int id;	/* hash_link_ref() value of the reference name */

	struct buf *link;	/* link buffer, released in free_link_refs() */
	struct buf *title;	/* title buffer, released in free_link_refs() */

	struct link_ref *next;	/* next entry sharing the same bucket */
};
57
+
58
/* footnote_ref: a single footnote definition */
struct footnote_ref {
	unsigned int id;	/* hash_link_ref() value of the footnote name */

	int is_used;		/* set to 1 the first time the footnote is referenced */
	unsigned int num;	/* count of used footnotes at the time it was first referenced */

	struct buf *contents;	/* footnote body, released in free_footnote_ref() */
};
67
+
68
/* footnote_item: node of a footnote_list */
struct footnote_item {
	struct footnote_ref *ref;	/* footnote this node points at */
	struct footnote_item *next;	/* following node, NULL on the last one */
};
73
+
74
/* footnote_list: singly linked list of footnote_item, appended at the tail */
struct footnote_list {
	unsigned int count;		/* number of items appended so far */
	struct footnote_item *head;	/* first item, NULL while the list is empty */
	struct footnote_item *tail;	/* last item, where new items are linked */
};
80
+
81
/*
 * char_trigger: handler invoked by parse_inline() for an active character.
 *
 *   ob     - output buffer the handler renders into
 *   rndr   - current render state
 *   data   - points at the active character that triggered the call
 *   offset - number of valid chars available before data (data[-1], ...)
 *   size   - number of chars available from data onwards
 *
 * Returns the number of chars taken care of, 0 meaning "not handled".
 */
struct sd_markdown;
typedef size_t (*char_trigger)(struct buf *ob, struct sd_markdown *rndr,
	uint8_t *data, size_t offset, size_t size);

/* handlers; all of them share the char_trigger signature */
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_underline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_highlight(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_quote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
103
+
104
/* markdown_char_t: action codes stored in active_char[]; each value is the
 * index of the matching handler in markdown_char_ptrs[] */
enum markdown_char_t {
	MD_CHAR_NONE           = 0,	/* inactive character */
	MD_CHAR_EMPHASIS       = 1,
	MD_CHAR_CODESPAN       = 2,
	MD_CHAR_LINEBREAK      = 3,
	MD_CHAR_LINK           = 4,
	MD_CHAR_LANGLE         = 5,
	MD_CHAR_ESCAPE         = 6,
	MD_CHAR_ENTITITY       = 7,
	MD_CHAR_AUTOLINK_URL   = 8,
	MD_CHAR_AUTOLINK_EMAIL = 9,
	MD_CHAR_AUTOLINK_WWW   = 10,
	MD_CHAR_SUPERSCRIPT    = 11,
	MD_CHAR_QUOTE          = 12
};
119
+
120
+ static char_trigger markdown_char_ptrs[] = {
121
+ NULL,
122
+ &char_emphasis,
123
+ &char_codespan,
124
+ &char_linebreak,
125
+ &char_link,
126
+ &char_langle_tag,
127
+ &char_escape,
128
+ &char_entity,
129
+ &char_autolink_url,
130
+ &char_autolink_email,
131
+ &char_autolink_www,
132
+ &char_superscript,
133
+ &char_quote
134
+ };
135
+
136
+ /* render • structure containing one particular render */
137
+ struct sd_markdown {
138
+ struct sd_callbacks cb;
139
+ void *opaque;
140
+
141
+ struct link_ref *refs[REF_TABLE_SIZE];
142
+ struct footnote_list footnotes_found;
143
+ struct footnote_list footnotes_used;
144
+ uint8_t active_char[256];
145
+ struct stack work_bufs[2];
146
+ unsigned int ext_flags;
147
+ size_t max_nesting;
148
+ int in_link_body;
149
+ };
150
+
151
+ /***************************
152
+ * HELPER FUNCTIONS *
153
+ ***************************/
154
+
155
+ static inline struct buf *
156
+ rndr_newbuf(struct sd_markdown *rndr, int type)
157
+ {
158
+ static const size_t buf_size[2] = {256, 64};
159
+ struct buf *work = NULL;
160
+ struct stack *pool = &rndr->work_bufs[type];
161
+
162
+ if (pool->size < pool->asize &&
163
+ pool->item[pool->size] != NULL) {
164
+ work = pool->item[pool->size++];
165
+ work->size = 0;
166
+ } else {
167
+ work = bufnew(buf_size[type]);
168
+ greenmat_stack_push(pool, work);
169
+ }
170
+
171
+ return work;
172
+ }
173
+
174
+ static inline void
175
+ rndr_popbuf(struct sd_markdown *rndr, int type)
176
+ {
177
+ rndr->work_bufs[type].size--;
178
+ }
179
+
180
+ static void
181
+ unscape_text(struct buf *ob, struct buf *src)
182
+ {
183
+ size_t i = 0, org;
184
+ while (i < src->size) {
185
+ org = i;
186
+ while (i < src->size && src->data[i] != '\\')
187
+ i++;
188
+
189
+ if (i > org)
190
+ bufput(ob, src->data + org, i - org);
191
+
192
+ if (i + 1 >= src->size)
193
+ break;
194
+
195
+ bufputc(ob, src->data[i + 1]);
196
+ i += 2;
197
+ }
198
+ }
199
+
200
/* hash_link_ref • case-insensitive hash of the first length bytes of link_ref */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	unsigned int acc = 0;

	while (length-- > 0)
		acc = tolower(*link_ref++) + (acc << 6) + (acc << 16) - acc;

	return acc;
}
211
+
212
+ static struct link_ref *
213
+ add_link_ref(
214
+ struct link_ref **references,
215
+ const uint8_t *name, size_t name_size)
216
+ {
217
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
218
+
219
+ if (!ref)
220
+ return NULL;
221
+
222
+ ref->id = hash_link_ref(name, name_size);
223
+ ref->next = references[ref->id % REF_TABLE_SIZE];
224
+
225
+ references[ref->id % REF_TABLE_SIZE] = ref;
226
+ return ref;
227
+ }
228
+
229
+ static struct link_ref *
230
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
231
+ {
232
+ unsigned int hash = hash_link_ref(name, length);
233
+ struct link_ref *ref = NULL;
234
+
235
+ ref = references[hash % REF_TABLE_SIZE];
236
+
237
+ while (ref != NULL) {
238
+ if (ref->id == hash)
239
+ return ref;
240
+
241
+ ref = ref->next;
242
+ }
243
+
244
+ return NULL;
245
+ }
246
+
247
+ static void
248
+ free_link_refs(struct link_ref **references)
249
+ {
250
+ size_t i;
251
+
252
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
253
+ struct link_ref *r = references[i];
254
+ struct link_ref *next;
255
+
256
+ while (r) {
257
+ next = r->next;
258
+ bufrelease(r->link);
259
+ bufrelease(r->title);
260
+ free(r);
261
+ r = next;
262
+ }
263
+ }
264
+ }
265
+
266
+ static struct footnote_ref *
267
+ create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
268
+ {
269
+ struct footnote_ref *ref = calloc(1, sizeof(struct footnote_ref));
270
+ if (!ref)
271
+ return NULL;
272
+
273
+ ref->id = hash_link_ref(name, name_size);
274
+
275
+ return ref;
276
+ }
277
+
278
+ static int
279
+ add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
280
+ {
281
+ struct footnote_item *item = calloc(1, sizeof(struct footnote_item));
282
+ if (!item)
283
+ return 0;
284
+ item->ref = ref;
285
+
286
+ if (list->head == NULL) {
287
+ list->head = list->tail = item;
288
+ } else {
289
+ list->tail->next = item;
290
+ list->tail = item;
291
+ }
292
+ list->count++;
293
+
294
+ return 1;
295
+ }
296
+
297
+ static struct footnote_ref *
298
+ find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
299
+ {
300
+ unsigned int hash = hash_link_ref(name, length);
301
+ struct footnote_item *item = NULL;
302
+
303
+ item = list->head;
304
+
305
+ while (item != NULL) {
306
+ if (item->ref->id == hash)
307
+ return item->ref;
308
+ item = item->next;
309
+ }
310
+
311
+ return NULL;
312
+ }
313
+
314
+ static void
315
+ free_footnote_ref(struct footnote_ref *ref)
316
+ {
317
+ bufrelease(ref->contents);
318
+ free(ref);
319
+ }
320
+
321
+ static void
322
+ free_footnote_list(struct footnote_list *list, int free_refs)
323
+ {
324
+ struct footnote_item *item = list->head;
325
+ struct footnote_item *next;
326
+
327
+ while (item) {
328
+ next = item->next;
329
+ if (free_refs)
330
+ free_footnote_ref(item->ref);
331
+ free(item);
332
+ item = next;
333
+ }
334
+ }
335
+
336
/*
 * ASCII-only isalnum: returns 1 for characters isalnum() accepts whose code
 * is below 0x7f, 0 for everything else.
 */
static inline int
_isalnum(int c)
{
	if (!isalnum(c))
		return 0;

	return c < 0x7f;
}
344
+
345
/*
 * Markdown whitespace test.
 *
 * Only the plain space and the newline count: the original comment notes
 * that tabs and carriage returns are filtered out during preprocessing.
 *
 * This works on single bytes; a UTF-8 aware version would have to decode a
 * Unicode codepoint and check its space property instead.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
361
+
362
+ /****************************
363
+ * INLINE PARSING FUNCTIONS *
364
+ ****************************/
365
+
366
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/* returns the length up to and including '>', or 0 if there is no match */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos, at_signs = 0;

	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
	for (pos = 0; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (_isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_signs++;
			continue;
		}

		/* any other char ends the scan; only a '>' after one '@' matches */
		if (ch == '>' && at_signs == 1)
			return pos + 1;

		return 0;
	}

	return 0;
}
397
+
398
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
399
+ static size_t
400
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
401
+ {
402
+ size_t i, j;
403
+
404
+ /* a valid tag can't be shorter than 3 chars */
405
+ if (size < 3) return 0;
406
+
407
+ /* begins with a '<' optionally followed by '/', followed by letter or number */
408
+ if (data[0] != '<') return 0;
409
+ i = (data[1] == '/') ? 2 : 1;
410
+
411
+ if (!_isalnum(data[i]))
412
+ return 0;
413
+
414
+ /* scheme test */
415
+ *autolink = MKDA_NOT_AUTOLINK;
416
+
417
+ /* try to find the beginning of an URI */
418
+ while (i < size && (_isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
419
+ i++;
420
+
421
+ if (i > 1 && data[i] == '@') {
422
+ if ((j = is_mail_autolink(data + i, size - i)) != 0) {
423
+ *autolink = MKDA_EMAIL;
424
+ return i + j;
425
+ }
426
+ }
427
+
428
+ if (i > 2 && data[i] == ':') {
429
+ *autolink = MKDA_NORMAL;
430
+ i++;
431
+ }
432
+
433
+ /* completing autolink test: no whitespace or ' or " */
434
+ if (i >= size)
435
+ *autolink = MKDA_NOT_AUTOLINK;
436
+
437
+ else if (*autolink) {
438
+ j = i;
439
+
440
+ while (i < size) {
441
+ if (data[i] == '\\') i += 2;
442
+ else if (data[i] == '>' || data[i] == '\'' ||
443
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
444
+ break;
445
+ else i++;
446
+ }
447
+
448
+ if (i >= size) return 0;
449
+ if (i > j && data[i] == '>') return i + 1;
450
+ /* one of the forbidden chars has been found */
451
+ *autolink = MKDA_NOT_AUTOLINK;
452
+ }
453
+
454
+ /* looking for sometinhg looking like a tag end */
455
+ while (i < size && data[i] != '>') i++;
456
+ if (i >= size) return 0;
457
+ return i + 1;
458
+ }
459
+
460
+ /* parse_inline • parses inline markdown elements */
461
+ static void
462
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
463
+ {
464
+ size_t i = 0, end = 0;
465
+ uint8_t action = 0;
466
+ struct buf work = { 0, 0, 0, 0 };
467
+
468
+ if (rndr->work_bufs[BUFFER_SPAN].size +
469
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
470
+ return;
471
+
472
+ while (i < size) {
473
+ /* copying inactive chars into the output */
474
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
475
+ end++;
476
+ }
477
+
478
+ if (rndr->cb.normal_text) {
479
+ work.data = data + i;
480
+ work.size = end - i;
481
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
482
+ }
483
+ else
484
+ bufput(ob, data + i, end - i);
485
+
486
+ if (end >= size) break;
487
+ i = end;
488
+
489
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
490
+ if (!end) /* no action from the callback */
491
+ end = i + 1;
492
+ else {
493
+ i += end;
494
+ end = i;
495
+ }
496
+ }
497
+ }
498
+
499
/* find_emph_char • looks for the next emph char, skipping other constructs */
/* the scan starts at index 1; returns the index of the char found, or 0 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t pos = 1;

	while (pos < size) {
		/* run forward to the next emph char or link opener */
		for (; pos < size; pos++) {
			if (data[pos] == c || data[pos] == '[')
				break;
		}

		if (pos == size)
			return 0;

		/* not counting escaped chars */
		if (pos && data[pos - 1] == '\\') {
			pos++;
			continue;
		}

		if (data[pos] == c)
			return pos;

		/* NOTE(review): the scan above never stops on a backtick, so this
		 * code span branch looks unreachable as written; confirm intent */
		if (data[pos] == '`') {
			size_t opening = 0, closing;
			size_t fallback = 0;

			/* counting the number of opening backticks */
			while (pos < size && data[pos] == '`') {
				pos++;
				opening++;
			}

			if (pos >= size)
				return 0;

			/* finding the matching closing sequence */
			closing = 0;
			while (pos < size && closing < opening) {
				if (!fallback && data[pos] == c)
					fallback = pos;

				if (data[pos] == '`')
					closing++;
				else
					closing = 0;

				pos++;
			}

			if (pos >= size)
				return fallback;
		}
		/* skipping a link */
		else if (data[pos] == '[') {
			size_t fallback = 0;
			uint8_t closer;

			/* link text: remember the first emph char seen inside it */
			pos++;
			while (pos < size && data[pos] != ']') {
				if (!fallback && data[pos] == c)
					fallback = pos;
				pos++;
			}

			pos++;
			while (pos < size && (data[pos] == ' ' || data[pos] == '\n'))
				pos++;

			if (pos >= size)
				return fallback;

			if (data[pos] == '[')
				closer = ']';
			else if (data[pos] == '(')
				closer = ')';
			else if (fallback)
				return fallback;
			else
				continue;	/* not a link after all: resume the outer scan */

			/* reference or inline target: skip up to its closing char */
			pos++;
			while (pos < size && data[pos] != closer) {
				if (!fallback && data[pos] == c)
					fallback = pos;
				pos++;
			}

			if (pos >= size)
				return fallback;

			pos++;
		}
	}

	return 0;
}
589
+
590
+ /* parse_emph1 • parsing single emphase */
591
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
592
+ static size_t
593
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
594
+ {
595
+ size_t i = 0, len;
596
+ struct buf *work = 0;
597
+ int r;
598
+
599
+ /* skipping one symbol if coming from emph3 */
600
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
601
+
602
+ while (i < size) {
603
+ len = find_emph_char(data + i, size - i, c);
604
+ if (!len) return 0;
605
+ i += len;
606
+ if (i >= size) return 0;
607
+
608
+ if (data[i] == c && !_isspace(data[i - 1])) {
609
+
610
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
611
+ if (i + i < size && _isalnum(data[i + 1]))
612
+ continue;
613
+ }
614
+
615
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
616
+ parse_inline(work, rndr, data, i);
617
+
618
+ if (rndr->ext_flags & MKDEXT_UNDERLINE && c == '_')
619
+ r = rndr->cb.underline(ob, work, rndr->opaque);
620
+ else
621
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
622
+
623
+ rndr_popbuf(rndr, BUFFER_SPAN);
624
+ return r ? i + 1 : 0;
625
+ }
626
+ }
627
+
628
+ return 0;
629
+ }
630
+
631
+ /* parse_emph2 • parsing single emphase */
632
+ static size_t
633
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
634
+ {
635
+ size_t i = 0, len;
636
+ struct buf *work = 0;
637
+ int r;
638
+
639
+ while (i < size) {
640
+ len = find_emph_char(data + i, size - i, c);
641
+ if (!len) return 0;
642
+ i += len;
643
+
644
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
645
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
646
+ parse_inline(work, rndr, data, i);
647
+
648
+ if (c == '~')
649
+ r = rndr->cb.strikethrough(ob, work, rndr->opaque);
650
+ else if (c == '=')
651
+ r = rndr->cb.highlight(ob, work, rndr->opaque);
652
+ else
653
+ r = rndr->cb.double_emphasis(ob, work, rndr->opaque);
654
+
655
+ rndr_popbuf(rndr, BUFFER_SPAN);
656
+ return r ? i + 2 : 0;
657
+ }
658
+ i++;
659
+ }
660
+ return 0;
661
+ }
662
+
663
+ /* parse_emph3 • parsing single emphase */
664
+ /* finds the first closing tag, and delegates to the other emph */
665
+ static size_t
666
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
667
+ {
668
+ size_t i = 0, len;
669
+ int r;
670
+
671
+ while (i < size) {
672
+ len = find_emph_char(data + i, size - i, c);
673
+ if (!len) return 0;
674
+ i += len;
675
+
676
+ /* skip whitespace preceded symbols */
677
+ if (data[i] != c || _isspace(data[i - 1]))
678
+ continue;
679
+
680
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
681
+ /* triple symbol found */
682
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
683
+
684
+ parse_inline(work, rndr, data, i);
685
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
686
+ rndr_popbuf(rndr, BUFFER_SPAN);
687
+ return r ? i + 3 : 0;
688
+
689
+ } else if (i + 1 < size && data[i + 1] == c) {
690
+ /* double symbol found, handing over to emph1 */
691
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
692
+ if (!len) return 0;
693
+ else return len - 2;
694
+
695
+ } else {
696
+ /* single symbol found, handing over to emph2 */
697
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
698
+ if (!len) return 0;
699
+ else return len - 1;
700
+ }
701
+ }
702
+ return 0;
703
+ }
704
+
705
+ /* char_emphasis • single and double emphasis parsing */
706
+ static size_t
707
+ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
708
+ {
709
+ uint8_t c = data[0];
710
+ size_t ret;
711
+
712
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
713
+ if (offset > 0 && _isalnum(data[-1]))
714
+ return 0;
715
+ }
716
+
717
+ if (size > 2 && data[1] != c) {
718
+ /* whitespace cannot follow an opening emphasis;
719
+ * strikethrough only takes two characters '~~' */
720
+ if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
721
+ return 0;
722
+
723
+ return ret + 1;
724
+ }
725
+
726
+ if (size > 3 && data[1] == c && data[2] != c) {
727
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
728
+ return 0;
729
+
730
+ return ret + 2;
731
+ }
732
+
733
+ if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
734
+ if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
735
+ return 0;
736
+
737
+ return ret + 3;
738
+ }
739
+
740
+ return 0;
741
+ }
742
+
743
+
744
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
745
+ static size_t
746
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
747
+ {
748
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
749
+ return 0;
750
+
751
+ /* removing the last space from ob and rendering */
752
+ while (ob->size && ob->data[ob->size - 1] == ' ')
753
+ ob->size--;
754
+
755
+ return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
756
+ }
757
+
758
+
759
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
760
+ static size_t
761
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
762
+ {
763
+ size_t end, nb = 0, i, f_begin, f_end;
764
+
765
+ /* counting the number of backticks in the delimiter */
766
+ while (nb < size && data[nb] == '`')
767
+ nb++;
768
+
769
+ /* finding the next delimiter */
770
+ i = 0;
771
+ for (end = nb; end < size && i < nb; end++) {
772
+ if (data[end] == '`') i++;
773
+ else i = 0;
774
+ }
775
+
776
+ if (i < nb && end >= size)
777
+ return 0; /* no matching delimiter */
778
+
779
+ /* trimming outside whitespaces */
780
+ f_begin = nb;
781
+ while (f_begin < end && data[f_begin] == ' ')
782
+ f_begin++;
783
+
784
+ f_end = end - nb;
785
+ while (f_end > nb && data[f_end-1] == ' ')
786
+ f_end--;
787
+
788
+ /* real code span */
789
+ if (f_begin < f_end) {
790
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
791
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
792
+ end = 0;
793
+ } else {
794
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
795
+ end = 0;
796
+ }
797
+
798
+ return end;
799
+ }
800
+
801
+ /* char_quote • '"' parsing a quote */
802
+ static size_t
803
+ char_quote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
804
+ {
805
+ size_t end, nq = 0, i, f_begin, f_end;
806
+
807
+ /* counting the number of quotes in the delimiter */
808
+ while (nq < size && data[nq] == '"')
809
+ nq++;
810
+
811
+ /* finding the next delimiter */
812
+ i = 0;
813
+ for (end = nq; end < size && i < nq; end++) {
814
+ if (data[end] == '"') i++;
815
+ else i = 0;
816
+ }
817
+
818
+ if (i < nq && end >= size)
819
+ return 0; /* no matching delimiter */
820
+
821
+ /* trimming outside whitespaces */
822
+ f_begin = nq;
823
+ while (f_begin < end && data[f_begin] == ' ')
824
+ f_begin++;
825
+
826
+ f_end = end - nq;
827
+ while (f_end > nq && data[f_end-1] == ' ')
828
+ f_end--;
829
+
830
+ /* real quote */
831
+ if (f_begin < f_end) {
832
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
833
+ if (!rndr->cb.quote(ob, &work, rndr->opaque))
834
+ end = 0;
835
+ } else {
836
+ if (!rndr->cb.quote(ob, 0, rndr->opaque))
837
+ end = 0;
838
+ }
839
+
840
+ return end;
841
+ }
842
+
843
+
844
+ /* char_escape • '\\' backslash escape */
845
+ static size_t
846
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
847
+ {
848
+ static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=";
849
+ struct buf work = { 0, 0, 0, 0 };
850
+
851
+ if (size > 1) {
852
+ if (strchr(escape_chars, data[1]) == NULL)
853
+ return 0;
854
+
855
+ if (rndr->cb.normal_text) {
856
+ work.data = data + 1;
857
+ work.size = 1;
858
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
859
+ }
860
+ else bufputc(ob, data[1]);
861
+ } else if (size == 1) {
862
+ bufputc(ob, data[0]);
863
+ }
864
+
865
+ return 2;
866
+ }
867
+
868
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
869
+ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
870
+ static size_t
871
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
872
+ {
873
+ size_t end = 1;
874
+ struct buf work = { 0, 0, 0, 0 };
875
+
876
+ if (end < size && data[end] == '#')
877
+ end++;
878
+
879
+ while (end < size && _isalnum(data[end]))
880
+ end++;
881
+
882
+ if (end < size && data[end] == ';')
883
+ end++; /* real entity */
884
+ else
885
+ return 0; /* lone '&' */
886
+
887
+ if (rndr->cb.entity) {
888
+ work.data = data;
889
+ work.size = end;
890
+ rndr->cb.entity(ob, &work, rndr->opaque);
891
+ }
892
+ else bufput(ob, data, end);
893
+
894
+ return end;
895
+ }
896
+
897
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
898
+ static size_t
899
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
900
+ {
901
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
902
+ size_t end = tag_length(data, size, &altype);
903
+ struct buf work = { data, end, 0, 0 };
904
+ int ret = 0;
905
+
906
+ if (end > 2) {
907
+ if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
908
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
909
+ work.data = data + 1;
910
+ work.size = end - 2;
911
+ unscape_text(u_link, &work);
912
+ ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
913
+ rndr_popbuf(rndr, BUFFER_SPAN);
914
+ }
915
+ else if (rndr->cb.raw_html_tag)
916
+ ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
917
+ }
918
+
919
+ if (!ret) return 0;
920
+ else return end;
921
+ }
922
+
923
+ static size_t
924
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
925
+ {
926
+ struct buf *link, *link_url, *link_text;
927
+ size_t link_len, rewind;
928
+
929
+ if (!rndr->cb.link || rndr->in_link_body)
930
+ return 0;
931
+
932
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
933
+
934
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
935
+ link_url = rndr_newbuf(rndr, BUFFER_SPAN);
936
+ BUFPUTSL(link_url, "http://");
937
+ bufput(link_url, link->data, link->size);
938
+
939
+ ob->size -= rewind;
940
+ if (rndr->cb.normal_text) {
941
+ link_text = rndr_newbuf(rndr, BUFFER_SPAN);
942
+ rndr->cb.normal_text(link_text, link, rndr->opaque);
943
+ rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
944
+ rndr_popbuf(rndr, BUFFER_SPAN);
945
+ } else {
946
+ rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
947
+ }
948
+ rndr_popbuf(rndr, BUFFER_SPAN);
949
+ }
950
+
951
+ rndr_popbuf(rndr, BUFFER_SPAN);
952
+ return link_len;
953
+ }
954
+
955
+ static size_t
956
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
957
+ {
958
+ struct buf *link;
959
+ size_t link_len, rewind;
960
+
961
+ if (!rndr->cb.autolink || rndr->in_link_body)
962
+ return 0;
963
+
964
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
965
+
966
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
967
+ ob->size -= rewind;
968
+ rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
969
+ }
970
+
971
+ rndr_popbuf(rndr, BUFFER_SPAN);
972
+ return link_len;
973
+ }
974
+
975
+ static size_t
976
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
977
+ {
978
+ struct buf *link;
979
+ size_t link_len, rewind;
980
+
981
+ if (!rndr->cb.autolink || rndr->in_link_body)
982
+ return 0;
983
+
984
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
985
+
986
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, SD_AUTOLINK_SHORT_DOMAINS)) > 0) {
987
+ ob->size -= rewind;
988
+ rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
989
+ }
990
+
991
+ rndr_popbuf(rndr, BUFFER_SPAN);
992
+ return link_len;
993
+ }
994
+
995
+ /* char_link • '[': parsing a link or an image */
996
+ static size_t
997
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
998
+ {
999
+ int is_img = (offset && data[-1] == '!'), level;
1000
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1001
+ struct buf *content = 0;
1002
+ struct buf *link = 0;
1003
+ struct buf *title = 0;
1004
+ struct buf *u_link = 0;
1005
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
1006
+ int text_has_nl = 0, ret = 0;
1007
+ int in_title = 0, qtype = 0;
1008
+
1009
+ /* checking whether the correct renderer exists */
1010
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
1011
+ goto cleanup;
1012
+
1013
+ /* looking for the matching closing bracket */
1014
+ for (level = 1; i < size; i++) {
1015
+ if (data[i] == '\n')
1016
+ text_has_nl = 1;
1017
+
1018
+ else if (data[i - 1] == '\\')
1019
+ continue;
1020
+
1021
+ else if (data[i] == '[')
1022
+ level++;
1023
+
1024
+ else if (data[i] == ']') {
1025
+ level--;
1026
+ if (level <= 0)
1027
+ break;
1028
+ }
1029
+ }
1030
+
1031
+ if (i >= size)
1032
+ goto cleanup;
1033
+
1034
+ txt_e = i;
1035
+ i++;
1036
+
1037
+ /* footnote link */
1038
+ if (rndr->ext_flags & MKDEXT_FOOTNOTES && data[1] == '^') {
1039
+ if (txt_e < 3)
1040
+ goto cleanup;
1041
+
1042
+ struct buf id = { 0, 0, 0, 0 };
1043
+ struct footnote_ref *fr;
1044
+
1045
+ id.data = data + 2;
1046
+ id.size = txt_e - 2;
1047
+
1048
+ fr = find_footnote_ref(&rndr->footnotes_found, id.data, id.size);
1049
+
1050
+ /* mark footnote used */
1051
+ if (fr && !fr->is_used) {
1052
+ if(!add_footnote_ref(&rndr->footnotes_used, fr))
1053
+ goto cleanup;
1054
+ fr->is_used = 1;
1055
+ fr->num = rndr->footnotes_used.count;
1056
+ }
1057
+
1058
+ /* render */
1059
+ if (fr && rndr->cb.footnote_ref)
1060
+ ret = rndr->cb.footnote_ref(ob, fr->num, rndr->opaque);
1061
+
1062
+ goto cleanup;
1063
+ }
1064
+
1065
+ /* skip any amount of whitespace or newline */
1066
+ /* (this is much more laxist than original markdown syntax) */
1067
+ while (i < size && _isspace(data[i]))
1068
+ i++;
1069
+
1070
+ /* inline style link */
1071
+ if (i < size && data[i] == '(') {
1072
+ /* skipping initial whitespace */
1073
+ i++;
1074
+
1075
+ while (i < size && _isspace(data[i]))
1076
+ i++;
1077
+
1078
+ link_b = i;
1079
+
1080
+ /* looking for link end: ' " ) */
1081
+ /* Count the number of open parenthesis */
1082
+ size_t nb_p = 0;
1083
+
1084
+ while (i < size) {
1085
+ if (data[i] == '\\') i += 2;
1086
+ else if (data[i] == '(' && i != 0) {
1087
+ nb_p++; i++;
1088
+ }
1089
+ else if (data[i] == ')') {
1090
+ if (nb_p == 0) break;
1091
+ else nb_p--; i++;
1092
+ } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1093
+ else i++;
1094
+ }
1095
+
1096
+ if (i >= size) goto cleanup;
1097
+ link_e = i;
1098
+
1099
+ /* looking for title end if present */
1100
+ if (data[i] == '\'' || data[i] == '"') {
1101
+ qtype = data[i];
1102
+ in_title = 1;
1103
+ i++;
1104
+ title_b = i;
1105
+
1106
+ while (i < size) {
1107
+ if (data[i] == '\\') i += 2;
1108
+ else if (data[i] == qtype) {in_title = 0; i++;}
1109
+ else if ((data[i] == ')') && !in_title) break;
1110
+ else i++;
1111
+ }
1112
+
1113
+ if (i >= size) goto cleanup;
1114
+
1115
+ /* skipping whitespaces after title */
1116
+ title_e = i - 1;
1117
+ while (title_e > title_b && _isspace(data[title_e]))
1118
+ title_e--;
1119
+
1120
+ /* checking for closing quote presence */
1121
+ if (data[title_e] != '\'' && data[title_e] != '"') {
1122
+ title_b = title_e = 0;
1123
+ link_e = i;
1124
+ }
1125
+ }
1126
+
1127
+ /* remove whitespace at the end of the link */
1128
+ while (link_e > link_b && _isspace(data[link_e - 1]))
1129
+ link_e--;
1130
+
1131
+ /* remove optional angle brackets around the link */
1132
+ if (data[link_b] == '<') link_b++;
1133
+ if (data[link_e - 1] == '>') link_e--;
1134
+
1135
+ /* building escaped link and title */
1136
+ if (link_e > link_b) {
1137
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
1138
+ bufput(link, data + link_b, link_e - link_b);
1139
+ }
1140
+
1141
+ if (title_e > title_b) {
1142
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
1143
+ bufput(title, data + title_b, title_e - title_b);
1144
+ }
1145
+
1146
+ i++;
1147
+ }
1148
+
1149
+ /* reference style link */
1150
+ else if (i < size && data[i] == '[') {
1151
+ struct buf id = { 0, 0, 0, 0 };
1152
+ struct link_ref *lr;
1153
+
1154
+ /* looking for the id */
1155
+ i++;
1156
+ link_b = i;
1157
+ while (i < size && data[i] != ']') i++;
1158
+ if (i >= size) goto cleanup;
1159
+ link_e = i;
1160
+
1161
+ /* finding the link_ref */
1162
+ if (link_b == link_e) {
1163
+ if (text_has_nl) {
1164
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1165
+ size_t j;
1166
+
1167
+ for (j = 1; j < txt_e; j++) {
1168
+ if (data[j] != '\n')
1169
+ bufputc(b, data[j]);
1170
+ else if (data[j - 1] != ' ')
1171
+ bufputc(b, ' ');
1172
+ }
1173
+
1174
+ id.data = b->data;
1175
+ id.size = b->size;
1176
+ } else {
1177
+ id.data = data + 1;
1178
+ id.size = txt_e - 1;
1179
+ }
1180
+ } else {
1181
+ id.data = data + link_b;
1182
+ id.size = link_e - link_b;
1183
+ }
1184
+
1185
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1186
+ if (!lr)
1187
+ goto cleanup;
1188
+
1189
+ /* keeping link and title from link_ref */
1190
+ link = lr->link;
1191
+ title = lr->title;
1192
+ i++;
1193
+ }
1194
+
1195
+ /* shortcut reference style link */
1196
+ else {
1197
+ struct buf id = { 0, 0, 0, 0 };
1198
+ struct link_ref *lr;
1199
+
1200
+ /* crafting the id */
1201
+ if (text_has_nl) {
1202
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1203
+ size_t j;
1204
+
1205
+ for (j = 1; j < txt_e; j++) {
1206
+ if (data[j] != '\n')
1207
+ bufputc(b, data[j]);
1208
+ else if (data[j - 1] != ' ')
1209
+ bufputc(b, ' ');
1210
+ }
1211
+
1212
+ id.data = b->data;
1213
+ id.size = b->size;
1214
+ } else {
1215
+ id.data = data + 1;
1216
+ id.size = txt_e - 1;
1217
+ }
1218
+
1219
+ /* finding the link_ref */
1220
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1221
+ if (!lr)
1222
+ goto cleanup;
1223
+
1224
+ /* keeping link and title from link_ref */
1225
+ link = lr->link;
1226
+ title = lr->title;
1227
+
1228
+ /* rewinding the whitespace */
1229
+ i = txt_e + 1;
1230
+ }
1231
+
1232
+ /* building content: img alt is escaped, link content is parsed */
1233
+ if (txt_e > 1) {
1234
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
1235
+ if (is_img) {
1236
+ bufput(content, data + 1, txt_e - 1);
1237
+ } else {
1238
+ /* disable autolinking when parsing inline the
1239
+ * content of a link */
1240
+ rndr->in_link_body = 1;
1241
+ parse_inline(content, rndr, data + 1, txt_e - 1);
1242
+ rndr->in_link_body = 0;
1243
+ }
1244
+ }
1245
+
1246
+ if (link) {
1247
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1248
+ unscape_text(u_link, link);
1249
+ }
1250
+
1251
+ /* calling the relevant rendering function */
1252
+ if (is_img) {
1253
+ if (ob->size && ob->data[ob->size - 1] == '!')
1254
+ ob->size -= 1;
1255
+
1256
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1257
+ } else {
1258
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1259
+ }
1260
+
1261
+ /* cleanup */
1262
+ cleanup:
1263
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1264
+ return ret ? i : 0;
1265
+ }
1266
+
1267
+ static size_t
1268
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1269
+ {
1270
+ size_t sup_start, sup_len;
1271
+ struct buf *sup;
1272
+
1273
+ if (!rndr->cb.superscript)
1274
+ return 0;
1275
+
1276
+ if (size < 2)
1277
+ return 0;
1278
+
1279
+ if (data[1] == '(') {
1280
+ sup_start = sup_len = 2;
1281
+
1282
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1283
+ sup_len++;
1284
+
1285
+ if (sup_len == size)
1286
+ return 0;
1287
+ } else {
1288
+ sup_start = sup_len = 1;
1289
+
1290
+ while (sup_len < size && !_isspace(data[sup_len]))
1291
+ sup_len++;
1292
+ }
1293
+
1294
+ if (sup_len - sup_start == 0)
1295
+ return (sup_start == 2) ? 3 : 0;
1296
+
1297
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
1298
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1299
+ rndr->cb.superscript(ob, sup, rndr->opaque);
1300
+ rndr_popbuf(rndr, BUFFER_SPAN);
1301
+
1302
+ return (sup_start == 2) ? sup_len + 1 : sup_len;
1303
+ }
1304
+
1305
+ /*********************************
1306
+ * BLOCK-LEVEL PARSING FUNCTIONS *
1307
+ *********************************/
1308
+
1309
/*
 * is_empty • tells whether the first line of data contains only spaces
 *
 * Returns 0 as soon as a character other than ' ' is met before the end of
 * the line. Otherwise returns the index of the terminating newline plus
 * one; when the buffer ends without a newline this is size + 1, so the
 * result is non-zero even for an empty buffer.
 */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1321
+
1322
/*
 * is_hrule • returns whether a line is a horizontal rule
 *
 * After up to three leading spaces the line must consist solely of one of
 * '*', '-' or '_' (the same character throughout) and spaces, with at least
 * three occurrences of that character. Returns 1 on match, 0 otherwise.
 */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces (size >= 3, so this is in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* the first significant character selects the rule character */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the rest of the line must be that character or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1352
+
1353
/*
 * prefix_codefence • checks whether a line begins with a code fence
 *
 * A fence is a run of at least three '~' or three '`' characters, optionally
 * preceded by up to three spaces. Returns the offset just past the fence
 * run (leading spaces included), or 0 when the line does not start with a
 * fence.
 */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run = 0;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces (size >= 3, so this is in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* looking at the fence character */
	if (pos + 2 >= size)
		return 0;

	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* counting the consecutive fence characters */
	for (; pos < size && data[pos] == fence; pos++)
		run++;

	return (run < 3) ? 0 : pos;
}
1383
+
1384
+ /* check if a line is a code fence; return its size if it is */
1385
+ static size_t
1386
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1387
+ {
1388
+ size_t i = 0, syn_len = 0;
1389
+ uint8_t *syn_start;
1390
+
1391
+ i = prefix_codefence(data, size);
1392
+ if (i == 0)
1393
+ return 0;
1394
+
1395
+ while (i < size && data[i] == ' ')
1396
+ i++;
1397
+
1398
+ syn_start = data + i;
1399
+
1400
+ if (i < size && data[i] == '{') {
1401
+ i++; syn_start++;
1402
+
1403
+ while (i < size && data[i] != '}' && data[i] != '\n') {
1404
+ syn_len++; i++;
1405
+ }
1406
+
1407
+ if (i == size || data[i] != '}')
1408
+ return 0;
1409
+
1410
+ /* strip all whitespace at the beginning and the end
1411
+ * of the {} block */
1412
+ while (syn_len > 0 && _isspace(syn_start[0])) {
1413
+ syn_start++; syn_len--;
1414
+ }
1415
+
1416
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1417
+ syn_len--;
1418
+
1419
+ i++;
1420
+ } else {
1421
+ while (i < size && !_isspace(data[i])) {
1422
+ syn_len++; i++;
1423
+ }
1424
+ }
1425
+
1426
+ if (syntax) {
1427
+ syntax->data = syn_start;
1428
+ syntax->size = syn_len;
1429
+ }
1430
+
1431
+ while (i < size && data[i] != '\n') {
1432
+ if (!_isspace(data[i]))
1433
+ return 0;
1434
+
1435
+ i++;
1436
+ }
1437
+
1438
+ return i + 1;
1439
+ }
1440
+
1441
+ /* is_atxheader • returns whether the line is a hash-prefixed header */
1442
+ static int
1443
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1444
+ {
1445
+ if (data[0] != '#')
1446
+ return 0;
1447
+
1448
+ if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1449
+ size_t level = 0;
1450
+
1451
+ while (level < size && level < 6 && data[level] == '#')
1452
+ level++;
1453
+
1454
+ if (level < size && data[level] != ' ')
1455
+ return 0;
1456
+ }
1457
+
1458
+ return 1;
1459
+ }
1460
+
1461
/*
 * is_headerline • returns whether the line is a setext-style header underline
 *
 * An underline is a run of '=' (level 1) or '-' (level 2) starting in the
 * first column, optionally followed by spaces, and ending at a newline or
 * at the end of the buffer.
 *
 * Returns 1 for a level 1 underline, 2 for a level 2 underline and 0
 * otherwise. An empty buffer is never an underline; the explicit check
 * avoids reading data[0] when size is 0.
 */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	uint8_t mark;

	if (size == 0)
		return 0;

	mark = data[0];
	if (mark != '=' && mark != '-')
		return 0;

	/* skip the run of underline characters, then trailing spaces */
	for (i = 1; i < size && data[i] == mark; i++)
		;
	while (i < size && data[i] == ' ')
		i++;

	/* anything else left on the line disqualifies it */
	if (i < size && data[i] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
1481
+
1482
/*
 * is_next_headerline • returns whether the line FOLLOWING the first line of
 * data is a setext-style header underline (same result as is_headerline);
 * 0 when there is no following line.
 */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* locate the newline ending the current line */
	for (next = 0; next < size && data[next] != '\n'; next++)
		;

	/* step over it; nothing to inspect if the buffer ends there */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1495
+
1496
/*
 * prefix_quote • returns blockquote prefix length
 *
 * The prefix is up to three spaces, a '>' and one optional space. Returns
 * 0 when the line does not start with such a prefix.
 */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* a single space after the marker belongs to the prefix */
	if (pos + 1 < size && data[pos + 1] == ' ')
		return pos + 2;

	return pos + 1;
}
1514
+
1515
/*
 * prefix_code • returns prefix length for block code
 *
 * A code line starts with four spaces; returns 4 in that case and 0
 * otherwise.
 */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1524
+
1525
/*
 * prefix_oli • returns ordered list item prefix
 *
 * The prefix is up to three spaces, one or more digits, a '.' and a space.
 * A line whose following line is a setext header underline is not treated
 * as a list item. Returns the prefix length, or 0 if there is no prefix.
 */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1549
+
1550
/*
 * prefix_uli • returns unordered list item prefix
 *
 * The prefix is up to three spaces, one of '*', '+' or '-', and a space.
 * A line whose following line is a setext header underline is not treated
 * as a list item. Returns the prefix length, or 0 if there is no prefix.
 */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three leading spaces */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* both the bullet and the space after it must be present */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1570
+
1571
+
1572
+ /* parse_block • block-level parsing entry point (forward declaration; returns nothing) */
1573
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1574
+ uint8_t *data, size_t size);
1575
+
1576
+
1577
+ /* parse_blockquote • handles parsing of a blockquote fragment */
1578
+ static size_t
1579
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1580
+ {
1581
+ size_t beg, end = 0, pre, work_size = 0;
1582
+ uint8_t *work_data = 0;
1583
+ struct buf *out = 0;
1584
+
1585
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
1586
+ beg = 0;
1587
+ while (beg < size) {
1588
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1589
+
1590
+ pre = prefix_quote(data + beg, end - beg);
1591
+
1592
+ if (pre)
1593
+ beg += pre; /* skipping prefix */
1594
+
1595
+ /* empty line followed by non-quote line */
1596
+ else if (is_empty(data + beg, end - beg) &&
1597
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1598
+ !is_empty(data + end, size - end))))
1599
+ break;
1600
+
1601
+ if (beg < end) { /* copy into the in-place working buffer */
1602
+ /* bufput(work, data + beg, end - beg); */
1603
+ if (!work_data)
1604
+ work_data = data + beg;
1605
+ else if (data + beg != work_data + work_size)
1606
+ memmove(work_data + work_size, data + beg, end - beg);
1607
+ work_size += end - beg;
1608
+ }
1609
+ beg = end;
1610
+ }
1611
+
1612
+ parse_block(out, rndr, work_data, work_size);
1613
+ if (rndr->cb.blockquote)
1614
+ rndr->cb.blockquote(ob, out, rndr->opaque);
1615
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1616
+ return end;
1617
+ }
1618
+
1619
+ static size_t
1620
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1621
+
1622
+ /* parse_paragraph • handles parsing of a regular paragraph */
1623
+ static size_t
1624
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1625
+ {
1626
+ size_t i = 0, end = 0;
1627
+ int level = 0, last_is_empty = 1;
1628
+ struct buf work = { data, 0, 0, 0 };
1629
+
1630
+ while (i < size) {
1631
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1632
+
1633
+ if (is_empty(data + i, size - i))
1634
+ break;
1635
+
1636
+ if (!last_is_empty && (level = is_headerline(data + i, size - i)) != 0)
1637
+ break;
1638
+
1639
+ last_is_empty = 0;
1640
+
1641
+ if (is_atxheader(rndr, data + i, size - i) ||
1642
+ is_hrule(data + i, size - i) ||
1643
+ prefix_quote(data + i, size - i)) {
1644
+ end = i;
1645
+ break;
1646
+ }
1647
+
1648
+ /*
1649
+ * Early termination of a paragraph with the same logic
1650
+ * as Markdown 1.0.0. If this logic is applied, the
1651
+ * Markdown 1.0.3 test suite won't pass cleanly
1652
+ *
1653
+ * :: If the first character in a new line is not a letter,
1654
+ * let's check to see if there's some kind of block starting
1655
+ * here
1656
+ */
1657
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalpha(data[i])) {
1658
+ if (prefix_oli(data + i, size - i) ||
1659
+ prefix_uli(data + i, size - i)) {
1660
+ end = i;
1661
+ break;
1662
+ }
1663
+
1664
+ /* see if an html block starts here */
1665
+ if (data[i] == '<' && rndr->cb.blockhtml &&
1666
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1667
+ end = i;
1668
+ break;
1669
+ }
1670
+
1671
+ /* see if a code fence starts here */
1672
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1673
+ is_codefence(data + i, size - i, NULL) != 0) {
1674
+ end = i;
1675
+ break;
1676
+ }
1677
+ }
1678
+
1679
+ i = end;
1680
+ }
1681
+
1682
+ work.size = i;
1683
+ while (work.size && data[work.size - 1] == '\n')
1684
+ work.size--;
1685
+
1686
+ if (!level) {
1687
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1688
+ parse_inline(tmp, rndr, work.data, work.size);
1689
+ if (rndr->cb.paragraph)
1690
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1691
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1692
+ } else {
1693
+ struct buf *header_work;
1694
+
1695
+ if (work.size) {
1696
+ size_t beg;
1697
+ i = work.size;
1698
+ work.size -= 1;
1699
+
1700
+ while (work.size && data[work.size] != '\n')
1701
+ work.size -= 1;
1702
+
1703
+ beg = work.size + 1;
1704
+ while (work.size && data[work.size - 1] == '\n')
1705
+ work.size -= 1;
1706
+
1707
+ if (work.size > 0) {
1708
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1709
+ parse_inline(tmp, rndr, work.data, work.size);
1710
+
1711
+ if (rndr->cb.paragraph)
1712
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1713
+
1714
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1715
+ work.data += beg;
1716
+ work.size = i - beg;
1717
+ }
1718
+ else work.size = i;
1719
+ }
1720
+
1721
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1722
+ parse_inline(header_work, rndr, work.data, work.size);
1723
+
1724
+ if (rndr->cb.header)
1725
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1726
+
1727
+ rndr_popbuf(rndr, BUFFER_SPAN);
1728
+ }
1729
+
1730
+ return end;
1731
+ }
1732
+
1733
+ /* parse_fencedcode • handles parsing of a block-level code fragment */
1734
+ static size_t
1735
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1736
+ {
1737
+ size_t beg, end;
1738
+ struct buf *work = 0;
1739
+ struct buf lang = { 0, 0, 0, 0 };
1740
+
1741
+ beg = is_codefence(data, size, &lang);
1742
+ if (beg == 0) return 0;
1743
+
1744
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1745
+
1746
+ while (beg < size) {
1747
+ size_t fence_end;
1748
+ struct buf fence_trail = { 0, 0, 0, 0 };
1749
+
1750
+ fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1751
+ if (fence_end != 0 && fence_trail.size == 0) {
1752
+ beg += fence_end;
1753
+ break;
1754
+ }
1755
+
1756
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1757
+
1758
+ if (beg < end) {
1759
+ /* verbatim copy to the working buffer,
1760
+ escaping entities */
1761
+ if (is_empty(data + beg, end - beg))
1762
+ bufputc(work, '\n');
1763
+ else bufput(work, data + beg, end - beg);
1764
+ }
1765
+ beg = end;
1766
+ }
1767
+
1768
+ if (work->size && work->data[work->size - 1] != '\n')
1769
+ bufputc(work, '\n');
1770
+
1771
+ if (rndr->cb.blockcode)
1772
+ rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1773
+
1774
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1775
+ return beg;
1776
+ }
1777
+
1778
+ static size_t
1779
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1780
+ {
1781
+ size_t beg, end, pre;
1782
+ struct buf *work = 0;
1783
+
1784
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1785
+
1786
+ beg = 0;
1787
+ while (beg < size) {
1788
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1789
+ pre = prefix_code(data + beg, end - beg);
1790
+
1791
+ if (pre)
1792
+ beg += pre; /* skipping prefix */
1793
+ else if (!is_empty(data + beg, end - beg))
1794
+ /* non-empty non-prefixed line breaks the pre */
1795
+ break;
1796
+
1797
+ if (beg < end) {
1798
+ /* verbatim copy to the working buffer,
1799
+ escaping entities */
1800
+ if (is_empty(data + beg, end - beg))
1801
+ bufputc(work, '\n');
1802
+ else bufput(work, data + beg, end - beg);
1803
+ }
1804
+ beg = end;
1805
+ }
1806
+
1807
+ while (work->size && work->data[work->size - 1] == '\n')
1808
+ work->size -= 1;
1809
+
1810
+ bufputc(work, '\n');
1811
+
1812
+ if (rndr->cb.blockcode)
1813
+ rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1814
+
1815
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1816
+ return beg;
1817
+ }
1818
+
1819
+ /* parse_listitem • parsing of a single list item */
1820
+ /* assuming initial prefix is already removed */
1821
+ static size_t
1822
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1823
+ {
1824
+ struct buf *work = 0, *inter = 0;
1825
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1826
+ int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1827
+
1828
+ /* keeping track of the first indentation prefix */
1829
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1830
+ orgpre++;
1831
+
1832
+ beg = prefix_uli(data, size);
1833
+ if (!beg)
1834
+ beg = prefix_oli(data, size);
1835
+
1836
+ if (!beg)
1837
+ return 0;
1838
+
1839
+ /* skipping to the beginning of the following line */
1840
+ end = beg;
1841
+ while (end < size && data[end - 1] != '\n')
1842
+ end++;
1843
+
1844
+ /* getting working buffers */
1845
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
1846
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
1847
+
1848
+ /* putting the first line into the working buffer */
1849
+ bufput(work, data + beg, end - beg);
1850
+ beg = end;
1851
+
1852
+ /* process the following lines */
1853
+ while (beg < size) {
1854
+ size_t has_next_uli = 0, has_next_oli = 0;
1855
+
1856
+ end++;
1857
+
1858
+ while (end < size && data[end - 1] != '\n')
1859
+ end++;
1860
+
1861
+ /* process an empty line */
1862
+ if (is_empty(data + beg, end - beg)) {
1863
+ in_empty = 1;
1864
+ beg = end;
1865
+ continue;
1866
+ }
1867
+
1868
+ /* calculating the indentation */
1869
+ i = 0;
1870
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
1871
+ i++;
1872
+
1873
+ pre = i;
1874
+
1875
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1876
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1877
+ in_fence = !in_fence;
1878
+ }
1879
+
1880
+ /* Only check for new list items if we are **not** inside
1881
+ * a fenced code block */
1882
+ if (!in_fence) {
1883
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1884
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1885
+ }
1886
+
1887
+ /* checking for ul/ol switch */
1888
+ if (in_empty && (
1889
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1890
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1891
+ *flags |= MKD_LI_END;
1892
+ break; /* the following item must have same list type */
1893
+ }
1894
+
1895
+ /* checking for a new item */
1896
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1897
+ if (in_empty)
1898
+ has_inside_empty = 1;
1899
+
1900
+ if (pre == orgpre) /* the following item must have */
1901
+ break; /* the same indentation */
1902
+
1903
+ if (!sublist)
1904
+ sublist = work->size;
1905
+ }
1906
+ /* joining only indented stuff after empty lines */
1907
+ else if (in_empty && i < 4 && data[beg] != '\t') {
1908
+ *flags |= MKD_LI_END;
1909
+ break;
1910
+ }
1911
+ else if (in_empty) {
1912
+ bufputc(work, '\n');
1913
+ has_inside_empty = 1;
1914
+ }
1915
+
1916
+ in_empty = 0;
1917
+
1918
+ /* adding the line without prefix into the working buffer */
1919
+ bufput(work, data + beg + i, end - beg - i);
1920
+ beg = end;
1921
+ }
1922
+
1923
+ /* render of li contents */
1924
+ if (has_inside_empty)
1925
+ *flags |= MKD_LI_BLOCK;
1926
+
1927
+ if (*flags & MKD_LI_BLOCK) {
1928
+ /* intermediate render of block li */
1929
+ if (sublist && sublist < work->size) {
1930
+ parse_block(inter, rndr, work->data, sublist);
1931
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1932
+ }
1933
+ else
1934
+ parse_block(inter, rndr, work->data, work->size);
1935
+ } else {
1936
+ /* intermediate render of inline li */
1937
+ if (sublist && sublist < work->size) {
1938
+ parse_inline(inter, rndr, work->data, sublist);
1939
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1940
+ }
1941
+ else
1942
+ parse_inline(inter, rndr, work->data, work->size);
1943
+ }
1944
+
1945
+ /* render of li itself */
1946
+ if (rndr->cb.listitem)
1947
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1948
+
1949
+ rndr_popbuf(rndr, BUFFER_SPAN);
1950
+ rndr_popbuf(rndr, BUFFER_SPAN);
1951
+ return beg;
1952
+ }
1953
+
1954
+
1955
+ /* parse_list • parsing ordered or unordered list block */
1956
+ static size_t
1957
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1958
+ {
1959
+ struct buf *work = 0;
1960
+ size_t i = 0, j;
1961
+
1962
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1963
+
1964
+ while (i < size) {
1965
+ j = parse_listitem(work, rndr, data + i, size - i, &flags);
1966
+ i += j;
1967
+
1968
+ if (!j || (flags & MKD_LI_END))
1969
+ break;
1970
+ }
1971
+
1972
+ if (rndr->cb.list)
1973
+ rndr->cb.list(ob, work, flags, rndr->opaque);
1974
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1975
+ return i;
1976
+ }
1977
+
1978
+ /* parse_atxheader • parsing of atx-style headers */
1979
+ static size_t
1980
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1981
+ {
1982
+ size_t level = 0;
1983
+ size_t i, end, skip;
1984
+
1985
+ while (level < size && level < 6 && data[level] == '#')
1986
+ level++;
1987
+
1988
+ for (i = level; i < size && data[i] == ' '; i++);
1989
+
1990
+ for (end = i; end < size && data[end] != '\n'; end++);
1991
+ skip = end;
1992
+
1993
+ while (end && data[end - 1] == '#')
1994
+ end--;
1995
+
1996
+ while (end && data[end - 1] == ' ')
1997
+ end--;
1998
+
1999
+ if (end > i) {
2000
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
2001
+
2002
+ parse_inline(work, rndr, data + i, end - i);
2003
+
2004
+ if (rndr->cb.header)
2005
+ rndr->cb.header(ob, work, (int)level, rndr->opaque);
2006
+
2007
+ rndr_popbuf(rndr, BUFFER_SPAN);
2008
+ }
2009
+
2010
+ return skip;
2011
+ }
2012
+
2013
+ /* parse_footnote_def • parse a single footnote definition */
2014
+ static void
2015
+ parse_footnote_def(struct buf *ob, struct sd_markdown *rndr, unsigned int num, uint8_t *data, size_t size)
2016
+ {
2017
+ struct buf *work = 0;
2018
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
2019
+
2020
+ parse_block(work, rndr, data, size);
2021
+
2022
+ if (rndr->cb.footnote_def)
2023
+ rndr->cb.footnote_def(ob, work, num, rndr->opaque);
2024
+ rndr_popbuf(rndr, BUFFER_SPAN);
2025
+ }
2026
+
2027
+ /* parse_footnote_list • render the contents of the footnotes */
2028
+ static void
2029
+ parse_footnote_list(struct buf *ob, struct sd_markdown *rndr, struct footnote_list *footnotes)
2030
+ {
2031
+ struct buf *work = 0;
2032
+ struct footnote_item *item;
2033
+ struct footnote_ref *ref;
2034
+
2035
+ if (footnotes->count == 0)
2036
+ return;
2037
+
2038
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
2039
+
2040
+ item = footnotes->head;
2041
+ while (item) {
2042
+ ref = item->ref;
2043
+ parse_footnote_def(work, rndr, ref->num, ref->contents->data, ref->contents->size);
2044
+ item = item->next;
2045
+ }
2046
+
2047
+ if (rndr->cb.footnotes)
2048
+ rndr->cb.footnotes(ob, work, rndr->opaque);
2049
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2050
+ }
2051
+
2052
/*
 * htmlblock_end_tag • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n
 *
 * data must point at the '<' of a candidate closing tag. The tag name is
 * compared case-insensitively, and the rest of the line after the '>' must
 * be blank. A second blank line directly after it, when present, is
 * included in the match.
 *
 * Returns the length of the match, 0 otherwise. rndr is not used.
 */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* "</" + tag + ">" must fit with at least one character to spare */
	if (tag_len + 3 >= size)
		return 0;

	/* checking if tag is a match */
	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* the remainder of the closing tag's line must be blank
	 * (pos < size is guaranteed by the length check above) */
	pos = tag_len + 3;
	blank = is_empty(data + pos, size - pos);
	if (blank == 0)
		return 0; /* non-blank after tag */

	pos += blank;

	/* an immediately following blank line is swallowed too */
	blank = 0;
	if (pos < size)
		blank = is_empty(data + pos, size - pos);

	return pos + blank;
}
2083
+
2084
/*
 * htmlblock_end • looks for the closing tag ending an HTML block
 *
 * data is scanned for "</" sequences and each candidate is checked with
 * htmlblock_end_tag. When start_of_line is non-zero, candidates located
 * after the first line are only considered if they directly follow a
 * newline.
 *
 * Returns the length of the block including the closing tag and the blank
 * line(s) matched after it, or 0 when no closing tag is found.
 */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	size_t tag_size = strlen(curtag);
	size_t i = 1;
	int newlines_seen = 0;

	while (i < size) {
		size_t matched;

		/* advance until data[i - 1], data[i] spell "</" */
		i++;
		while (i < size && (data[i - 1] != '<' || data[i] != '/')) {
			if (data[i] == '\n')
				newlines_seen++;

			i++;
		}

		/* If we are only looking for unindented tags, skip the tag
		 * if it doesn't follow a newline.
		 *
		 * The only exception to this is if the tag is still on the
		 * initial line; in that case it still counts as a closing
		 * tag
		 */
		if (start_of_line && newlines_seen > 0 && data[i - 2] != '\n')
			continue;

		/* not enough room left for the closing tag */
		if (i + 2 + tag_size >= size)
			break;

		matched = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1);
		if (matched)
			return i + matched - 1;
	}

	return 0;
}
2124
+
2125
+
2126
+ /* parse_htmlblock • parsing of inline HTML block */
2127
+ static size_t
2128
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
2129
+ {
2130
+ size_t i, j = 0, tag_end;
2131
+ const char *curtag = NULL;
2132
+ struct buf work = { data, 0, 0, 0 };
2133
+
2134
+ /* identification of the opening tag */
2135
+ if (size < 2 || data[0] != '<')
2136
+ return 0;
2137
+
2138
+ i = 1;
2139
+ while (i < size && data[i] != '>' && data[i] != ' ')
2140
+ i++;
2141
+
2142
+ if (i < size)
2143
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
2144
+
2145
+ /* handling of special cases */
2146
+ if (!curtag) {
2147
+
2148
+ /* HTML comment, laxist form */
2149
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2150
+ i = 5;
2151
+
2152
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2153
+ i++;
2154
+
2155
+ i++;
2156
+
2157
+ if (i < size)
2158
+ j = is_empty(data + i, size - i);
2159
+
2160
+ if (j) {
2161
+ work.size = i + j;
2162
+ if (do_render && rndr->cb.blockhtml)
2163
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2164
+ return work.size;
2165
+ }
2166
+ }
2167
+
2168
+ /* HR, which is the only self-closing block tag considered */
2169
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2170
+ i = 3;
2171
+ while (i < size && data[i] != '>')
2172
+ i++;
2173
+
2174
+ if (i + 1 < size) {
2175
+ i++;
2176
+ j = is_empty(data + i, size - i);
2177
+ if (j) {
2178
+ work.size = i + j;
2179
+ if (do_render && rndr->cb.blockhtml)
2180
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2181
+ return work.size;
2182
+ }
2183
+ }
2184
+ }
2185
+
2186
+ /* no special case recognised */
2187
+ return 0;
2188
+ }
2189
+
2190
+ /* looking for an unindented matching closing tag */
2191
+ /* followed by a blank line */
2192
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
2193
+
2194
+ /* if not found, trying a second pass looking for indented match */
2195
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2196
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
2197
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
2198
+ }
2199
+
2200
+ if (!tag_end)
2201
+ return 0;
2202
+
2203
+ /* the end of the block has been found */
2204
+ work.size = tag_end;
2205
+ if (do_render && rndr->cb.blockhtml)
2206
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2207
+
2208
+ return tag_end;
2209
+ }
2210
+
2211
+ static void
2212
+ parse_table_row(
2213
+ struct buf *ob,
2214
+ struct sd_markdown *rndr,
2215
+ uint8_t *data,
2216
+ size_t size,
2217
+ size_t columns,
2218
+ int *col_data,
2219
+ int header_flag)
2220
+ {
2221
+ size_t i = 0, col;
2222
+ struct buf *row_work = 0;
2223
+
2224
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
2225
+ return;
2226
+
2227
+ row_work = rndr_newbuf(rndr, BUFFER_SPAN);
2228
+
2229
+ if (i < size && data[i] == '|')
2230
+ i++;
2231
+
2232
+ for (col = 0; col < columns && i < size; ++col) {
2233
+ size_t cell_start, cell_end;
2234
+ struct buf *cell_work;
2235
+
2236
+ cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2237
+
2238
+ while (i < size && _isspace(data[i]))
2239
+ i++;
2240
+
2241
+ cell_start = i;
2242
+
2243
+ while (i < size && data[i] != '|')
2244
+ i++;
2245
+
2246
+ cell_end = i - 1;
2247
+
2248
+ while (cell_end > cell_start && _isspace(data[cell_end]))
2249
+ cell_end--;
2250
+
2251
+ parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2252
+ rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2253
+
2254
+ rndr_popbuf(rndr, BUFFER_SPAN);
2255
+ i++;
2256
+ }
2257
+
2258
+ for (; col < columns; ++col) {
2259
+ struct buf empty_cell = { 0, 0, 0, 0 };
2260
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2261
+ }
2262
+
2263
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
2264
+
2265
+ rndr_popbuf(rndr, BUFFER_SPAN);
2266
+ }
2267
+
2268
+ static size_t
2269
+ parse_table_header(
2270
+ struct buf *ob,
2271
+ struct sd_markdown *rndr,
2272
+ uint8_t *data,
2273
+ size_t size,
2274
+ size_t *columns,
2275
+ int **column_data)
2276
+ {
2277
+ int pipes;
2278
+ size_t i = 0, col, header_end, under_end;
2279
+
2280
+ pipes = 0;
2281
+ while (i < size && data[i] != '\n')
2282
+ if (data[i++] == '|')
2283
+ pipes++;
2284
+
2285
+ if (i == size || pipes == 0)
2286
+ return 0;
2287
+
2288
+ header_end = i;
2289
+
2290
+ while (header_end > 0 && _isspace(data[header_end - 1]))
2291
+ header_end--;
2292
+
2293
+ if (data[0] == '|')
2294
+ pipes--;
2295
+
2296
+ if (header_end && data[header_end - 1] == '|')
2297
+ pipes--;
2298
+
2299
+ *columns = pipes + 1;
2300
+ *column_data = calloc(*columns, sizeof(int));
2301
+
2302
+ /* Parse the header underline */
2303
+ i++;
2304
+ if (i < size && data[i] == '|')
2305
+ i++;
2306
+
2307
+ under_end = i;
2308
+ while (under_end < size && data[under_end] != '\n')
2309
+ under_end++;
2310
+
2311
+ for (col = 0; col < *columns && i < under_end; ++col) {
2312
+ size_t dashes = 0;
2313
+
2314
+ while (i < under_end && data[i] == ' ')
2315
+ i++;
2316
+
2317
+ if (data[i] == ':') {
2318
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2319
+ dashes++;
2320
+ }
2321
+
2322
+ while (i < under_end && data[i] == '-') {
2323
+ i++; dashes++;
2324
+ }
2325
+
2326
+ if (i < under_end && data[i] == ':') {
2327
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2328
+ dashes++;
2329
+ }
2330
+
2331
+ while (i < under_end && data[i] == ' ')
2332
+ i++;
2333
+
2334
+ if (i < under_end && data[i] != '|' && data[i] != '+')
2335
+ break;
2336
+
2337
+ if (dashes < 3)
2338
+ break;
2339
+
2340
+ i++;
2341
+ }
2342
+
2343
+ if (col < *columns)
2344
+ return 0;
2345
+
2346
+ parse_table_row(
2347
+ ob, rndr, data,
2348
+ header_end,
2349
+ *columns,
2350
+ *column_data,
2351
+ MKD_TABLE_HEADER
2352
+ );
2353
+
2354
+ return under_end + 1;
2355
+ }
2356
+
2357
+ static size_t
2358
+ parse_table(
2359
+ struct buf *ob,
2360
+ struct sd_markdown *rndr,
2361
+ uint8_t *data,
2362
+ size_t size)
2363
+ {
2364
+ size_t i;
2365
+
2366
+ struct buf *header_work = 0;
2367
+ struct buf *body_work = 0;
2368
+
2369
+ size_t columns;
2370
+ int *col_data = NULL;
2371
+
2372
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2373
+ body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2374
+
2375
+ i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2376
+ if (i > 0) {
2377
+
2378
+ while (i < size) {
2379
+ size_t row_start;
2380
+ int pipes = 0;
2381
+
2382
+ row_start = i;
2383
+
2384
+ while (i < size && data[i] != '\n')
2385
+ if (data[i++] == '|')
2386
+ pipes++;
2387
+
2388
+ if (pipes == 0 || i == size) {
2389
+ i = row_start;
2390
+ break;
2391
+ }
2392
+
2393
+ parse_table_row(
2394
+ body_work,
2395
+ rndr,
2396
+ data + row_start,
2397
+ i - row_start,
2398
+ columns,
2399
+ col_data, 0
2400
+ );
2401
+
2402
+ i++;
2403
+ }
2404
+
2405
+ if (rndr->cb.table)
2406
+ rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2407
+ }
2408
+
2409
+ free(col_data);
2410
+ rndr_popbuf(rndr, BUFFER_SPAN);
2411
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2412
+ return i;
2413
+ }
2414
+
2415
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
2416
+ static void
2417
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2418
+ {
2419
+ size_t beg, end, i;
2420
+ uint8_t *txt_data;
2421
+ beg = 0;
2422
+
2423
+ if (rndr->work_bufs[BUFFER_SPAN].size +
2424
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2425
+ return;
2426
+
2427
+ while (beg < size) {
2428
+ txt_data = data + beg;
2429
+ end = size - beg;
2430
+
2431
+ if (is_atxheader(rndr, txt_data, end))
2432
+ beg += parse_atxheader(ob, rndr, txt_data, end);
2433
+
2434
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
2435
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2436
+ beg += i;
2437
+
2438
+ else if ((i = is_empty(txt_data, end)) != 0)
2439
+ beg += i;
2440
+
2441
+ else if (is_hrule(txt_data, end)) {
2442
+ if (rndr->cb.hrule)
2443
+ rndr->cb.hrule(ob, rndr->opaque);
2444
+
2445
+ while (beg < size && data[beg] != '\n')
2446
+ beg++;
2447
+
2448
+ beg++;
2449
+ }
2450
+
2451
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2452
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2453
+ beg += i;
2454
+
2455
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2456
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
2457
+ beg += i;
2458
+
2459
+ else if (prefix_quote(txt_data, end))
2460
+ beg += parse_blockquote(ob, rndr, txt_data, end);
2461
+
2462
+ else if (!(rndr->ext_flags & MKDEXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
2463
+ beg += parse_blockcode(ob, rndr, txt_data, end);
2464
+
2465
+ else if (prefix_uli(txt_data, end))
2466
+ beg += parse_list(ob, rndr, txt_data, end, 0);
2467
+
2468
+ else if (prefix_oli(txt_data, end))
2469
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2470
+
2471
+ else
2472
+ beg += parse_paragraph(ob, rndr, txt_data, end);
2473
+ }
2474
+ }
2475
+
2476
+
2477
+
2478
+ /*********************
2479
+ * REFERENCE PARSING *
2480
+ *********************/
2481
+
2482
+ /* is_footnote • returns whether a line is a footnote definition or not */
2483
+ static int
2484
+ is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
2485
+ {
2486
+ size_t i = 0;
2487
+ struct buf *contents = 0;
2488
+ size_t ind = 0;
2489
+ int in_empty = 0;
2490
+ size_t start = 0;
2491
+
2492
+ size_t id_offset, id_end;
2493
+
2494
+ /* up to 3 optional leading spaces */
2495
+ if (beg + 3 >= end) return 0;
2496
+ if (data[beg] == ' ') { i = 1;
2497
+ if (data[beg + 1] == ' ') { i = 2;
2498
+ if (data[beg + 2] == ' ') { i = 3;
2499
+ if (data[beg + 3] == ' ') return 0; } } }
2500
+ i += beg;
2501
+
2502
+ /* id part: caret followed by anything between brackets */
2503
+ if (data[i] != '[') return 0;
2504
+ i++;
2505
+ if (i >= end || data[i] != '^') return 0;
2506
+ i++;
2507
+ id_offset = i;
2508
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2509
+ i++;
2510
+ if (i >= end || data[i] != ']') return 0;
2511
+ id_end = i;
2512
+
2513
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2514
+ i++;
2515
+ if (i >= end || data[i] != ':') return 0;
2516
+ i++;
2517
+
2518
+ /* getting content buffer */
2519
+ contents = bufnew(64);
2520
+
2521
+ start = i;
2522
+
2523
+ /* process lines similiar to a list item */
2524
+ while (i < end) {
2525
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2526
+
2527
+ /* process an empty line */
2528
+ if (is_empty(data + start, i - start)) {
2529
+ in_empty = 1;
2530
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2531
+ i++;
2532
+ if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2533
+ }
2534
+ start = i;
2535
+ continue;
2536
+ }
2537
+
2538
+ /* calculating the indentation */
2539
+ ind = 0;
2540
+ while (ind < 4 && start + ind < end && data[start + ind] == ' ')
2541
+ ind++;
2542
+
2543
+ /* joining only indented stuff after empty lines;
2544
+ * note that now we only require 1 space of indentation
2545
+ * to continue, just like lists */
2546
+ if (ind == 0) {
2547
+ if (start == id_end + 2 && data[start] == '\t') {}
2548
+ else break;
2549
+ }
2550
+ else if (in_empty) {
2551
+ bufputc(contents, '\n');
2552
+ }
2553
+
2554
+ in_empty = 0;
2555
+
2556
+ /* adding the line into the content buffer */
2557
+ bufput(contents, data + start + ind, i - start - ind);
2558
+ /* add carriage return */
2559
+ if (i < end) {
2560
+ bufput(contents, "\n", 1);
2561
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2562
+ i++;
2563
+ if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
2564
+ }
2565
+ }
2566
+ start = i;
2567
+ }
2568
+
2569
+ if (last)
2570
+ *last = start;
2571
+
2572
+ if (list) {
2573
+ struct footnote_ref *ref;
2574
+ ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
2575
+ if (!ref)
2576
+ return 0;
2577
+ if (!add_footnote_ref(list, ref)) {
2578
+ free_footnote_ref(ref);
2579
+ return 0;
2580
+ }
2581
+ ref->contents = contents;
2582
+ }
2583
+
2584
+ return 1;
2585
+ }
2586
+
2587
+ /* is_ref • returns whether a line is a reference or not */
2588
+ static int
2589
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2590
+ {
2591
+ /* int n; */
2592
+ size_t i = 0;
2593
+ size_t id_offset, id_end;
2594
+ size_t link_offset, link_end;
2595
+ size_t title_offset, title_end;
2596
+ size_t line_end;
2597
+
2598
+ /* up to 3 optional leading spaces */
2599
+ if (beg + 3 >= end) return 0;
2600
+ if (data[beg] == ' ') { i = 1;
2601
+ if (data[beg + 1] == ' ') { i = 2;
2602
+ if (data[beg + 2] == ' ') { i = 3;
2603
+ if (data[beg + 3] == ' ') return 0; } } }
2604
+ i += beg;
2605
+
2606
+ /* id part: anything but a newline between brackets */
2607
+ if (data[i] != '[') return 0;
2608
+ i++;
2609
+ id_offset = i;
2610
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2611
+ i++;
2612
+ if (i >= end || data[i] != ']') return 0;
2613
+ id_end = i;
2614
+
2615
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2616
+ i++;
2617
+ if (i >= end || data[i] != ':') return 0;
2618
+ i++;
2619
+ while (i < end && strchr("\t ", data[i])) i++;
2620
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2621
+ i++;
2622
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2623
+ while (i < end && strchr("\t ", data[i])) i++;
2624
+ if (i >= end) return 0;
2625
+
2626
+ /* link: whitespace-free sequence, optionally between angle brackets */
2627
+ if (data[i] == '<')
2628
+ i++;
2629
+
2630
+ link_offset = i;
2631
+
2632
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2633
+ i++;
2634
+
2635
+ if (data[i - 1] == '>') link_end = i - 1;
2636
+ else link_end = i;
2637
+
2638
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2639
+ while (i < end && strchr("\t ", data[i])) i++;
2640
+ if (i < end && data[i] != '\n' && data[i] != '\r'
2641
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2642
+ return 0;
2643
+ line_end = 0;
2644
+ /* computing end-of-line */
2645
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2646
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2647
+ line_end = i + 1;
2648
+
2649
+ /* optional (space|tab)* spacer after a newline */
2650
+ if (line_end) {
2651
+ i = line_end + 1;
2652
+ while (i < end && strchr("\t ", data[i])) i++; }
2653
+
2654
+ /* optional title: any non-newline sequence enclosed in '"()
2655
+ alone on its line */
2656
+ title_offset = title_end = 0;
2657
+ if (i + 1 < end
2658
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2659
+ i++;
2660
+ title_offset = i;
2661
+ /* looking for EOL */
2662
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2663
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2664
+ title_end = i + 1;
2665
+ else title_end = i;
2666
+ /* stepping back */
2667
+ i -= 1;
2668
+ while (i > title_offset && data[i] == ' ')
2669
+ i -= 1;
2670
+ if (i > title_offset
2671
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2672
+ line_end = title_end;
2673
+ title_end = i; } }
2674
+
2675
+ if (!line_end || link_end == link_offset)
2676
+ return 0; /* garbage after the link empty link */
2677
+
2678
+ /* a valid ref has been found, filling-in return structures */
2679
+ if (last)
2680
+ *last = line_end;
2681
+
2682
+ if (refs) {
2683
+ struct link_ref *ref;
2684
+
2685
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2686
+ if (!ref)
2687
+ return 0;
2688
+
2689
+ ref->link = bufnew(link_end - link_offset);
2690
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2691
+
2692
+ if (title_end > title_offset) {
2693
+ ref->title = bufnew(title_end - title_offset);
2694
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2695
+ }
2696
+ }
2697
+
2698
+ return 1;
2699
+ }
2700
+
2701
/*
 * expand_tabs • appends `line` to `ob`, replacing every tab with spaces
 *
 * Each tab becomes one to four spaces so that the text following it starts
 * on a column that is a multiple of four, columns being counted from the
 * beginning of `line`.
 */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0;
	size_t column = 0;

	while (pos < size) {
		size_t run = 0;

		/* length of the tab-free stretch starting at `pos` */
		while (pos + run < size && line[pos + run] != '\t')
			run++;

		if (run > 0)
			bufput(ob, line + pos, run);

		pos += run;
		column += run;

		if (pos >= size)
			break;

		/* line[pos] is a tab: always at least one space, then pad */
		bufputc(ob, ' ');
		column++;
		while (column % 4 != 0) {
			bufputc(ob, ' ');
			column++;
		}

		pos++;
	}
}
2725
+
2726
+ /**********************
2727
+ * EXPORTED FUNCTIONS *
2728
+ **********************/
2729
+
2730
+ struct sd_markdown *
2731
+ sd_markdown_new(
2732
+ unsigned int extensions,
2733
+ size_t max_nesting,
2734
+ const struct sd_callbacks *callbacks,
2735
+ void *opaque)
2736
+ {
2737
+ struct sd_markdown *md = NULL;
2738
+
2739
+ assert(max_nesting > 0 && callbacks);
2740
+
2741
+ md = malloc(sizeof(struct sd_markdown));
2742
+ if (!md)
2743
+ return NULL;
2744
+
2745
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2746
+
2747
+ greenmat_stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2748
+ greenmat_stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2749
+
2750
+ memset(md->active_char, 0x0, 256);
2751
+
2752
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2753
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2754
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2755
+ if (extensions & MKDEXT_STRIKETHROUGH)
2756
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2757
+ if (extensions & MKDEXT_HIGHLIGHT)
2758
+ md->active_char['='] = MD_CHAR_EMPHASIS;
2759
+ }
2760
+
2761
+ if (md->cb.codespan)
2762
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2763
+
2764
+ if (md->cb.linebreak)
2765
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2766
+
2767
+ if (md->cb.image || md->cb.link)
2768
+ md->active_char['['] = MD_CHAR_LINK;
2769
+
2770
+ md->active_char['<'] = MD_CHAR_LANGLE;
2771
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2772
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2773
+
2774
+ if (extensions & MKDEXT_AUTOLINK) {
2775
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2776
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2777
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2778
+ }
2779
+
2780
+ if (extensions & MKDEXT_SUPERSCRIPT)
2781
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2782
+
2783
+ if (extensions & MKDEXT_QUOTE)
2784
+ md->active_char['"'] = MD_CHAR_QUOTE;
2785
+
2786
+ /* Extension data */
2787
+ md->ext_flags = extensions;
2788
+ md->opaque = opaque;
2789
+ md->max_nesting = max_nesting;
2790
+ md->in_link_body = 0;
2791
+
2792
+ return md;
2793
+ }
2794
+
2795
+ void
2796
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2797
+ {
2798
+ #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2799
+ static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2800
+
2801
+ struct buf *text;
2802
+ size_t beg, end;
2803
+ int in_fence = 0;
2804
+
2805
+ text = bufnew(64);
2806
+ if (!text)
2807
+ return;
2808
+
2809
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2810
+ bufgrow(text, doc_size);
2811
+
2812
+ /* reset the references table */
2813
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2814
+
2815
+ int footnotes_enabled = md->ext_flags & MKDEXT_FOOTNOTES;
2816
+ int codefences_enabled = md->ext_flags & MKDEXT_FENCED_CODE;
2817
+
2818
+ /* reset the footnotes lists */
2819
+ if (footnotes_enabled) {
2820
+ memset(&md->footnotes_found, 0x0, sizeof(md->footnotes_found));
2821
+ memset(&md->footnotes_used, 0x0, sizeof(md->footnotes_used));
2822
+ }
2823
+
2824
+ /* first pass: looking for references, copying everything else */
2825
+ beg = 0;
2826
+
2827
+ /* Skip a possible UTF-8 BOM, even though the Unicode standard
2828
+ * discourages having these in UTF-8 documents */
2829
+ if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2830
+ beg += 3;
2831
+
2832
+ while (beg < doc_size) { /* iterating over lines */
2833
+ if (codefences_enabled && (is_codefence(document + beg, doc_size - beg, NULL) != 0))
2834
+ in_fence = !in_fence;
2835
+
2836
+ if (!in_fence && footnotes_enabled && is_footnote(document, beg, doc_size, &end, &md->footnotes_found))
2837
+ beg = end;
2838
+ else if (!in_fence && is_ref(document, beg, doc_size, &end, md->refs))
2839
+ beg = end;
2840
+ else { /* skipping to the next line */
2841
+ end = beg;
2842
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2843
+ end++;
2844
+
2845
+ /* adding the line body if present */
2846
+ if (end > beg)
2847
+ expand_tabs(text, document + beg, end - beg);
2848
+
2849
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2850
+ /* add one \n per newline */
2851
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2852
+ bufputc(text, '\n');
2853
+ end++;
2854
+ }
2855
+
2856
+ beg = end;
2857
+ }
2858
+ }
2859
+
2860
+ /* pre-grow the output buffer to minimize allocations */
2861
+ bufgrow(ob, MARKDOWN_GROW(text->size));
2862
+
2863
+ /* second pass: actual rendering */
2864
+ if (md->cb.doc_header)
2865
+ md->cb.doc_header(ob, md->opaque);
2866
+
2867
+ if (text->size) {
2868
+ /* adding a final newline if not already present */
2869
+ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2870
+ bufputc(text, '\n');
2871
+
2872
+ parse_block(ob, md, text->data, text->size);
2873
+ }
2874
+
2875
+ /* footnotes */
2876
+ if (footnotes_enabled)
2877
+ parse_footnote_list(ob, md, &md->footnotes_used);
2878
+
2879
+ if (md->cb.doc_footer)
2880
+ md->cb.doc_footer(ob, md->opaque);
2881
+
2882
+ /* Null-terminate the buffer */
2883
+ bufcstr(ob);
2884
+
2885
+ /* clean-up */
2886
+ bufrelease(text);
2887
+ free_link_refs(md->refs);
2888
+ if (footnotes_enabled) {
2889
+ free_footnote_list(&md->footnotes_found, 1);
2890
+ free_footnote_list(&md->footnotes_used, 0);
2891
+ }
2892
+
2893
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2894
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2895
+ }
2896
+
2897
+ void
2898
+ sd_markdown_free(struct sd_markdown *md)
2899
+ {
2900
+ size_t i;
2901
+
2902
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2903
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2904
+
2905
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2906
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2907
+
2908
+ greenmat_stack_free(&md->work_bufs[BUFFER_SPAN]);
2909
+ greenmat_stack_free(&md->work_bufs[BUFFER_BLOCK]);
2910
+
2911
+ free(md);
2912
+ }