commonmarker 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/ext/commonmarker/cmark/CMakeLists.txt +10 -4
  3. data/ext/commonmarker/cmark/Makefile +5 -5
  4. data/ext/commonmarker/cmark/api_test/CMakeLists.txt +1 -1
  5. data/ext/commonmarker/cmark/api_test/main.c +16 -0
  6. data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -4
  7. data/ext/commonmarker/cmark/build/CMakeFiles/2.8.10.1/CMakeSystem.cmake +4 -4
  8. data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
  9. data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +97 -142
  10. data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -1
  11. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
  12. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
  13. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
  14. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
  15. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +2 -2
  16. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
  17. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
  18. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/buffer.c.o +0 -0
  19. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
  20. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
  21. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/houdini_html_u.c.o +0 -0
  22. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
  23. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
  24. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
  25. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/references.c.o +0 -0
  26. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
  27. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
  28. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/utf8.c.o +0 -0
  29. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
  30. data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
  31. data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
  32. data/ext/commonmarker/cmark/build/src/config.h +6 -6
  33. data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
  34. data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
  35. data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +4 -4
  36. data/ext/commonmarker/cmark/changelog.txt +46 -0
  37. data/ext/commonmarker/cmark/man/man3/cmark.3 +21 -20
  38. data/ext/commonmarker/cmark/src/CMakeLists.txt +4 -6
  39. data/ext/commonmarker/cmark/src/bench.h +8 -8
  40. data/ext/commonmarker/cmark/src/blocks.c +917 -947
  41. data/ext/commonmarker/cmark/src/buffer.c +213 -288
  42. data/ext/commonmarker/cmark/src/buffer.h +19 -21
  43. data/ext/commonmarker/cmark/src/chunk.h +78 -82
  44. data/ext/commonmarker/cmark/src/cmark.c +9 -17
  45. data/ext/commonmarker/cmark/src/cmark.h +113 -157
  46. data/ext/commonmarker/cmark/src/cmark_ctype.c +24 -35
  47. data/ext/commonmarker/cmark/src/commonmark.c +390 -425
  48. data/ext/commonmarker/cmark/src/config.h.in +6 -6
  49. data/ext/commonmarker/cmark/src/houdini.h +21 -15
  50. data/ext/commonmarker/cmark/src/houdini_href_e.c +50 -57
  51. data/ext/commonmarker/cmark/src/houdini_html_e.c +36 -51
  52. data/ext/commonmarker/cmark/src/houdini_html_u.c +119 -124
  53. data/ext/commonmarker/cmark/src/html.c +289 -307
  54. data/ext/commonmarker/cmark/src/inlines.c +976 -1030
  55. data/ext/commonmarker/cmark/src/inlines.h +4 -2
  56. data/ext/commonmarker/cmark/src/iterator.c +96 -126
  57. data/ext/commonmarker/cmark/src/iterator.h +5 -5
  58. data/ext/commonmarker/cmark/src/latex.c +379 -401
  59. data/ext/commonmarker/cmark/src/main.c +168 -175
  60. data/ext/commonmarker/cmark/src/man.c +212 -226
  61. data/ext/commonmarker/cmark/src/node.c +746 -839
  62. data/ext/commonmarker/cmark/src/node.h +47 -48
  63. data/ext/commonmarker/cmark/src/parser.h +14 -14
  64. data/ext/commonmarker/cmark/src/references.c +101 -111
  65. data/ext/commonmarker/cmark/src/references.h +10 -8
  66. data/ext/commonmarker/cmark/src/render.c +144 -167
  67. data/ext/commonmarker/cmark/src/render.h +22 -41
  68. data/ext/commonmarker/cmark/src/scanners.c +27695 -20903
  69. data/ext/commonmarker/cmark/src/scanners.h +2 -1
  70. data/ext/commonmarker/cmark/src/scanners.re +1 -1
  71. data/ext/commonmarker/cmark/src/utf8.c +276 -419
  72. data/ext/commonmarker/cmark/src/utf8.h +6 -6
  73. data/ext/commonmarker/cmark/src/xml.c +129 -144
  74. data/ext/commonmarker/cmark/test/CMakeLists.txt +4 -4
  75. data/ext/commonmarker/cmark/test/smart_punct.txt +8 -0
  76. data/ext/commonmarker/cmark/test/spec.txt +109 -47
  77. data/lib/commonmarker/version.rb +1 -1
  78. metadata +2 -2
@@ -13,7 +13,6 @@
13
13
  #include "scanners.h"
14
14
  #include "inlines.h"
15
15
 
16
-
17
16
  static const char *EMDASH = "\xE2\x80\x94";
18
17
  static const char *ENDASH = "\xE2\x80\x93";
19
18
  static const char *ELLIPSES = "\xE2\x80\xA6";
@@ -22,7 +21,6 @@ static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D";
22
21
  static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
23
22
  static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
24
23
 
25
-
26
24
  // Macros for creating various kinds of simple inline nodes.
27
25
  #define make_str(s) make_literal(CMARK_NODE_TEXT, s)
28
26
  #define make_code(s) make_literal(CMARK_NODE_CODE, s)
@@ -33,209 +31,189 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
33
31
  #define make_strong() make_simple(CMARK_NODE_STRONG)
34
32
 
35
33
  typedef struct delimiter {
36
- struct delimiter *previous;
37
- struct delimiter *next;
38
- cmark_node *inl_text;
39
- bufsize_t position;
40
- unsigned char delim_char;
41
- bool can_open;
42
- bool can_close;
43
- bool active;
34
+ struct delimiter *previous;
35
+ struct delimiter *next;
36
+ cmark_node *inl_text;
37
+ bufsize_t position;
38
+ unsigned char delim_char;
39
+ bool can_open;
40
+ bool can_close;
41
+ bool active;
44
42
  } delimiter;
45
43
 
46
44
  typedef struct {
47
- cmark_chunk input;
48
- bufsize_t pos;
49
- cmark_reference_map *refmap;
50
- delimiter *last_delim;
45
+ cmark_chunk input;
46
+ bufsize_t pos;
47
+ cmark_reference_map *refmap;
48
+ delimiter *last_delim;
51
49
  } subject;
52
50
 
53
- static inline bool
54
- S_is_line_end_char(char c)
55
- {
56
- return (c == '\n' || c == '\r');
51
+ static inline bool S_is_line_end_char(char c) {
52
+ return (c == '\n' || c == '\r');
57
53
  }
58
54
 
59
- static delimiter*
60
- S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
55
+ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
56
+ delimiter *closer);
61
57
 
62
- static int parse_inline(subject* subj, cmark_node * parent, int options);
58
+ static int parse_inline(subject *subj, cmark_node *parent, int options);
63
59
 
64
60
  static void subject_from_buf(subject *e, cmark_strbuf *buffer,
65
61
  cmark_reference_map *refmap);
66
62
  static bufsize_t subject_find_special_char(subject *subj, int options);
67
63
 
68
64
  // Create an inline with a literal string value.
69
- static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s)
70
- {
71
- cmark_node * e = (cmark_node *)calloc(1, sizeof(*e));
72
- if(e != NULL) {
73
- e->type = t;
74
- e->as.literal = s;
75
- e->next = NULL;
76
- e->prev = NULL;
77
- e->parent = NULL;
78
- e->first_child = NULL;
79
- e->last_child = NULL;
80
- // These fields aren't used for inlines:
81
- e->start_line = 0;
82
- e->start_column = 0;
83
- e->end_line = 0;
84
- }
85
- return e;
65
+ static inline cmark_node *make_literal(cmark_node_type t, cmark_chunk s) {
66
+ cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
67
+ if (e != NULL) {
68
+ e->type = t;
69
+ e->as.literal = s;
70
+ e->next = NULL;
71
+ e->prev = NULL;
72
+ e->parent = NULL;
73
+ e->first_child = NULL;
74
+ e->last_child = NULL;
75
+ // These fields aren't used for inlines:
76
+ e->start_line = 0;
77
+ e->start_column = 0;
78
+ e->end_line = 0;
79
+ }
80
+ return e;
86
81
  }
87
82
 
88
83
  // Create an inline with no value.
89
- static inline cmark_node* make_simple(cmark_node_type t)
90
- {
91
- cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
92
- if(e != NULL) {
93
- e->type = t;
94
- e->next = NULL;
95
- e->prev = NULL;
96
- e->parent = NULL;
97
- e->first_child = NULL;
98
- e->last_child = NULL;
99
- // These fields aren't used for inlines:
100
- e->start_line = 0;
101
- e->start_column = 0;
102
- e->end_line = 0;
103
- }
104
- return e;
84
+ static inline cmark_node *make_simple(cmark_node_type t) {
85
+ cmark_node *e = (cmark_node *)calloc(1, sizeof(*e));
86
+ if (e != NULL) {
87
+ e->type = t;
88
+ e->next = NULL;
89
+ e->prev = NULL;
90
+ e->parent = NULL;
91
+ e->first_child = NULL;
92
+ e->last_child = NULL;
93
+ // These fields aren't used for inlines:
94
+ e->start_line = 0;
95
+ e->start_column = 0;
96
+ e->end_line = 0;
97
+ }
98
+ return e;
105
99
  }
106
100
 
107
101
  // Like make_str, but parses entities.
108
- static cmark_node *make_str_with_entities(cmark_chunk *content)
109
- {
110
- cmark_strbuf unescaped = GH_BUF_INIT;
111
-
112
- if (houdini_unescape_html(&unescaped, content->data, content->len)) {
113
- return make_str(cmark_chunk_buf_detach(&unescaped));
114
- } else {
115
- return make_str(*content);
116
- }
102
+ static cmark_node *make_str_with_entities(cmark_chunk *content) {
103
+ cmark_strbuf unescaped = GH_BUF_INIT;
104
+
105
+ if (houdini_unescape_html(&unescaped, content->data, content->len)) {
106
+ return make_str(cmark_chunk_buf_detach(&unescaped));
107
+ } else {
108
+ return make_str(*content);
109
+ }
117
110
  }
118
111
 
119
112
  // Duplicate a chunk by creating a copy of the buffer not by reusing the
120
113
  // buffer like cmark_chunk_dup does.
121
- static cmark_chunk chunk_clone(cmark_chunk *src)
122
- {
123
- cmark_chunk c;
124
- bufsize_t len = src->len;
114
+ static cmark_chunk chunk_clone(cmark_chunk *src) {
115
+ cmark_chunk c;
116
+ bufsize_t len = src->len;
125
117
 
126
- c.len = len;
127
- c.data = (unsigned char *)malloc(len + 1);
128
- c.alloc = 1;
129
- memcpy(c.data, src->data, len);
130
- c.data[len] = '\0';
118
+ c.len = len;
119
+ c.data = (unsigned char *)malloc(len + 1);
120
+ c.alloc = 1;
121
+ memcpy(c.data, src->data, len);
122
+ c.data[len] = '\0';
131
123
 
132
- return c;
124
+ return c;
133
125
  }
134
126
 
135
- static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
136
- {
137
- cmark_strbuf buf = GH_BUF_INIT;
127
+ static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) {
128
+ cmark_strbuf buf = GH_BUF_INIT;
138
129
 
139
- cmark_chunk_trim(url);
130
+ cmark_chunk_trim(url);
140
131
 
141
- if (url->len == 0) {
142
- cmark_chunk result = CMARK_CHUNK_EMPTY;
143
- return result;
144
- }
132
+ if (url->len == 0) {
133
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
134
+ return result;
135
+ }
145
136
 
146
- if (is_email)
147
- cmark_strbuf_puts(&buf, "mailto:");
137
+ if (is_email)
138
+ cmark_strbuf_puts(&buf, "mailto:");
148
139
 
149
- houdini_unescape_html_f(&buf, url->data, url->len);
150
- return cmark_chunk_buf_detach(&buf);
140
+ houdini_unescape_html_f(&buf, url->data, url->len);
141
+ return cmark_chunk_buf_detach(&buf);
151
142
  }
152
143
 
153
- static inline cmark_node* make_autolink(cmark_chunk url, int is_email)
154
- {
155
- cmark_node *link = make_simple(CMARK_NODE_LINK);
156
- link->as.link.url = cmark_clean_autolink(&url, is_email);
157
- link->as.link.title = cmark_chunk_literal("");
158
- cmark_node_append_child(link, make_str_with_entities(&url));
159
- return link;
144
+ static inline cmark_node *make_autolink(cmark_chunk url, int is_email) {
145
+ cmark_node *link = make_simple(CMARK_NODE_LINK);
146
+ link->as.link.url = cmark_clean_autolink(&url, is_email);
147
+ link->as.link.title = cmark_chunk_literal("");
148
+ cmark_node_append_child(link, make_str_with_entities(&url));
149
+ return link;
160
150
  }
161
151
 
162
152
  static void subject_from_buf(subject *e, cmark_strbuf *buffer,
163
- cmark_reference_map *refmap)
164
- {
165
- e->input.data = buffer->ptr;
166
- e->input.len = buffer->size;
167
- e->input.alloc = 0;
168
- e->pos = 0;
169
- e->refmap = refmap;
170
- e->last_delim = NULL;
153
+ cmark_reference_map *refmap) {
154
+ e->input.data = buffer->ptr;
155
+ e->input.len = buffer->size;
156
+ e->input.alloc = 0;
157
+ e->pos = 0;
158
+ e->refmap = refmap;
159
+ e->last_delim = NULL;
171
160
  }
172
161
 
173
- static inline int isbacktick(int c)
174
- {
175
- return (c == '`');
176
- }
162
+ static inline int isbacktick(int c) { return (c == '`'); }
177
163
 
178
- static inline unsigned char peek_char(subject *subj)
179
- {
180
- // NULL bytes should have been stripped out by now. If they're
181
- // present, it's a programming error:
182
- assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
183
- return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
164
+ static inline unsigned char peek_char(subject *subj) {
165
+ // NULL bytes should have been stripped out by now. If they're
166
+ // present, it's a programming error:
167
+ assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
168
+ return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
184
169
  }
185
170
 
186
- static inline unsigned char peek_at(subject *subj, bufsize_t pos)
187
- {
188
- return subj->input.data[pos];
171
+ static inline unsigned char peek_at(subject *subj, bufsize_t pos) {
172
+ return subj->input.data[pos];
189
173
  }
190
174
 
191
175
  // Return true if there are no more characters in the subject.
192
- static inline int is_eof(subject* subj)
193
- {
194
- return (subj->pos >= subj->input.len);
176
+ static inline int is_eof(subject *subj) {
177
+ return (subj->pos >= subj->input.len);
195
178
  }
196
179
 
197
180
  // Advance the subject. Doesn't check for eof.
198
181
  #define advance(subj) (subj)->pos += 1
199
182
 
200
- static inline bool
201
- skip_spaces(subject *subj)
202
- {
203
- bool skipped = false;
204
- while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
205
- advance(subj);
206
- skipped = true;
207
- }
208
- return skipped;
183
+ static inline bool skip_spaces(subject *subj) {
184
+ bool skipped = false;
185
+ while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
186
+ advance(subj);
187
+ skipped = true;
188
+ }
189
+ return skipped;
209
190
  }
210
191
 
211
- static inline bool
212
- skip_line_end(subject *subj)
213
- {
214
- bool seen_line_end_char = false;
215
- if (peek_char(subj) == '\r') {
216
- advance(subj);
217
- seen_line_end_char = true;
218
- }
219
- if (peek_char(subj) == '\n') {
220
- advance(subj);
221
- seen_line_end_char = true;
222
- }
223
- return seen_line_end_char || is_eof(subj);
192
+ static inline bool skip_line_end(subject *subj) {
193
+ bool seen_line_end_char = false;
194
+ if (peek_char(subj) == '\r') {
195
+ advance(subj);
196
+ seen_line_end_char = true;
197
+ }
198
+ if (peek_char(subj) == '\n') {
199
+ advance(subj);
200
+ seen_line_end_char = true;
201
+ }
202
+ return seen_line_end_char || is_eof(subj);
224
203
  }
225
204
 
226
205
  // Take characters while a predicate holds, and return a string.
227
- static inline cmark_chunk take_while(subject* subj, int (*f)(int))
228
- {
229
- unsigned char c;
230
- bufsize_t startpos = subj->pos;
231
- bufsize_t len = 0;
206
+ static inline cmark_chunk take_while(subject *subj, int (*f)(int)) {
207
+ unsigned char c;
208
+ bufsize_t startpos = subj->pos;
209
+ bufsize_t len = 0;
232
210
 
233
- while ((c = peek_char(subj)) && (*f)(c)) {
234
- advance(subj);
235
- len++;
236
- }
211
+ while ((c = peek_char(subj)) && (*f)(c)) {
212
+ advance(subj);
213
+ len++;
214
+ }
237
215
 
238
- return cmark_chunk_dup(&subj->input, startpos, len);
216
+ return cmark_chunk_dup(&subj->input, startpos, len);
239
217
  }
240
218
 
241
219
  // Try to process a backtick code span that began with a
@@ -243,981 +221,949 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
243
221
  // parsed). Return 0 if you don't find matching closing
244
222
  // backticks, otherwise return the position in the subject
245
223
  // after the closing backticks.
246
- static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
247
- {
248
- // read non backticks
249
- unsigned char c;
250
- while ((c = peek_char(subj)) && c != '`') {
251
- advance(subj);
252
- }
253
- if (is_eof(subj)) {
254
- return 0; // did not find closing ticks, return 0
255
- }
256
- bufsize_t numticks = 0;
257
- while (peek_char(subj) == '`') {
258
- advance(subj);
259
- numticks++;
260
- }
261
- if (numticks != openticklength) {
262
- return(scan_to_closing_backticks(subj, openticklength));
263
- }
264
- return (subj->pos);
224
+ static bufsize_t scan_to_closing_backticks(subject *subj,
225
+ bufsize_t openticklength) {
226
+ // read non backticks
227
+ unsigned char c;
228
+ while ((c = peek_char(subj)) && c != '`') {
229
+ advance(subj);
230
+ }
231
+ if (is_eof(subj)) {
232
+ return 0; // did not find closing ticks, return 0
233
+ }
234
+ bufsize_t numticks = 0;
235
+ while (peek_char(subj) == '`') {
236
+ advance(subj);
237
+ numticks++;
238
+ }
239
+ if (numticks != openticklength) {
240
+ return (scan_to_closing_backticks(subj, openticklength));
241
+ }
242
+ return (subj->pos);
265
243
  }
266
244
 
267
245
  // Parse backtick code section or raw backticks, return an inline.
268
246
  // Assumes that the subject has a backtick at the current position.
269
- static cmark_node* handle_backticks(subject *subj)
270
- {
271
- cmark_chunk openticks = take_while(subj, isbacktick);
272
- bufsize_t startpos = subj->pos;
273
- bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
274
-
275
- if (endpos == 0) { // not found
276
- subj->pos = startpos; // rewind
277
- return make_str(openticks);
278
- } else {
279
- cmark_strbuf buf = GH_BUF_INIT;
280
-
281
- cmark_strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
282
- cmark_strbuf_trim(&buf);
283
- cmark_strbuf_normalize_whitespace(&buf);
284
-
285
- return make_code(cmark_chunk_buf_detach(&buf));
286
- }
247
+ static cmark_node *handle_backticks(subject *subj) {
248
+ cmark_chunk openticks = take_while(subj, isbacktick);
249
+ bufsize_t startpos = subj->pos;
250
+ bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
251
+
252
+ if (endpos == 0) { // not found
253
+ subj->pos = startpos; // rewind
254
+ return make_str(openticks);
255
+ } else {
256
+ cmark_strbuf buf = GH_BUF_INIT;
257
+
258
+ cmark_strbuf_set(&buf, subj->input.data + startpos,
259
+ endpos - startpos - openticks.len);
260
+ cmark_strbuf_trim(&buf);
261
+ cmark_strbuf_normalize_whitespace(&buf);
262
+
263
+ return make_code(cmark_chunk_buf_detach(&buf));
264
+ }
287
265
  }
288
266
 
289
267
  // Scan ***, **, or * and return number scanned, or 0.
290
268
  // Advances position.
291
- static int
292
- scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
293
- {
294
- int numdelims = 0;
295
- bufsize_t before_char_pos;
296
- int32_t after_char = 0;
297
- int32_t before_char = 0;
298
- int len;
299
- bool left_flanking, right_flanking;
300
-
301
- if (subj->pos == 0) {
302
- before_char = 10;
303
- } else {
304
- before_char_pos = subj->pos - 1;
305
- // walk back to the beginning of the UTF_8 sequence:
306
- while (peek_at(subj, before_char_pos) >> 6 == 2 &&
307
- before_char_pos > 0) {
308
- before_char_pos -= 1;
309
- }
310
- len = utf8proc_iterate(subj->input.data + before_char_pos,
311
- subj->pos - before_char_pos, &before_char);
312
- if (len == -1) {
313
- before_char = 10;
314
- }
315
- }
316
-
317
- if (c == '\'' || c == '"') {
318
- numdelims++;
319
- advance(subj); // limit to 1 delim for quotes
320
- } else {
321
- while (peek_char(subj) == c) {
322
- numdelims++;
323
- advance(subj);
324
- }
325
- }
326
-
327
- len = utf8proc_iterate(subj->input.data + subj->pos,
328
- subj->input.len - subj->pos, &after_char);
329
- if (len == -1) {
330
- after_char = 10;
331
- }
332
- left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) &&
333
- !(utf8proc_is_punctuation(after_char) &&
334
- !utf8proc_is_space(before_char) &&
335
- !utf8proc_is_punctuation(before_char));
336
- right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) &&
337
- !(utf8proc_is_punctuation(before_char) &&
338
- !utf8proc_is_space(after_char) &&
339
- !utf8proc_is_punctuation(after_char));
340
- if (c == '_') {
341
- *can_open = left_flanking &&
342
- (!right_flanking || utf8proc_is_punctuation(before_char));
343
- *can_close = right_flanking &&
344
- (!left_flanking || utf8proc_is_punctuation(after_char));
345
- } else if (c == '\'' || c == '"') {
346
- *can_open = left_flanking && !right_flanking;
347
- *can_close = right_flanking;
348
- } else {
349
- *can_open = left_flanking;
350
- *can_close = right_flanking;
351
- }
352
- return numdelims;
269
+ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
270
+ bool *can_close) {
271
+ int numdelims = 0;
272
+ bufsize_t before_char_pos;
273
+ int32_t after_char = 0;
274
+ int32_t before_char = 0;
275
+ int len;
276
+ bool left_flanking, right_flanking;
277
+
278
+ if (subj->pos == 0) {
279
+ before_char = 10;
280
+ } else {
281
+ before_char_pos = subj->pos - 1;
282
+ // walk back to the beginning of the UTF_8 sequence:
283
+ while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
284
+ before_char_pos -= 1;
285
+ }
286
+ len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
287
+ subj->pos - before_char_pos, &before_char);
288
+ if (len == -1) {
289
+ before_char = 10;
290
+ }
291
+ }
292
+
293
+ if (c == '\'' || c == '"') {
294
+ numdelims++;
295
+ advance(subj); // limit to 1 delim for quotes
296
+ } else {
297
+ while (peek_char(subj) == c) {
298
+ numdelims++;
299
+ advance(subj);
300
+ }
301
+ }
302
+
303
+ len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
304
+ subj->input.len - subj->pos, &after_char);
305
+ if (len == -1) {
306
+ after_char = 10;
307
+ }
308
+ left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
309
+ !(cmark_utf8proc_is_punctuation(after_char) &&
310
+ !cmark_utf8proc_is_space(before_char) &&
311
+ !cmark_utf8proc_is_punctuation(before_char));
312
+ right_flanking =
313
+ numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
314
+ !(cmark_utf8proc_is_punctuation(before_char) &&
315
+ !cmark_utf8proc_is_space(after_char) && !cmark_utf8proc_is_punctuation(after_char));
316
+ if (c == '_') {
317
+ *can_open = left_flanking &&
318
+ (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
319
+ *can_close = right_flanking &&
320
+ (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
321
+ } else if (c == '\'' || c == '"') {
322
+ *can_open = left_flanking && !right_flanking;
323
+ *can_close = right_flanking;
324
+ } else {
325
+ *can_open = left_flanking;
326
+ *can_close = right_flanking;
327
+ }
328
+ return numdelims;
353
329
  }
354
330
 
355
331
  /*
356
332
  static void print_delimiters(subject *subj)
357
333
  {
358
- delimiter *delim;
359
- delim = subj->last_delim;
360
- while (delim != NULL) {
361
- printf("Item at stack pos %p, text pos %d: %d %d %d next(%p) prev(%p)\n",
362
- (void*)delim, delim->position, delim->delim_char,
363
- delim->can_open, delim->can_close,
364
- (void*)delim->next, (void*)delim->previous);
365
- delim = delim->previous;
366
- }
334
+ delimiter *delim;
335
+ delim = subj->last_delim;
336
+ while (delim != NULL) {
337
+ printf("Item at stack pos %p, text pos %d: %d %d %d next(%p)
338
+ prev(%p)\n",
339
+ (void*)delim, delim->position, delim->delim_char,
340
+ delim->can_open, delim->can_close,
341
+ (void*)delim->next, (void*)delim->previous);
342
+ delim = delim->previous;
343
+ }
367
344
  }
368
345
  */
369
346
 
370
- static void remove_delimiter(subject *subj, delimiter *delim)
371
- {
372
- if (delim == NULL) return;
373
- if (delim->next == NULL) {
374
- // end of list:
375
- assert(delim == subj->last_delim);
376
- subj->last_delim = delim->previous;
377
- } else {
378
- delim->next->previous = delim->previous;
379
- }
380
- if (delim->previous != NULL) {
381
- delim->previous->next = delim->next;
382
- }
383
- free(delim);
347
+ static void remove_delimiter(subject *subj, delimiter *delim) {
348
+ if (delim == NULL)
349
+ return;
350
+ if (delim->next == NULL) {
351
+ // end of list:
352
+ assert(delim == subj->last_delim);
353
+ subj->last_delim = delim->previous;
354
+ } else {
355
+ delim->next->previous = delim->previous;
356
+ }
357
+ if (delim->previous != NULL) {
358
+ delim->previous->next = delim->next;
359
+ }
360
+ free(delim);
384
361
  }
385
362
 
386
363
  static void push_delimiter(subject *subj, unsigned char c, bool can_open,
387
- bool can_close, cmark_node *inl_text)
388
- {
389
- delimiter *delim =
390
- (delimiter*)malloc(sizeof(delimiter));
391
- if (delim == NULL) {
392
- return;
393
- }
394
- delim->delim_char = c;
395
- delim->can_open = can_open;
396
- delim->can_close = can_close;
397
- delim->inl_text = inl_text;
398
- delim->previous = subj->last_delim;
399
- delim->next = NULL;
400
- if (delim->previous != NULL) {
401
- delim->previous->next = delim;
402
- }
403
- delim->position = subj->pos;
404
- delim->active = true;
405
- subj->last_delim = delim;
364
+ bool can_close, cmark_node *inl_text) {
365
+ delimiter *delim = (delimiter *)malloc(sizeof(delimiter));
366
+ if (delim == NULL) {
367
+ return;
368
+ }
369
+ delim->delim_char = c;
370
+ delim->can_open = can_open;
371
+ delim->can_close = can_close;
372
+ delim->inl_text = inl_text;
373
+ delim->previous = subj->last_delim;
374
+ delim->next = NULL;
375
+ if (delim->previous != NULL) {
376
+ delim->previous->next = delim;
377
+ }
378
+ delim->position = subj->pos;
379
+ delim->active = true;
380
+ subj->last_delim = delim;
406
381
  }
407
382
 
408
383
  // Assumes the subject has a c at the current position.
409
- static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
410
- {
411
- bufsize_t numdelims;
412
- cmark_node * inl_text;
413
- bool can_open, can_close;
414
- cmark_chunk contents;
415
-
416
- numdelims = scan_delims(subj, c, &can_open, &can_close);
417
-
418
- if (c == '\'' && smart) {
419
- contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
420
- } else if (c == '"' && smart) {
421
- contents = cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
422
- } else {
423
- contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
424
- }
425
-
426
- inl_text = make_str(contents);
427
-
428
- if ((can_open || can_close) &&
429
- (!(c == '\'' || c == '"') || smart)) {
430
- push_delimiter(subj, c, can_open, can_close, inl_text);
431
- }
432
-
433
- return inl_text;
384
+ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
385
+ bufsize_t numdelims;
386
+ cmark_node *inl_text;
387
+ bool can_open, can_close;
388
+ cmark_chunk contents;
389
+
390
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
391
+
392
+ if (c == '\'' && smart) {
393
+ contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
394
+ } else if (c == '"' && smart) {
395
+ contents =
396
+ cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
397
+ } else {
398
+ contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
399
+ }
400
+
401
+ inl_text = make_str(contents);
402
+
403
+ if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
404
+ push_delimiter(subj, c, can_open, can_close, inl_text);
405
+ }
406
+
407
+ return inl_text;
434
408
  }
435
409
 
436
410
  // Assumes we have a hyphen at the current position.
437
- static cmark_node* handle_hyphen(subject* subj, bool smart)
438
- {
439
- int startpos = subj->pos;
440
-
441
- advance(subj);
442
-
443
- if (!smart || peek_char(subj) != '-') {
444
- return make_str(cmark_chunk_literal("-"));
445
- }
446
-
447
- while (smart && peek_char(subj) == '-') {
448
- advance(subj);
449
- }
450
-
451
- int numhyphens = subj->pos - startpos;
452
- int en_count = 0;
453
- int em_count = 0;
454
- int i;
455
- cmark_strbuf buf = GH_BUF_INIT;
456
-
457
- if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
458
- em_count = numhyphens / 3;
459
- } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
460
- en_count = numhyphens / 2;
461
- } else if (numhyphens % 3 == 2) { // use one en dash at end
462
- en_count = 1;
463
- em_count = (numhyphens - 2) / 3;
464
- } else { // use two en dashes at the end
465
- en_count = 2;
466
- em_count = (numhyphens - 4) / 3;
467
- }
468
-
469
- for (i = em_count; i > 0; i--) {
470
- cmark_strbuf_puts(&buf, EMDASH);
471
- }
472
-
473
- for (i = en_count; i > 0; i--) {
474
- cmark_strbuf_puts(&buf, ENDASH);
475
- }
476
-
477
- return make_str(cmark_chunk_buf_detach(&buf));
411
+ static cmark_node *handle_hyphen(subject *subj, bool smart) {
412
+ int startpos = subj->pos;
413
+
414
+ advance(subj);
415
+
416
+ if (!smart || peek_char(subj) != '-') {
417
+ return make_str(cmark_chunk_literal("-"));
418
+ }
419
+
420
+ while (smart && peek_char(subj) == '-') {
421
+ advance(subj);
422
+ }
423
+
424
+ int numhyphens = subj->pos - startpos;
425
+ int en_count = 0;
426
+ int em_count = 0;
427
+ int i;
428
+ cmark_strbuf buf = GH_BUF_INIT;
429
+
430
+ if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
431
+ em_count = numhyphens / 3;
432
+ } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
433
+ en_count = numhyphens / 2;
434
+ } else if (numhyphens % 3 == 2) { // use one en dash at end
435
+ en_count = 1;
436
+ em_count = (numhyphens - 2) / 3;
437
+ } else { // use two en dashes at the end
438
+ en_count = 2;
439
+ em_count = (numhyphens - 4) / 3;
440
+ }
441
+
442
+ for (i = em_count; i > 0; i--) {
443
+ cmark_strbuf_puts(&buf, EMDASH);
444
+ }
445
+
446
+ for (i = en_count; i > 0; i--) {
447
+ cmark_strbuf_puts(&buf, ENDASH);
448
+ }
449
+
450
+ return make_str(cmark_chunk_buf_detach(&buf));
478
451
  }
479
452
 
480
453
  // Assumes we have a period at the current position.
481
- static cmark_node* handle_period(subject* subj, bool smart)
482
- {
483
- advance(subj);
484
- if (smart && peek_char(subj) == '.') {
485
- advance(subj);
486
- if (peek_char(subj) == '.') {
487
- advance(subj);
488
- return make_str(cmark_chunk_literal(ELLIPSES));
489
- } else {
490
- return make_str(cmark_chunk_literal(".."));
491
- }
492
- } else {
493
- return make_str(cmark_chunk_literal("."));
494
- }
454
+ static cmark_node *handle_period(subject *subj, bool smart) {
455
+ advance(subj);
456
+ if (smart && peek_char(subj) == '.') {
457
+ advance(subj);
458
+ if (peek_char(subj) == '.') {
459
+ advance(subj);
460
+ return make_str(cmark_chunk_literal(ELLIPSES));
461
+ } else {
462
+ return make_str(cmark_chunk_literal(".."));
463
+ }
464
+ } else {
465
+ return make_str(cmark_chunk_literal("."));
466
+ }
495
467
  }
496
468
 
497
- static void process_emphasis(subject *subj, delimiter *stack_bottom)
498
- {
499
- delimiter *closer = subj->last_delim;
500
- delimiter *opener;
501
- delimiter *old_closer;
502
- bool opener_found;
503
- delimiter *openers_bottom[128];
504
-
505
- // initialize openers_bottom:
506
- openers_bottom['*'] = stack_bottom;
507
- openers_bottom['_'] = stack_bottom;
508
- openers_bottom['\''] = stack_bottom;
509
- openers_bottom['"'] = stack_bottom;
510
-
511
- // move back to first relevant delim.
512
- while (closer != NULL && closer->previous != stack_bottom) {
513
- closer = closer->previous;
514
- }
515
-
516
- // now move forward, looking for closers, and handling each
517
- while (closer != NULL) {
518
- if (closer->can_close &&
519
- (closer->delim_char == '*' || closer->delim_char == '_' ||
520
- closer->delim_char == '"' || closer->delim_char == '\'')) {
521
- // Now look backwards for first matching opener:
522
- opener = closer->previous;
523
- opener_found = false;
524
- while (opener != NULL && opener != stack_bottom &&
525
- opener != openers_bottom[closer->delim_char]) {
526
- if (opener->delim_char == closer->delim_char &&
527
- opener->can_open) {
528
- opener_found = true;
529
- break;
530
- }
531
- opener = opener->previous;
532
- }
533
- old_closer = closer;
534
- if (closer->delim_char == '*' || closer->delim_char == '_') {
535
- if (opener_found) {
536
- closer = S_insert_emph(subj, opener, closer);
537
- } else {
538
- closer = closer->next;
539
- }
540
- } else if (closer->delim_char == '\'') {
541
- cmark_chunk_free(&closer->inl_text->as.literal);
542
- closer->inl_text->as.literal =
543
- cmark_chunk_literal(RIGHTSINGLEQUOTE);
544
- if (opener_found) {
545
- cmark_chunk_free(&opener->inl_text->as.literal);
546
- opener->inl_text->as.literal =
547
- cmark_chunk_literal(LEFTSINGLEQUOTE);
548
- }
549
- closer = closer->next;
550
- } else if (closer->delim_char == '"') {
551
- cmark_chunk_free(&closer->inl_text->as.literal);
552
- closer->inl_text->as.literal =
553
- cmark_chunk_literal(RIGHTDOUBLEQUOTE);
554
- if (opener_found) {
555
- cmark_chunk_free(&opener->inl_text->as.literal);
556
- opener->inl_text->as.literal =
557
- cmark_chunk_literal(LEFTDOUBLEQUOTE);
558
- }
559
- closer = closer->next;
560
- }
561
- if (!opener_found) {
562
- // set lower bound for future searches for openers:
563
- openers_bottom[old_closer->delim_char] = old_closer->previous;
564
- if (!old_closer->can_open) {
565
- // we can remove a closer that can't be an
566
- // opener, once we've seen there's no
567
- // matching opener:
568
- remove_delimiter(subj, old_closer);
569
- }
570
- }
571
- } else {
572
- closer = closer->next;
573
- }
574
- }
575
- // free all delimiters in list until stack_bottom:
576
- while (subj->last_delim != stack_bottom) {
577
- remove_delimiter(subj, subj->last_delim);
578
- }
469
+ static void process_emphasis(subject *subj, delimiter *stack_bottom) {
470
+ delimiter *closer = subj->last_delim;
471
+ delimiter *opener;
472
+ delimiter *old_closer;
473
+ bool opener_found;
474
+ delimiter *openers_bottom[128];
475
+
476
+ // initialize openers_bottom:
477
+ openers_bottom['*'] = stack_bottom;
478
+ openers_bottom['_'] = stack_bottom;
479
+ openers_bottom['\''] = stack_bottom;
480
+ openers_bottom['"'] = stack_bottom;
481
+
482
+ // move back to first relevant delim.
483
+ while (closer != NULL && closer->previous != stack_bottom) {
484
+ closer = closer->previous;
485
+ }
486
+
487
+ // now move forward, looking for closers, and handling each
488
+ while (closer != NULL) {
489
+ if (closer->can_close &&
490
+ (closer->delim_char == '*' || closer->delim_char == '_' ||
491
+ closer->delim_char == '"' || closer->delim_char == '\'')) {
492
+ // Now look backwards for first matching opener:
493
+ opener = closer->previous;
494
+ opener_found = false;
495
+ while (opener != NULL && opener != stack_bottom &&
496
+ opener != openers_bottom[closer->delim_char]) {
497
+ if (opener->delim_char == closer->delim_char && opener->can_open) {
498
+ opener_found = true;
499
+ break;
500
+ }
501
+ opener = opener->previous;
502
+ }
503
+ old_closer = closer;
504
+ if (closer->delim_char == '*' || closer->delim_char == '_') {
505
+ if (opener_found) {
506
+ closer = S_insert_emph(subj, opener, closer);
507
+ } else {
508
+ closer = closer->next;
509
+ }
510
+ } else if (closer->delim_char == '\'') {
511
+ cmark_chunk_free(&closer->inl_text->as.literal);
512
+ closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
513
+ if (opener_found) {
514
+ cmark_chunk_free(&opener->inl_text->as.literal);
515
+ opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
516
+ }
517
+ closer = closer->next;
518
+ } else if (closer->delim_char == '"') {
519
+ cmark_chunk_free(&closer->inl_text->as.literal);
520
+ closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
521
+ if (opener_found) {
522
+ cmark_chunk_free(&opener->inl_text->as.literal);
523
+ opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
524
+ }
525
+ closer = closer->next;
526
+ }
527
+ if (!opener_found) {
528
+ // set lower bound for future searches for openers:
529
+ openers_bottom[old_closer->delim_char] = old_closer->previous;
530
+ if (!old_closer->can_open) {
531
+ // we can remove a closer that can't be an
532
+ // opener, once we've seen there's no
533
+ // matching opener:
534
+ remove_delimiter(subj, old_closer);
535
+ }
536
+ }
537
+ } else {
538
+ closer = closer->next;
539
+ }
540
+ }
541
+ // free all delimiters in list until stack_bottom:
542
+ while (subj->last_delim != stack_bottom) {
543
+ remove_delimiter(subj, subj->last_delim);
544
+ }
579
545
  }
580
546
 
581
- static delimiter*
582
- S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
583
- {
584
- delimiter *delim, *tmp_delim;
585
- bufsize_t use_delims;
586
- cmark_node *opener_inl = opener->inl_text;
587
- cmark_node *closer_inl = closer->inl_text;
588
- bufsize_t opener_num_chars = opener_inl->as.literal.len;
589
- bufsize_t closer_num_chars = closer_inl->as.literal.len;
590
- cmark_node *tmp, *emph, *first_child, *last_child;
591
-
592
- // calculate the actual number of characters used from this closer
593
- if (closer_num_chars < 3 || opener_num_chars < 3) {
594
- use_delims = closer_num_chars <= opener_num_chars ?
595
- closer_num_chars : opener_num_chars;
596
- } else { // closer and opener both have >= 3 characters
597
- use_delims = closer_num_chars % 2 == 0 ? 2 : 1;
598
- }
599
-
600
- // remove used characters from associated inlines.
601
- opener_num_chars -= use_delims;
602
- closer_num_chars -= use_delims;
603
- opener_inl->as.literal.len = opener_num_chars;
604
- closer_inl->as.literal.len = closer_num_chars;
605
-
606
- // free delimiters between opener and closer
607
- delim = closer->previous;
608
- while (delim != NULL && delim != opener) {
609
- tmp_delim = delim->previous;
610
- remove_delimiter(subj, delim);
611
- delim = tmp_delim;
612
- }
613
-
614
- first_child = opener_inl->next;
615
- last_child = closer_inl->prev;
616
-
617
- // if opener has 0 characters, remove it and its associated inline
618
- if (opener_num_chars == 0) {
619
- // replace empty opener inline with emph
620
- cmark_chunk_free(&(opener_inl->as.literal));
621
- emph = opener_inl;
622
- emph->type = use_delims == 1 ?
623
- CMARK_NODE_EMPH : CMARK_NODE_STRONG;
624
- // remove opener from list
625
- remove_delimiter(subj, opener);
626
- } else {
627
- // create new emph or strong, and splice it in to our inlines
628
- // between the opener and closer
629
- emph = use_delims == 1 ? make_emph() : make_strong();
630
- emph->parent = opener_inl->parent;
631
- emph->prev = opener_inl;
632
- opener_inl->next = emph;
633
- }
634
-
635
- // push children below emph
636
- emph->next = closer_inl;
637
- closer_inl->prev = emph;
638
- emph->first_child = first_child;
639
- emph->last_child = last_child;
640
-
641
- // fix children pointers
642
- first_child->prev = NULL;
643
- last_child->next = NULL;
644
- for (tmp = first_child; tmp != NULL; tmp = tmp->next) {
645
- tmp->parent = emph;
646
- }
647
-
648
- // if closer has 0 characters, remove it and its associated inline
649
- if (closer_num_chars == 0) {
650
- // remove empty closer inline
651
- cmark_node_free(closer_inl);
652
- // remove closer from list
653
- tmp_delim = closer->next;
654
- remove_delimiter(subj, closer);
655
- closer = tmp_delim;
656
- }
657
-
658
- return closer;
547
+ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
548
+ delimiter *closer) {
549
+ delimiter *delim, *tmp_delim;
550
+ bufsize_t use_delims;
551
+ cmark_node *opener_inl = opener->inl_text;
552
+ cmark_node *closer_inl = closer->inl_text;
553
+ bufsize_t opener_num_chars = opener_inl->as.literal.len;
554
+ bufsize_t closer_num_chars = closer_inl->as.literal.len;
555
+ cmark_node *tmp, *emph, *first_child, *last_child;
556
+
557
+ // calculate the actual number of characters used from this closer
558
+ if (closer_num_chars < 3 || opener_num_chars < 3) {
559
+ use_delims = closer_num_chars <= opener_num_chars ? closer_num_chars
560
+ : opener_num_chars;
561
+ } else { // closer and opener both have >= 3 characters
562
+ use_delims = closer_num_chars % 2 == 0 ? 2 : 1;
563
+ }
564
+
565
+ // remove used characters from associated inlines.
566
+ opener_num_chars -= use_delims;
567
+ closer_num_chars -= use_delims;
568
+ opener_inl->as.literal.len = opener_num_chars;
569
+ closer_inl->as.literal.len = closer_num_chars;
570
+
571
+ // free delimiters between opener and closer
572
+ delim = closer->previous;
573
+ while (delim != NULL && delim != opener) {
574
+ tmp_delim = delim->previous;
575
+ remove_delimiter(subj, delim);
576
+ delim = tmp_delim;
577
+ }
578
+
579
+ first_child = opener_inl->next;
580
+ last_child = closer_inl->prev;
581
+
582
+ // if opener has 0 characters, remove it and its associated inline
583
+ if (opener_num_chars == 0) {
584
+ // replace empty opener inline with emph
585
+ cmark_chunk_free(&(opener_inl->as.literal));
586
+ emph = opener_inl;
587
+ emph->type = use_delims == 1 ? CMARK_NODE_EMPH : CMARK_NODE_STRONG;
588
+ // remove opener from list
589
+ remove_delimiter(subj, opener);
590
+ } else {
591
+ // create new emph or strong, and splice it in to our inlines
592
+ // between the opener and closer
593
+ emph = use_delims == 1 ? make_emph() : make_strong();
594
+ emph->parent = opener_inl->parent;
595
+ emph->prev = opener_inl;
596
+ opener_inl->next = emph;
597
+ }
598
+
599
+ // push children below emph
600
+ emph->next = closer_inl;
601
+ closer_inl->prev = emph;
602
+ emph->first_child = first_child;
603
+ emph->last_child = last_child;
604
+
605
+ // fix children pointers
606
+ first_child->prev = NULL;
607
+ last_child->next = NULL;
608
+ for (tmp = first_child; tmp != NULL; tmp = tmp->next) {
609
+ tmp->parent = emph;
610
+ }
611
+
612
+ // if closer has 0 characters, remove it and its associated inline
613
+ if (closer_num_chars == 0) {
614
+ // remove empty closer inline
615
+ cmark_node_free(closer_inl);
616
+ // remove closer from list
617
+ tmp_delim = closer->next;
618
+ remove_delimiter(subj, closer);
619
+ closer = tmp_delim;
620
+ }
621
+
622
+ return closer;
659
623
  }
660
624
 
661
625
  // Parse backslash-escape or just a backslash, returning an inline.
662
- static cmark_node* handle_backslash(subject *subj)
663
- {
664
- advance(subj);
665
- unsigned char nextchar = peek_char(subj);
666
- if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped
667
- advance(subj);
668
- return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
669
- } else if (!is_eof(subj) && skip_line_end(subj)) {
670
- return make_linebreak();
671
- } else {
672
- return make_str(cmark_chunk_literal("\\"));
673
- }
626
+ static cmark_node *handle_backslash(subject *subj) {
627
+ advance(subj);
628
+ unsigned char nextchar = peek_char(subj);
629
+ if (cmark_ispunct(
630
+ nextchar)) { // only ascii symbols and newline can be escaped
631
+ advance(subj);
632
+ return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
633
+ } else if (!is_eof(subj) && skip_line_end(subj)) {
634
+ return make_linebreak();
635
+ } else {
636
+ return make_str(cmark_chunk_literal("\\"));
637
+ }
674
638
  }
675
639
 
676
640
  // Parse an entity or a regular "&" string.
677
641
  // Assumes the subject has an '&' character at the current position.
678
- static cmark_node* handle_entity(subject* subj)
679
- {
680
- cmark_strbuf ent = GH_BUF_INIT;
681
- bufsize_t len;
642
+ static cmark_node *handle_entity(subject *subj) {
643
+ cmark_strbuf ent = GH_BUF_INIT;
644
+ bufsize_t len;
682
645
 
683
- advance(subj);
646
+ advance(subj);
684
647
 
685
- len = houdini_unescape_ent(&ent,
686
- subj->input.data + subj->pos,
687
- subj->input.len - subj->pos
688
- );
648
+ len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
649
+ subj->input.len - subj->pos);
689
650
 
690
- if (len == 0)
691
- return make_str(cmark_chunk_literal("&"));
651
+ if (len == 0)
652
+ return make_str(cmark_chunk_literal("&"));
692
653
 
693
- subj->pos += len;
694
- return make_str(cmark_chunk_buf_detach(&ent));
654
+ subj->pos += len;
655
+ return make_str(cmark_chunk_buf_detach(&ent));
695
656
  }
696
657
 
697
658
  // Clean a URL: remove surrounding whitespace and surrounding <>,
698
659
  // and remove \ that escape punctuation.
699
- cmark_chunk cmark_clean_url(cmark_chunk *url)
700
- {
701
- cmark_strbuf buf = GH_BUF_INIT;
660
+ cmark_chunk cmark_clean_url(cmark_chunk *url) {
661
+ cmark_strbuf buf = GH_BUF_INIT;
702
662
 
703
- cmark_chunk_trim(url);
663
+ cmark_chunk_trim(url);
704
664
 
705
- if (url->len == 0) {
706
- cmark_chunk result = CMARK_CHUNK_EMPTY;
707
- return result;
708
- }
665
+ if (url->len == 0) {
666
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
667
+ return result;
668
+ }
709
669
 
710
- if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
711
- houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
712
- } else {
713
- houdini_unescape_html_f(&buf, url->data, url->len);
714
- }
670
+ if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
671
+ houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
672
+ } else {
673
+ houdini_unescape_html_f(&buf, url->data, url->len);
674
+ }
715
675
 
716
- cmark_strbuf_unescape(&buf);
717
- return cmark_chunk_buf_detach(&buf);
676
+ cmark_strbuf_unescape(&buf);
677
+ return cmark_chunk_buf_detach(&buf);
718
678
  }
719
679
 
720
- cmark_chunk cmark_clean_title(cmark_chunk *title)
721
- {
722
- cmark_strbuf buf = GH_BUF_INIT;
723
- unsigned char first, last;
724
-
725
- if (title->len == 0) {
726
- cmark_chunk result = CMARK_CHUNK_EMPTY;
727
- return result;
728
- }
729
-
730
- first = title->data[0];
731
- last = title->data[title->len - 1];
732
-
733
- // remove surrounding quotes if any:
734
- if ((first == '\'' && last == '\'') ||
735
- (first == '(' && last == ')') ||
736
- (first == '"' && last == '"')) {
737
- houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
738
- } else {
739
- houdini_unescape_html_f(&buf, title->data, title->len);
740
- }
741
-
742
- cmark_strbuf_unescape(&buf);
743
- return cmark_chunk_buf_detach(&buf);
680
+ cmark_chunk cmark_clean_title(cmark_chunk *title) {
681
+ cmark_strbuf buf = GH_BUF_INIT;
682
+ unsigned char first, last;
683
+
684
+ if (title->len == 0) {
685
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
686
+ return result;
687
+ }
688
+
689
+ first = title->data[0];
690
+ last = title->data[title->len - 1];
691
+
692
+ // remove surrounding quotes if any:
693
+ if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
694
+ (first == '"' && last == '"')) {
695
+ houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
696
+ } else {
697
+ houdini_unescape_html_f(&buf, title->data, title->len);
698
+ }
699
+
700
+ cmark_strbuf_unescape(&buf);
701
+ return cmark_chunk_buf_detach(&buf);
744
702
  }
745
703
 
746
704
  // Parse an autolink or HTML tag.
747
705
  // Assumes the subject has a '<' character at the current position.
748
- static cmark_node* handle_pointy_brace(subject* subj)
749
- {
750
- bufsize_t matchlen = 0;
751
- cmark_chunk contents;
752
-
753
- advance(subj); // advance past first <
754
-
755
- // first try to match a URL autolink
756
- matchlen = scan_autolink_uri(&subj->input, subj->pos);
757
- if (matchlen > 0) {
758
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
759
- subj->pos += matchlen;
760
-
761
- return make_autolink(contents, 0);
762
- }
763
-
764
- // next try to match an email autolink
765
- matchlen = scan_autolink_email(&subj->input, subj->pos);
766
- if (matchlen > 0) {
767
- contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
768
- subj->pos += matchlen;
769
-
770
- return make_autolink(contents, 1);
771
- }
772
-
773
- // finally, try to match an html tag
774
- matchlen = scan_html_tag(&subj->input, subj->pos);
775
- if (matchlen > 0) {
776
- contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
777
- subj->pos += matchlen;
778
- return make_raw_html(contents);
779
- }
780
-
781
- // if nothing matches, just return the opening <:
782
- return make_str(cmark_chunk_literal("<"));
706
+ static cmark_node *handle_pointy_brace(subject *subj) {
707
+ bufsize_t matchlen = 0;
708
+ cmark_chunk contents;
709
+
710
+ advance(subj); // advance past first <
711
+
712
+ // first try to match a URL autolink
713
+ matchlen = scan_autolink_uri(&subj->input, subj->pos);
714
+ if (matchlen > 0) {
715
+ contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
716
+ subj->pos += matchlen;
717
+
718
+ return make_autolink(contents, 0);
719
+ }
720
+
721
+ // next try to match an email autolink
722
+ matchlen = scan_autolink_email(&subj->input, subj->pos);
723
+ if (matchlen > 0) {
724
+ contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
725
+ subj->pos += matchlen;
726
+
727
+ return make_autolink(contents, 1);
728
+ }
729
+
730
+ // finally, try to match an html tag
731
+ matchlen = scan_html_tag(&subj->input, subj->pos);
732
+ if (matchlen > 0) {
733
+ contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
734
+ subj->pos += matchlen;
735
+ return make_raw_html(contents);
736
+ }
737
+
738
+ // if nothing matches, just return the opening <:
739
+ return make_str(cmark_chunk_literal("<"));
783
740
  }
784
741
 
785
742
  // Parse a link label. Returns 1 if successful.
786
743
  // Note: unescaped brackets are not allowed in labels.
787
744
  // The label begins with `[` and ends with the first `]` character
788
745
  // encountered. Backticks in labels do not start code spans.
789
- static int link_label(subject* subj, cmark_chunk *raw_label)
790
- {
791
- bufsize_t startpos = subj->pos;
792
- int length = 0;
793
- unsigned char c;
794
-
795
- // advance past [
796
- if (peek_char(subj) == '[') {
797
- advance(subj);
798
- } else {
799
- return 0;
800
- }
801
-
802
- while ((c = peek_char(subj)) && c != '[' && c != ']') {
803
- if (c == '\\') {
804
- advance(subj);
805
- length++;
806
- if (cmark_ispunct(peek_char(subj))) {
807
- advance(subj);
808
- length++;
809
- }
810
- } else {
811
- advance(subj);
812
- length++;
813
- }
814
- if (length > MAX_LINK_LABEL_LENGTH) {
815
- goto noMatch;
816
- }
817
- }
818
-
819
- if (c == ']') { // match found
820
- *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
821
- cmark_chunk_trim(raw_label);
822
- advance(subj); // advance past ]
823
- return 1;
824
- }
746
+ static int link_label(subject *subj, cmark_chunk *raw_label) {
747
+ bufsize_t startpos = subj->pos;
748
+ int length = 0;
749
+ unsigned char c;
750
+
751
+ // advance past [
752
+ if (peek_char(subj) == '[') {
753
+ advance(subj);
754
+ } else {
755
+ return 0;
756
+ }
757
+
758
+ while ((c = peek_char(subj)) && c != '[' && c != ']') {
759
+ if (c == '\\') {
760
+ advance(subj);
761
+ length++;
762
+ if (cmark_ispunct(peek_char(subj))) {
763
+ advance(subj);
764
+ length++;
765
+ }
766
+ } else {
767
+ advance(subj);
768
+ length++;
769
+ }
770
+ if (length > MAX_LINK_LABEL_LENGTH) {
771
+ goto noMatch;
772
+ }
773
+ }
774
+
775
+ if (c == ']') { // match found
776
+ *raw_label =
777
+ cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
778
+ cmark_chunk_trim(raw_label);
779
+ advance(subj); // advance past ]
780
+ return 1;
781
+ }
825
782
 
826
783
  noMatch:
827
- subj->pos = startpos; // rewind
828
- return 0;
829
-
784
+ subj->pos = startpos; // rewind
785
+ return 0;
830
786
  }
831
787
 
832
788
  // Return a link, an image, or a literal close bracket.
833
- static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
834
- {
835
- bufsize_t initial_pos;
836
- bufsize_t starturl, endurl, starttitle, endtitle, endall;
837
- bufsize_t n;
838
- bufsize_t sps;
839
- cmark_reference *ref;
840
- bool is_image = false;
841
- cmark_chunk url_chunk, title_chunk;
842
- cmark_chunk url, title;
843
- delimiter *opener;
844
- cmark_node *link_text;
845
- cmark_node *inl;
846
- cmark_chunk raw_label;
847
- int found_label;
848
-
849
- advance(subj); // advance past ]
850
- initial_pos = subj->pos;
851
-
852
- // look through list of delimiters for a [ or !
853
- opener = subj->last_delim;
854
- while (opener) {
855
- if (opener->delim_char == '[' || opener->delim_char == '!') {
856
- break;
857
- }
858
- opener = opener->previous;
859
- }
860
-
861
- if (opener == NULL) {
862
- return make_str(cmark_chunk_literal("]"));
863
- }
864
-
865
- if (!opener->active) {
866
- // take delimiter off stack
867
- remove_delimiter(subj, opener);
868
- return make_str(cmark_chunk_literal("]"));
869
- }
870
-
871
- // If we got here, we matched a potential link/image text.
872
- is_image = opener->delim_char == '!';
873
- link_text = opener->inl_text->next;
874
-
875
- // Now we check to see if it's a link/image.
876
-
877
- // First, look for an inline link.
878
- if (peek_char(subj) == '(' &&
879
- ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
880
- ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
881
-
882
- // try to parse an explicit link:
883
- starturl = subj->pos + 1 + sps; // after (
884
- endurl = starturl + n;
885
- starttitle = endurl + scan_spacechars(&subj->input, endurl);
886
-
887
- // ensure there are spaces btw url and title
888
- endtitle = (starttitle == endurl) ? starttitle :
889
- starttitle + scan_link_title(&subj->input, starttitle);
890
-
891
- endall = endtitle + scan_spacechars(&subj->input, endtitle);
892
-
893
- if (peek_at(subj, endall) == ')') {
894
- subj->pos = endall + 1;
895
-
896
- url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
897
- title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
898
- url = cmark_clean_url(&url_chunk);
899
- title = cmark_clean_title(&title_chunk);
900
- cmark_chunk_free(&url_chunk);
901
- cmark_chunk_free(&title_chunk);
902
- goto match;
903
-
904
- } else {
905
- goto noMatch;
906
- }
907
- }
908
-
909
- // Next, look for a following [link label] that matches in refmap.
910
- // skip spaces
911
- subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
912
- raw_label = cmark_chunk_literal("");
913
- found_label = link_label(subj, &raw_label);
914
- if (!found_label || raw_label.len == 0) {
915
- cmark_chunk_free(&raw_label);
916
- raw_label = cmark_chunk_dup(&subj->input, opener->position,
917
- initial_pos - opener->position - 1);
918
- }
919
-
920
- if (!found_label) {
921
- // If we have a shortcut reference link, back up
922
- // to before the spacse we skipped.
923
- subj->pos = initial_pos;
924
- }
925
-
926
- ref = cmark_reference_lookup(subj->refmap, &raw_label);
927
- cmark_chunk_free(&raw_label);
928
-
929
- if (ref != NULL) { // found
930
- url = chunk_clone(&ref->url);
931
- title = chunk_clone(&ref->title);
932
- goto match;
933
- } else {
934
- goto noMatch;
935
- }
789
+ static cmark_node *handle_close_bracket(subject *subj, cmark_node *parent) {
790
+ bufsize_t initial_pos;
791
+ bufsize_t starturl, endurl, starttitle, endtitle, endall;
792
+ bufsize_t n;
793
+ bufsize_t sps;
794
+ cmark_reference *ref;
795
+ bool is_image = false;
796
+ cmark_chunk url_chunk, title_chunk;
797
+ cmark_chunk url, title;
798
+ delimiter *opener;
799
+ cmark_node *link_text;
800
+ cmark_node *inl;
801
+ cmark_chunk raw_label;
802
+ int found_label;
803
+
804
+ advance(subj); // advance past ]
805
+ initial_pos = subj->pos;
806
+
807
+ // look through list of delimiters for a [ or !
808
+ opener = subj->last_delim;
809
+ while (opener) {
810
+ if (opener->delim_char == '[' || opener->delim_char == '!') {
811
+ break;
812
+ }
813
+ opener = opener->previous;
814
+ }
815
+
816
+ if (opener == NULL) {
817
+ return make_str(cmark_chunk_literal("]"));
818
+ }
819
+
820
+ if (!opener->active) {
821
+ // take delimiter off stack
822
+ remove_delimiter(subj, opener);
823
+ return make_str(cmark_chunk_literal("]"));
824
+ }
825
+
826
+ // If we got here, we matched a potential link/image text.
827
+ is_image = opener->delim_char == '!';
828
+ link_text = opener->inl_text->next;
829
+
830
+ // Now we check to see if it's a link/image.
831
+
832
+ // First, look for an inline link.
833
+ if (peek_char(subj) == '(' &&
834
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
835
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
836
+
837
+ // try to parse an explicit link:
838
+ starturl = subj->pos + 1 + sps; // after (
839
+ endurl = starturl + n;
840
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
841
+
842
+ // ensure there are spaces btw url and title
843
+ endtitle = (starttitle == endurl)
844
+ ? starttitle
845
+ : starttitle + scan_link_title(&subj->input, starttitle);
846
+
847
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
848
+
849
+ if (peek_at(subj, endall) == ')') {
850
+ subj->pos = endall + 1;
851
+
852
+ url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
853
+ title_chunk =
854
+ cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
855
+ url = cmark_clean_url(&url_chunk);
856
+ title = cmark_clean_title(&title_chunk);
857
+ cmark_chunk_free(&url_chunk);
858
+ cmark_chunk_free(&title_chunk);
859
+ goto match;
860
+
861
+ } else {
862
+ goto noMatch;
863
+ }
864
+ }
865
+
866
+ // Next, look for a following [link label] that matches in refmap.
867
+ // skip spaces
868
+ subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
869
+ raw_label = cmark_chunk_literal("");
870
+ found_label = link_label(subj, &raw_label);
871
+ if (!found_label || raw_label.len == 0) {
872
+ cmark_chunk_free(&raw_label);
873
+ raw_label = cmark_chunk_dup(&subj->input, opener->position,
874
+ initial_pos - opener->position - 1);
875
+ }
876
+
877
+ if (!found_label) {
878
+ // If we have a shortcut reference link, back up
879
+ // to before the spacse we skipped.
880
+ subj->pos = initial_pos;
881
+ }
882
+
883
+ ref = cmark_reference_lookup(subj->refmap, &raw_label);
884
+ cmark_chunk_free(&raw_label);
885
+
886
+ if (ref != NULL) { // found
887
+ url = chunk_clone(&ref->url);
888
+ title = chunk_clone(&ref->title);
889
+ goto match;
890
+ } else {
891
+ goto noMatch;
892
+ }
936
893
 
937
894
  noMatch:
938
- // If we fall through to here, it means we didn't match a link:
939
- remove_delimiter(subj, opener); // remove this opener from delimiter list
940
- subj->pos = initial_pos;
941
- return make_str(cmark_chunk_literal("]"));
895
+ // If we fall through to here, it means we didn't match a link:
896
+ remove_delimiter(subj, opener); // remove this opener from delimiter list
897
+ subj->pos = initial_pos;
898
+ return make_str(cmark_chunk_literal("]"));
942
899
 
943
900
  match:
944
- inl = opener->inl_text;
945
- inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK;
946
- cmark_chunk_free(&inl->as.literal);
947
- inl->first_child = link_text;
948
- process_emphasis(subj, opener);
949
- inl->as.link.url = url;
950
- inl->as.link.title = title;
951
- inl->next = NULL;
952
- if (link_text) {
953
- cmark_node *tmp;
954
- link_text->prev = NULL;
955
- for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
956
- tmp->parent = inl;
957
- }
958
- tmp->parent = inl;
959
- inl->last_child = tmp;
960
- }
961
- parent->last_child = inl;
962
-
963
- // Now, if we have a link, we also want to deactivate earlier link
964
- // delimiters. (This code can be removed if we decide to allow links
965
- // inside links.)
966
- remove_delimiter(subj, opener);
967
- if (!is_image) {
968
- opener = subj->last_delim;
969
- while (opener != NULL) {
970
- if (opener->delim_char == '[') {
971
- if (!opener->active) {
972
- break;
973
- } else {
974
- opener->active = false;
975
- }
976
- }
977
- opener = opener->previous;
978
- }
979
- }
980
-
981
- return NULL;
901
+ inl = opener->inl_text;
902
+ inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK;
903
+ cmark_chunk_free(&inl->as.literal);
904
+ inl->first_child = link_text;
905
+ process_emphasis(subj, opener);
906
+ inl->as.link.url = url;
907
+ inl->as.link.title = title;
908
+ inl->next = NULL;
909
+ if (link_text) {
910
+ cmark_node *tmp;
911
+ link_text->prev = NULL;
912
+ for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
913
+ tmp->parent = inl;
914
+ }
915
+ tmp->parent = inl;
916
+ inl->last_child = tmp;
917
+ }
918
+ parent->last_child = inl;
919
+
920
+ // Now, if we have a link, we also want to deactivate earlier link
921
+ // delimiters. (This code can be removed if we decide to allow links
922
+ // inside links.)
923
+ remove_delimiter(subj, opener);
924
+ if (!is_image) {
925
+ opener = subj->last_delim;
926
+ while (opener != NULL) {
927
+ if (opener->delim_char == '[') {
928
+ if (!opener->active) {
929
+ break;
930
+ } else {
931
+ opener->active = false;
932
+ }
933
+ }
934
+ opener = opener->previous;
935
+ }
936
+ }
937
+
938
+ return NULL;
982
939
  }
983
940
 
984
941
  // Parse a hard or soft linebreak, returning an inline.
985
- // Assumes the subject has a newline at the current position.
986
- static cmark_node* handle_newline(subject *subj)
987
- {
988
- bufsize_t nlpos = subj->pos;
989
- // skip over newline
990
- advance(subj);
991
- // skip spaces at beginning of line
992
- skip_spaces(subj);
993
- if (nlpos > 1 &&
994
- peek_at(subj, nlpos - 1) == ' ' &&
995
- peek_at(subj, nlpos - 2) == ' ') {
996
- return make_linebreak();
997
- } else {
998
- return make_softbreak();
999
- }
942
+ // Assumes the subject has a cr or newline at the current position.
943
+ static cmark_node *handle_newline(subject *subj) {
944
+ bufsize_t nlpos = subj->pos;
945
+ // skip over cr, crlf, or lf:
946
+ if (peek_at(subj, subj->pos) == '\r') {
947
+ advance(subj);
948
+ }
949
+ if (peek_at(subj, subj->pos) == '\n') {
950
+ advance(subj);
951
+ }
952
+ // skip spaces at beginning of line
953
+ skip_spaces(subj);
954
+ if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
955
+ peek_at(subj, nlpos - 2) == ' ') {
956
+ return make_linebreak();
957
+ } else {
958
+ return make_softbreak();
959
+ }
1000
960
  }
1001
961
 
1002
- static bufsize_t subject_find_special_char(subject *subj, int options)
1003
- {
1004
- // "\r\n\\`&_*[]<!"
1005
- static const int8_t SPECIAL_CHARS[256] = {
1006
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
1007
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1008
- 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1009
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1010
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1011
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1012
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1013
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1014
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1015
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1016
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1017
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1018
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1019
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1020
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1021
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1022
- };
1023
-
1024
- // " ' . -
1025
- static const char SMART_PUNCT_CHARS[] = {
1026
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1027
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1028
- 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
1029
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1030
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1031
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1032
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1033
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1034
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1035
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1036
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1037
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1038
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1039
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1040
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1041
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1042
- };
1043
-
1044
- bufsize_t n = subj->pos + 1;
1045
-
1046
- while (n < subj->input.len) {
1047
- if (SPECIAL_CHARS[subj->input.data[n]])
1048
- return n;
1049
- if (options & CMARK_OPT_SMART &&
1050
- SMART_PUNCT_CHARS[subj->input.data[n]])
1051
- return n;
1052
- n++;
1053
- }
1054
-
1055
- return subj->input.len;
962
+ static bufsize_t subject_find_special_char(subject *subj, int options) {
963
+ // "\r\n\\`&_*[]<!"
964
+ static const int8_t SPECIAL_CHARS[256] = {
965
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
966
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
967
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
968
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
969
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
970
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
971
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
972
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
973
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
974
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
975
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
976
+
977
+ // " ' . -
978
+ static const char SMART_PUNCT_CHARS[] = {
979
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
980
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
981
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
982
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
983
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
984
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
985
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
986
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
987
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
988
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
989
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
990
+ };
991
+
992
+ bufsize_t n = subj->pos + 1;
993
+
994
+ while (n < subj->input.len) {
995
+ if (SPECIAL_CHARS[subj->input.data[n]])
996
+ return n;
997
+ if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
998
+ return n;
999
+ n++;
1000
+ }
1001
+
1002
+ return subj->input.len;
1056
1003
  }
1057
1004
 
1058
1005
  // Parse an inline, advancing subject, and add it as a child of parent.
1059
1006
  // Return 0 if no inline can be parsed, 1 otherwise.
1060
- static int parse_inline(subject* subj, cmark_node * parent, int options)
1061
- {
1062
- cmark_node* new_inl = NULL;
1063
- cmark_chunk contents;
1064
- unsigned char c;
1065
- bufsize_t endpos;
1066
- c = peek_char(subj);
1067
- if (c == 0) {
1068
- return 0;
1069
- }
1070
- switch(c) {
1071
- case '\r':
1072
- case '\n':
1073
- new_inl = handle_newline(subj);
1074
- break;
1075
- case '`':
1076
- new_inl = handle_backticks(subj);
1077
- break;
1078
- case '\\':
1079
- new_inl = handle_backslash(subj);
1080
- break;
1081
- case '&':
1082
- new_inl = handle_entity(subj);
1083
- break;
1084
- case '<':
1085
- new_inl = handle_pointy_brace(subj);
1086
- break;
1087
- case '*':
1088
- case '_':
1089
- case '\'':
1090
- case '"':
1091
- new_inl = handle_delim(subj, c, options & CMARK_OPT_SMART);
1092
- break;
1093
- case '-':
1094
- new_inl = handle_hyphen(subj, options & CMARK_OPT_SMART);
1095
- break;
1096
- case '.':
1097
- new_inl = handle_period(subj, options & CMARK_OPT_SMART);
1098
- break;
1099
- case '[':
1100
- advance(subj);
1101
- new_inl = make_str(cmark_chunk_literal("["));
1102
- push_delimiter(subj, '[', true, false, new_inl);
1103
- break;
1104
- case ']':
1105
- new_inl = handle_close_bracket(subj, parent);
1106
- break;
1107
- case '!':
1108
- advance(subj);
1109
- if (peek_char(subj) == '[') {
1110
- advance(subj);
1111
- new_inl = make_str(cmark_chunk_literal("!["));
1112
- push_delimiter(subj, '!', false, true, new_inl);
1113
- } else {
1114
- new_inl = make_str(cmark_chunk_literal("!"));
1115
- }
1116
- break;
1117
- default:
1118
- endpos = subject_find_special_char(subj, options);
1119
- contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1120
- subj->pos = endpos;
1121
-
1122
- // if we're at a newline, strip trailing spaces.
1123
- if (S_is_line_end_char(peek_char(subj))) {
1124
- cmark_chunk_rtrim(&contents);
1125
- }
1126
-
1127
- new_inl = make_str(contents);
1128
- }
1129
- if (new_inl != NULL) {
1130
- cmark_node_append_child(parent, new_inl);
1131
- }
1132
-
1133
- return 1;
1007
+ static int parse_inline(subject *subj, cmark_node *parent, int options) {
1008
+ cmark_node *new_inl = NULL;
1009
+ cmark_chunk contents;
1010
+ unsigned char c;
1011
+ bufsize_t endpos;
1012
+ c = peek_char(subj);
1013
+ if (c == 0) {
1014
+ return 0;
1015
+ }
1016
+ switch (c) {
1017
+ case '\r':
1018
+ case '\n':
1019
+ new_inl = handle_newline(subj);
1020
+ break;
1021
+ case '`':
1022
+ new_inl = handle_backticks(subj);
1023
+ break;
1024
+ case '\\':
1025
+ new_inl = handle_backslash(subj);
1026
+ break;
1027
+ case '&':
1028
+ new_inl = handle_entity(subj);
1029
+ break;
1030
+ case '<':
1031
+ new_inl = handle_pointy_brace(subj);
1032
+ break;
1033
+ case '*':
1034
+ case '_':
1035
+ case '\'':
1036
+ case '"':
1037
+ new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1038
+ break;
1039
+ case '-':
1040
+ new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
1041
+ break;
1042
+ case '.':
1043
+ new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
1044
+ break;
1045
+ case '[':
1046
+ advance(subj);
1047
+ new_inl = make_str(cmark_chunk_literal("["));
1048
+ push_delimiter(subj, '[', true, false, new_inl);
1049
+ break;
1050
+ case ']':
1051
+ new_inl = handle_close_bracket(subj, parent);
1052
+ break;
1053
+ case '!':
1054
+ advance(subj);
1055
+ if (peek_char(subj) == '[') {
1056
+ advance(subj);
1057
+ new_inl = make_str(cmark_chunk_literal("!["));
1058
+ push_delimiter(subj, '!', false, true, new_inl);
1059
+ } else {
1060
+ new_inl = make_str(cmark_chunk_literal("!"));
1061
+ }
1062
+ break;
1063
+ default:
1064
+ endpos = subject_find_special_char(subj, options);
1065
+ contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1066
+ subj->pos = endpos;
1067
+
1068
+ // if we're at a newline, strip trailing spaces.
1069
+ if (S_is_line_end_char(peek_char(subj))) {
1070
+ cmark_chunk_rtrim(&contents);
1071
+ }
1072
+
1073
+ new_inl = make_str(contents);
1074
+ }
1075
+ if (new_inl != NULL) {
1076
+ cmark_node_append_child(parent, new_inl);
1077
+ }
1078
+
1079
+ return 1;
1134
1080
  }
1135
1081
 
1136
1082
  // Parse inlines from parent's string_content, adding as children of parent.
1137
- extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options)
1138
- {
1139
- subject subj;
1140
- subject_from_buf(&subj, &parent->string_content, refmap);
1141
- cmark_chunk_rtrim(&subj.input);
1083
+ extern void cmark_parse_inlines(cmark_node *parent, cmark_reference_map *refmap,
1084
+ int options) {
1085
+ subject subj;
1086
+ subject_from_buf(&subj, &parent->string_content, refmap);
1087
+ cmark_chunk_rtrim(&subj.input);
1142
1088
 
1143
- while (!is_eof(&subj) && parse_inline(&subj, parent, options)) ;
1089
+ while (!is_eof(&subj) && parse_inline(&subj, parent, options))
1090
+ ;
1144
1091
 
1145
- process_emphasis(&subj, NULL);
1092
+ process_emphasis(&subj, NULL);
1146
1093
  }
1147
1094
 
1148
1095
  // Parse zero or more space characters, including at most one newline.
1149
- static void spnl(subject* subj)
1150
- {
1151
- skip_spaces(subj);
1152
- if (skip_line_end(subj)) {
1153
- skip_spaces(subj);
1154
- }
1096
+ static void spnl(subject *subj) {
1097
+ skip_spaces(subj);
1098
+ if (skip_line_end(subj)) {
1099
+ skip_spaces(subj);
1100
+ }
1155
1101
  }
1156
1102
 
1157
1103
  // Parse reference. Assumes string begins with '[' character.
1158
1104
  // Modify refmap if a reference is encountered.
1159
1105
  // Return 0 if no reference found, otherwise position of subject
1160
1106
  // after reference is parsed.
1161
- bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap)
1162
- {
1163
- subject subj;
1164
-
1165
- cmark_chunk lab;
1166
- cmark_chunk url;
1167
- cmark_chunk title;
1168
-
1169
- bufsize_t matchlen = 0;
1170
- bufsize_t beforetitle;
1171
-
1172
- subject_from_buf(&subj, input, NULL);
1173
-
1174
- // parse label:
1175
- if (!link_label(&subj, &lab) || lab.len == 0)
1176
- return 0;
1177
-
1178
- // colon:
1179
- if (peek_char(&subj) == ':') {
1180
- advance(&subj);
1181
- } else {
1182
- return 0;
1183
- }
1184
-
1185
- // parse link url:
1186
- spnl(&subj);
1187
- matchlen = scan_link_url(&subj.input, subj.pos);
1188
- if (matchlen) {
1189
- url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1190
- subj.pos += matchlen;
1191
- } else {
1192
- return 0;
1193
- }
1194
-
1195
- // parse optional link_title
1196
- beforetitle = subj.pos;
1197
- spnl(&subj);
1198
- matchlen = scan_link_title(&subj.input, subj.pos);
1199
- if (matchlen) {
1200
- title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1201
- subj.pos += matchlen;
1202
- } else {
1203
- subj.pos = beforetitle;
1204
- title = cmark_chunk_literal("");
1205
- }
1206
-
1207
- // parse final spaces and newline:
1208
- skip_spaces(&subj);
1209
- if (!skip_line_end(&subj)) {
1210
- if (matchlen) { // try rewinding before title
1211
- subj.pos = beforetitle;
1212
- skip_spaces(&subj);
1213
- if (!skip_line_end(&subj)) {
1214
- return 0;
1215
- }
1216
- } else {
1217
- return 0;
1218
- }
1219
- }
1220
- // insert reference into refmap
1221
- cmark_reference_create(refmap, &lab, &url, &title);
1222
- return subj.pos;
1107
+ bufsize_t cmark_parse_reference_inline(cmark_strbuf *input,
1108
+ cmark_reference_map *refmap) {
1109
+ subject subj;
1110
+
1111
+ cmark_chunk lab;
1112
+ cmark_chunk url;
1113
+ cmark_chunk title;
1114
+
1115
+ bufsize_t matchlen = 0;
1116
+ bufsize_t beforetitle;
1117
+
1118
+ subject_from_buf(&subj, input, NULL);
1119
+
1120
+ // parse label:
1121
+ if (!link_label(&subj, &lab) || lab.len == 0)
1122
+ return 0;
1123
+
1124
+ // colon:
1125
+ if (peek_char(&subj) == ':') {
1126
+ advance(&subj);
1127
+ } else {
1128
+ return 0;
1129
+ }
1130
+
1131
+ // parse link url:
1132
+ spnl(&subj);
1133
+ matchlen = scan_link_url(&subj.input, subj.pos);
1134
+ if (matchlen) {
1135
+ url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1136
+ subj.pos += matchlen;
1137
+ } else {
1138
+ return 0;
1139
+ }
1140
+
1141
+ // parse optional link_title
1142
+ beforetitle = subj.pos;
1143
+ spnl(&subj);
1144
+ matchlen = scan_link_title(&subj.input, subj.pos);
1145
+ if (matchlen) {
1146
+ title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1147
+ subj.pos += matchlen;
1148
+ } else {
1149
+ subj.pos = beforetitle;
1150
+ title = cmark_chunk_literal("");
1151
+ }
1152
+
1153
+ // parse final spaces and newline:
1154
+ skip_spaces(&subj);
1155
+ if (!skip_line_end(&subj)) {
1156
+ if (matchlen) { // try rewinding before title
1157
+ subj.pos = beforetitle;
1158
+ skip_spaces(&subj);
1159
+ if (!skip_line_end(&subj)) {
1160
+ return 0;
1161
+ }
1162
+ } else {
1163
+ return 0;
1164
+ }
1165
+ }
1166
+ // insert reference into refmap
1167
+ cmark_reference_create(refmap, &lab, &url, &title);
1168
+ return subj.pos;
1223
1169
  }