commonmarker 0.16.8 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/commonmarker/cmark/CMakeLists.txt +3 -3
- data/ext/commonmarker/cmark/README.md +2 -2
- data/ext/commonmarker/cmark/api_test/CMakeLists.txt +2 -1
- data/ext/commonmarker/cmark/api_test/harness.c +27 -0
- data/ext/commonmarker/cmark/api_test/main.c +179 -3
- data/ext/commonmarker/cmark/changelog.txt +148 -0
- data/ext/commonmarker/cmark/extensions/autolink.c +8 -0
- data/ext/commonmarker/cmark/extensions/core-extensions.c +11 -1
- data/ext/commonmarker/cmark/extensions/core-extensions.h +1 -1
- data/ext/commonmarker/cmark/extensions/strikethrough.c +5 -0
- data/ext/commonmarker/cmark/extensions/table.c +44 -23
- data/ext/commonmarker/cmark/src/blocks.c +3 -2
- data/ext/commonmarker/cmark/src/cmark_extension_api.h +9 -0
- data/ext/commonmarker/cmark/src/inlines.c +208 -93
- data/ext/commonmarker/cmark/src/inlines.h +2 -2
- data/ext/commonmarker/cmark/src/iterator.c +1 -0
- data/ext/commonmarker/cmark/src/latex.c +11 -11
- data/ext/commonmarker/cmark/src/main.c +12 -11
- data/ext/commonmarker/cmark/src/node.h +1 -0
- data/ext/commonmarker/cmark/src/scanners.c +34 -24
- data/ext/commonmarker/cmark/src/scanners.re +1 -1
- data/ext/commonmarker/cmark/src/syntax_extension.c +5 -0
- data/ext/commonmarker/cmark/src/syntax_extension.h +1 -0
- data/ext/commonmarker/cmark/test/CMakeLists.txt +3 -2
- data/ext/commonmarker/cmark/test/cmark.py +2 -5
- data/ext/commonmarker/cmark/test/regression.txt +35 -1
- data/ext/commonmarker/cmark/test/smart_punct.txt +9 -0
- data/ext/commonmarker/cmark/test/spec.txt +88 -26
- data/ext/commonmarker/commonmarker.c +1 -1
- data/ext/commonmarker/extconf.rb +1 -1
- data/lib/commonmarker/version.rb +1 -1
- data/test/test_attributes.rb +1 -80
- metadata +2 -2
@@ -149,6 +149,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
|
149
149
|
size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
|
150
150
|
uint8_t *data = chunk->data + max_rewind;
|
151
151
|
size_t size = chunk->len - max_rewind;
|
152
|
+
int start = cmark_inline_parser_get_column(inline_parser);
|
152
153
|
|
153
154
|
size_t link_end;
|
154
155
|
|
@@ -187,6 +188,13 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
|
|
187
188
|
cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
|
188
189
|
cmark_node_append_child(node, text);
|
189
190
|
|
191
|
+
node->start_line = text->start_line =
|
192
|
+
node->end_line = text->end_line =
|
193
|
+
cmark_inline_parser_get_line(inline_parser);
|
194
|
+
|
195
|
+
node->start_column = text->start_column = start - 1;
|
196
|
+
node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
|
197
|
+
|
190
198
|
return node;
|
191
199
|
}
|
192
200
|
|
@@ -3,8 +3,9 @@
|
|
3
3
|
#include "strikethrough.h"
|
4
4
|
#include "table.h"
|
5
5
|
#include "tagfilter.h"
|
6
|
+
#include "registry.h"
|
6
7
|
|
7
|
-
int core_extensions_registration(cmark_plugin *plugin) {
|
8
|
+
static int core_extensions_registration(cmark_plugin *plugin) {
|
8
9
|
cmark_plugin_register_syntax_extension(plugin, create_table_extension());
|
9
10
|
cmark_plugin_register_syntax_extension(plugin,
|
10
11
|
create_strikethrough_extension());
|
@@ -12,3 +13,12 @@ int core_extensions_registration(cmark_plugin *plugin) {
|
|
12
13
|
cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension());
|
13
14
|
return 1;
|
14
15
|
}
|
16
|
+
|
17
|
+
void core_extensions_ensure_registered(void) {
|
18
|
+
static int registered = 0;
|
19
|
+
|
20
|
+
if (!registered) {
|
21
|
+
cmark_register_plugin(core_extensions_registration);
|
22
|
+
registered = 1;
|
23
|
+
}
|
24
|
+
}
|
@@ -9,7 +9,7 @@ extern "C" {
|
|
9
9
|
#include "cmarkextensions_export.h"
|
10
10
|
|
11
11
|
CMARKEXTENSIONS_EXPORT
|
12
|
-
|
12
|
+
void core_extensions_ensure_registered(void);
|
13
13
|
|
14
14
|
CMARKEXTENSIONS_EXPORT
|
15
15
|
uint16_t cmarkextensions_get_table_columns(cmark_node *node);
|
@@ -23,6 +23,8 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,
|
|
23
23
|
|
24
24
|
res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
|
25
25
|
cmark_node_set_literal(res, buffer);
|
26
|
+
res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser);
|
27
|
+
res->start_column = cmark_inline_parser_get_column(inline_parser) - delims;
|
26
28
|
|
27
29
|
if (left_flanking || right_flanking) {
|
28
30
|
cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking,
|
@@ -58,6 +60,7 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
|
|
58
60
|
tmp = next;
|
59
61
|
}
|
60
62
|
|
63
|
+
strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
|
61
64
|
cmark_node_free(closer->inl_text);
|
62
65
|
|
63
66
|
delim = closer;
|
@@ -147,5 +150,7 @@ cmark_syntax_extension *create_strikethrough_extension(void) {
|
|
147
150
|
special_chars = cmark_llist_append(mem, special_chars, (void *)'~');
|
148
151
|
cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
|
149
152
|
|
153
|
+
cmark_syntax_extension_set_emphasis(ext, true);
|
154
|
+
|
150
155
|
return ext;
|
151
156
|
}
|
@@ -26,9 +26,16 @@ typedef struct {
|
|
26
26
|
bool is_header;
|
27
27
|
} node_table_row;
|
28
28
|
|
29
|
+
typedef struct {
|
30
|
+
cmark_strbuf *buf;
|
31
|
+
int start_offset, end_offset, internal_offset;
|
32
|
+
} node_cell;
|
33
|
+
|
29
34
|
static void free_table_cell(cmark_mem *mem, void *data) {
|
30
|
-
|
31
|
-
|
35
|
+
node_cell *cell = (node_cell *)data;
|
36
|
+
cmark_strbuf_free((cmark_strbuf *)cell->buf);
|
37
|
+
mem->free(cell->buf);
|
38
|
+
mem->free(cell);
|
32
39
|
}
|
33
40
|
|
34
41
|
static void free_table_row(cmark_mem *mem, table_row *row) {
|
@@ -105,14 +112,13 @@ static table_row *row_from_string(cmark_syntax_extension *self,
|
|
105
112
|
cmark_parser *parser, unsigned char *string,
|
106
113
|
int len) {
|
107
114
|
table_row *row = NULL;
|
108
|
-
bufsize_t cell_matched, pipe_matched, offset
|
115
|
+
bufsize_t cell_matched, pipe_matched, offset;
|
109
116
|
|
110
117
|
row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
|
111
118
|
row->n_columns = 0;
|
112
119
|
row->cells = NULL;
|
113
120
|
|
114
|
-
|
115
|
-
++offset;
|
121
|
+
offset = scan_table_cell_end(string, len, 0);
|
116
122
|
|
117
123
|
do {
|
118
124
|
cell_matched = scan_table_cell(string, len, offset);
|
@@ -122,8 +128,17 @@ static table_row *row_from_string(cmark_syntax_extension *self,
|
|
122
128
|
cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
|
123
129
|
cell_matched);
|
124
130
|
cmark_strbuf_trim(cell_buf);
|
131
|
+
|
132
|
+
node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
|
133
|
+
cell->buf = cell_buf;
|
134
|
+
cell->start_offset = offset;
|
135
|
+
cell->end_offset = offset + cell_matched - 1;
|
136
|
+
while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
|
137
|
+
--cell->start_offset;
|
138
|
+
++cell->internal_offset;
|
139
|
+
}
|
125
140
|
row->n_columns += 1;
|
126
|
-
row->cells = cmark_llist_append(parser->mem, row->cells,
|
141
|
+
row->cells = cmark_llist_append(parser->mem, row->cells, cell);
|
127
142
|
}
|
128
143
|
|
129
144
|
offset += cell_matched + pipe_matched;
|
@@ -202,15 +217,14 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
|
|
202
217
|
|
203
218
|
parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
|
204
219
|
|
205
|
-
|
206
220
|
set_n_table_columns(parent_container, header_row->n_columns);
|
207
221
|
|
208
222
|
uint8_t *alignments =
|
209
223
|
(uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t));
|
210
224
|
cmark_llist *it = marker_row->cells;
|
211
225
|
for (i = 0; it; it = it->next, ++i) {
|
212
|
-
|
213
|
-
bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':';
|
226
|
+
node_cell *node = (node_cell *)it->data;
|
227
|
+
bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':';
|
214
228
|
|
215
229
|
if (left && right)
|
216
230
|
alignments[i] = 'c';
|
@@ -223,8 +237,10 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
|
|
223
237
|
|
224
238
|
table_header =
|
225
239
|
cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
|
226
|
-
|
240
|
+
parent_container->start_column);
|
227
241
|
cmark_node_set_syntax_extension(table_header, self);
|
242
|
+
table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2;
|
243
|
+
table_header->start_line = table_header->end_line = parent_container->start_line;
|
228
244
|
|
229
245
|
table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
|
230
246
|
ntr->is_header = true;
|
@@ -233,10 +249,13 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
|
|
233
249
|
cmark_llist *tmp;
|
234
250
|
|
235
251
|
for (tmp = header_row->cells; tmp; tmp = tmp->next) {
|
236
|
-
|
252
|
+
node_cell *cell = (node_cell *) tmp->data;
|
237
253
|
cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
|
238
|
-
CMARK_NODE_TABLE_CELL,
|
239
|
-
|
254
|
+
CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
|
255
|
+
header_cell->start_line = header_cell->end_line = parent_container->start_line;
|
256
|
+
header_cell->internal_offset = cell->internal_offset;
|
257
|
+
header_cell->end_column = parent_container->start_column + cell->end_offset;
|
258
|
+
cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr);
|
240
259
|
cmark_node_set_syntax_extension(header_cell, self);
|
241
260
|
}
|
242
261
|
}
|
@@ -262,9 +281,9 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
|
|
262
281
|
|
263
282
|
table_row_block =
|
264
283
|
cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
|
265
|
-
|
266
|
-
|
284
|
+
parent_container->start_column);
|
267
285
|
cmark_node_set_syntax_extension(table_row_block, self);
|
286
|
+
table_row_block->end_column = parent_container->end_column;
|
268
287
|
table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row));
|
269
288
|
|
270
289
|
row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser),
|
@@ -275,17 +294,19 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
|
|
275
294
|
int i, table_columns = get_n_table_columns(parent_container);
|
276
295
|
|
277
296
|
for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) {
|
278
|
-
|
279
|
-
cmark_node *
|
280
|
-
CMARK_NODE_TABLE_CELL,
|
281
|
-
|
282
|
-
|
297
|
+
node_cell *cell = (node_cell *) tmp->data;
|
298
|
+
cmark_node *node = cmark_parser_add_child(parser, table_row_block,
|
299
|
+
CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
|
300
|
+
node->internal_offset = cell->internal_offset;
|
301
|
+
node->end_column = parent_container->start_column + cell->end_offset;
|
302
|
+
cmark_node_set_string_content(node, (char *) cell->buf->ptr);
|
303
|
+
cmark_node_set_syntax_extension(node, self);
|
283
304
|
}
|
284
305
|
|
285
306
|
for (; i < table_columns; ++i) {
|
286
|
-
cmark_node *
|
287
|
-
parser, table_row_block, CMARK_NODE_TABLE_CELL,
|
288
|
-
cmark_node_set_syntax_extension(
|
307
|
+
cmark_node *node = cmark_parser_add_child(
|
308
|
+
parser, table_row_block, CMARK_NODE_TABLE_CELL, 0);
|
309
|
+
cmark_node_set_syntax_extension(node, self);
|
289
310
|
}
|
290
311
|
}
|
291
312
|
|
@@ -394,9 +394,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add)
|
|
394
394
|
for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
|
395
395
|
unsigned char c = (unsigned char)(size_t)tmp_char->data;
|
396
396
|
if (add)
|
397
|
-
cmark_inlines_add_special_character(c);
|
397
|
+
cmark_inlines_add_special_character(c, ext->emphasis);
|
398
398
|
else
|
399
|
-
cmark_inlines_remove_special_character(c);
|
399
|
+
cmark_inlines_remove_special_character(c, ext->emphasis);
|
400
400
|
}
|
401
401
|
}
|
402
402
|
}
|
@@ -980,6 +980,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
|
|
980
980
|
|
981
981
|
(*container)->as.heading.level = level;
|
982
982
|
(*container)->as.heading.setext = false;
|
983
|
+
(*container)->internal_offset = matched;
|
983
984
|
|
984
985
|
} else if (!indented && (matched = scan_open_code_fence(
|
985
986
|
input, parser->first_nonspace))) {
|
@@ -269,6 +269,9 @@ cmark_syntax_extension *cmark_syntax_extension_new (const char *name);
|
|
269
269
|
CMARK_EXPORT
|
270
270
|
cmark_node_type cmark_syntax_extension_add_node(int is_inline);
|
271
271
|
|
272
|
+
CMARK_EXPORT
|
273
|
+
void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, bool emphasis);
|
274
|
+
|
272
275
|
/** See the documentation for 'cmark_syntax_extension'
|
273
276
|
*/
|
274
277
|
CMARK_EXPORT
|
@@ -666,6 +669,12 @@ void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter
|
|
666
669
|
CMARK_EXPORT
|
667
670
|
delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser);
|
668
671
|
|
672
|
+
CMARK_EXPORT
|
673
|
+
int cmark_inline_parser_get_line(cmark_inline_parser *parser);
|
674
|
+
|
675
|
+
CMARK_EXPORT
|
676
|
+
int cmark_inline_parser_get_column(cmark_inline_parser *parser);
|
677
|
+
|
669
678
|
/** Convenience function to scan a given delimiter.
|
670
679
|
*
|
671
680
|
* 'left_flanking' and 'right_flanking' will be set to true if they
|
@@ -23,9 +23,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
|
|
23
23
|
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
|
24
24
|
|
25
25
|
// Macros for creating various kinds of simple.
|
26
|
-
#define make_str(
|
27
|
-
#define make_code(
|
28
|
-
#define make_raw_html(
|
26
|
+
#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
|
27
|
+
#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
|
28
|
+
#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
|
29
29
|
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
|
30
30
|
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
|
31
31
|
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
|
@@ -46,7 +46,10 @@ typedef struct bracket {
|
|
46
46
|
typedef struct subject{
|
47
47
|
cmark_mem *mem;
|
48
48
|
cmark_chunk input;
|
49
|
+
int line;
|
49
50
|
bufsize_t pos;
|
51
|
+
int block_offset;
|
52
|
+
int column_offset;
|
50
53
|
cmark_reference_map *refmap;
|
51
54
|
delimiter *last_delim;
|
52
55
|
bracket *last_bracket;
|
@@ -54,6 +57,9 @@ typedef struct subject{
|
|
54
57
|
bool scanned_for_backticks;
|
55
58
|
} subject;
|
56
59
|
|
60
|
+
// Extensions may populate this.
|
61
|
+
static int8_t SKIP_CHARS[256];
|
62
|
+
|
57
63
|
static CMARK_INLINE bool S_is_line_end_char(char c) {
|
58
64
|
return (c == '\n' || c == '\r');
|
59
65
|
}
|
@@ -63,17 +69,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
|
63
69
|
|
64
70
|
static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options);
|
65
71
|
|
66
|
-
static void subject_from_buf(cmark_mem *mem,
|
67
|
-
cmark_reference_map *refmap);
|
72
|
+
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
|
73
|
+
cmark_strbuf *buffer, cmark_reference_map *refmap);
|
68
74
|
static bufsize_t subject_find_special_char(subject *subj, int options);
|
69
75
|
|
70
76
|
// Create an inline with a literal string value.
|
71
|
-
static CMARK_INLINE cmark_node *make_literal(
|
77
|
+
static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
|
78
|
+
int start_column, int end_column,
|
72
79
|
cmark_chunk s) {
|
73
|
-
cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
|
74
|
-
cmark_strbuf_init(mem, &e->content, 0);
|
80
|
+
cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
|
81
|
+
cmark_strbuf_init(subj->mem, &e->content, 0);
|
75
82
|
e->type = (uint16_t)t;
|
76
83
|
e->as.literal = s;
|
84
|
+
e->start_line = e->end_line = subj->line;
|
85
|
+
// columns are 1 based.
|
86
|
+
e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
|
87
|
+
e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
|
77
88
|
return e;
|
78
89
|
}
|
79
90
|
|
@@ -86,14 +97,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
|
|
86
97
|
}
|
87
98
|
|
88
99
|
// Like make_str, but parses entities.
|
89
|
-
static cmark_node *make_str_with_entities(
|
100
|
+
static cmark_node *make_str_with_entities(subject *subj,
|
101
|
+
int start_column, int end_column,
|
90
102
|
cmark_chunk *content) {
|
91
|
-
cmark_strbuf unescaped = CMARK_BUF_INIT(mem);
|
103
|
+
cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
|
92
104
|
|
93
105
|
if (houdini_unescape_html(&unescaped, content->data, content->len)) {
|
94
|
-
return make_str(
|
106
|
+
return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
|
95
107
|
} else {
|
96
|
-
return make_str(
|
108
|
+
return make_str(subj, start_column, end_column, *content);
|
97
109
|
}
|
98
110
|
}
|
99
111
|
|
@@ -107,7 +119,7 @@ static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
|
|
107
119
|
c.data = (unsigned char *)mem->calloc(len + 1, 1);
|
108
120
|
c.alloc = 1;
|
109
121
|
if (len)
|
110
|
-
|
122
|
+
memcpy(c.data, src->data, len);
|
111
123
|
c.data[len] = '\0';
|
112
124
|
|
113
125
|
return c;
|
@@ -131,23 +143,30 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
|
|
131
143
|
return cmark_chunk_buf_detach(&buf);
|
132
144
|
}
|
133
145
|
|
134
|
-
static CMARK_INLINE cmark_node *make_autolink(
|
135
|
-
int
|
136
|
-
|
137
|
-
link
|
146
|
+
static CMARK_INLINE cmark_node *make_autolink(subject *subj,
|
147
|
+
int start_column, int end_column,
|
148
|
+
cmark_chunk url, int is_email) {
|
149
|
+
cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
|
150
|
+
link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
|
138
151
|
link->as.link.title = cmark_chunk_literal("");
|
139
|
-
|
152
|
+
link->start_line = link->end_line = subj->line;
|
153
|
+
link->start_column = start_column + 1;
|
154
|
+
link->end_column = end_column + 1;
|
155
|
+
cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
|
140
156
|
return link;
|
141
157
|
}
|
142
158
|
|
143
|
-
static void subject_from_buf(cmark_mem *mem,
|
144
|
-
cmark_reference_map *refmap) {
|
159
|
+
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
|
160
|
+
cmark_strbuf *buffer, cmark_reference_map *refmap) {
|
145
161
|
int i;
|
146
162
|
e->mem = mem;
|
147
163
|
e->input.data = buffer->ptr;
|
148
164
|
e->input.len = buffer->size;
|
149
165
|
e->input.alloc = 0;
|
166
|
+
e->line = line_number;
|
150
167
|
e->pos = 0;
|
168
|
+
e->block_offset = block_offset;
|
169
|
+
e->column_offset = 0;
|
151
170
|
e->refmap = refmap;
|
152
171
|
e->last_delim = NULL;
|
153
172
|
e->last_bracket = NULL;
|
@@ -214,6 +233,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
|
|
214
233
|
return cmark_chunk_dup(&subj->input, startpos, len);
|
215
234
|
}
|
216
235
|
|
236
|
+
// Return the number of newlines in a given span of text in a subject. If
|
237
|
+
// the number is greater than zero, also return the number of characters
|
238
|
+
// between the last newline and the end of the span in `since_newline`.
|
239
|
+
static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
|
240
|
+
int nls = 0;
|
241
|
+
int since_nl = 0;
|
242
|
+
|
243
|
+
while (len--) {
|
244
|
+
if (subj->input.data[from++] == '\n') {
|
245
|
+
++nls;
|
246
|
+
since_nl = 0;
|
247
|
+
} else {
|
248
|
+
++since_nl;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
if (!nls)
|
253
|
+
return 0;
|
254
|
+
|
255
|
+
*since_newline = since_nl;
|
256
|
+
return nls;
|
257
|
+
}
|
258
|
+
|
259
|
+
// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
|
260
|
+
// `column_offset` according to the number of newlines in a just-matched span
|
261
|
+
// of text in `subj`.
|
262
|
+
static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
|
263
|
+
if (!(options & CMARK_OPT_SOURCEPOS)) {
|
264
|
+
return;
|
265
|
+
}
|
266
|
+
|
267
|
+
int since_newline;
|
268
|
+
int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
|
269
|
+
if (newlines) {
|
270
|
+
subj->line += newlines;
|
271
|
+
node->end_line += newlines;
|
272
|
+
node->end_column = since_newline;
|
273
|
+
subj->column_offset = -subj->pos + since_newline + extra;
|
274
|
+
}
|
275
|
+
}
|
276
|
+
|
217
277
|
// Try to process a backtick code span that began with a
|
218
278
|
// span of ticks of length openticklength length (already
|
219
279
|
// parsed). Return 0 if you don't find matching closing
|
@@ -261,14 +321,14 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
|
|
261
321
|
|
262
322
|
// Parse backtick code section or raw backticks, return an inline.
|
263
323
|
// Assumes that the subject has a backtick at the current position.
|
264
|
-
static cmark_node *handle_backticks(subject *subj) {
|
324
|
+
static cmark_node *handle_backticks(subject *subj, int options) {
|
265
325
|
cmark_chunk openticks = take_while(subj, isbacktick);
|
266
326
|
bufsize_t startpos = subj->pos;
|
267
327
|
bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
|
268
328
|
|
269
329
|
if (endpos == 0) { // not found
|
270
330
|
subj->pos = startpos; // rewind
|
271
|
-
return make_str(subj->
|
331
|
+
return make_str(subj, subj->pos, subj->pos, openticks);
|
272
332
|
} else {
|
273
333
|
cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
|
274
334
|
|
@@ -277,7 +337,9 @@ static cmark_node *handle_backticks(subject *subj) {
|
|
277
337
|
cmark_strbuf_trim(&buf);
|
278
338
|
cmark_strbuf_normalize_whitespace(&buf);
|
279
339
|
|
280
|
-
|
340
|
+
cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
|
341
|
+
adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
|
342
|
+
return node;
|
281
343
|
}
|
282
344
|
}
|
283
345
|
|
@@ -286,7 +348,7 @@ static cmark_node *handle_backticks(subject *subj) {
|
|
286
348
|
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
287
349
|
bool *can_close) {
|
288
350
|
int numdelims = 0;
|
289
|
-
bufsize_t before_char_pos;
|
351
|
+
bufsize_t before_char_pos, after_char_pos;
|
290
352
|
int32_t after_char = 0;
|
291
353
|
int32_t before_char = 0;
|
292
354
|
int len;
|
@@ -297,12 +359,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
|
297
359
|
} else {
|
298
360
|
before_char_pos = subj->pos - 1;
|
299
361
|
// walk back to the beginning of the UTF_8 sequence:
|
300
|
-
while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
|
362
|
+
while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
|
301
363
|
before_char_pos -= 1;
|
302
364
|
}
|
303
365
|
len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
|
304
366
|
subj->pos - before_char_pos, &before_char);
|
305
|
-
if (len == -1) {
|
367
|
+
if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
|
306
368
|
before_char = 10;
|
307
369
|
}
|
308
370
|
}
|
@@ -317,11 +379,20 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
|
317
379
|
}
|
318
380
|
}
|
319
381
|
|
320
|
-
|
321
|
-
|
322
|
-
|
382
|
+
if (subj->pos == subj->input.len) {
|
383
|
+
after_char = 10;
|
384
|
+
} else {
|
385
|
+
after_char_pos = subj->pos;
|
386
|
+
while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
|
387
|
+
after_char_pos += 1;
|
388
|
+
}
|
389
|
+
len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
|
390
|
+
subj->input.len - after_char_pos, &after_char);
|
391
|
+
if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
|
323
392
|
after_char = 10;
|
324
393
|
}
|
394
|
+
}
|
395
|
+
|
325
396
|
left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
|
326
397
|
(!cmark_utf8proc_is_punctuation(after_char) ||
|
327
398
|
cmark_utf8proc_is_space(before_char) ||
|
@@ -336,7 +407,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
|
336
407
|
*can_close = right_flanking &&
|
337
408
|
(!left_flanking || cmark_utf8proc_is_punctuation(after_char));
|
338
409
|
} else if (c == '\'' || c == '"') {
|
339
|
-
*can_open = left_flanking && !right_flanking
|
410
|
+
*can_open = left_flanking && !right_flanking &&
|
411
|
+
before_char != ']' && before_char != ')';
|
340
412
|
*can_close = right_flanking;
|
341
413
|
} else {
|
342
414
|
*can_open = left_flanking;
|
@@ -434,7 +506,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
|
|
434
506
|
contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
|
435
507
|
}
|
436
508
|
|
437
|
-
inl_text = make_str(subj->
|
509
|
+
inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
|
438
510
|
|
439
511
|
if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
|
440
512
|
push_delimiter(subj, c, can_open, can_close, inl_text);
|
@@ -450,7 +522,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
|
|
450
522
|
advance(subj);
|
451
523
|
|
452
524
|
if (!smart || peek_char(subj) != '-') {
|
453
|
-
return make_str(subj->
|
525
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
|
454
526
|
}
|
455
527
|
|
456
528
|
while (smart && peek_char(subj) == '-') {
|
@@ -483,7 +555,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
|
|
483
555
|
cmark_strbuf_puts(&buf, ENDASH);
|
484
556
|
}
|
485
557
|
|
486
|
-
return make_str(subj->
|
558
|
+
return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
|
487
559
|
}
|
488
560
|
|
489
561
|
// Assumes we have a period at the current position.
|
@@ -493,12 +565,12 @@ static cmark_node *handle_period(subject *subj, bool smart) {
|
|
493
565
|
advance(subj);
|
494
566
|
if (peek_char(subj) == '.') {
|
495
567
|
advance(subj);
|
496
|
-
return make_str(subj->
|
568
|
+
return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
|
497
569
|
} else {
|
498
|
-
return make_str(subj->
|
570
|
+
return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
|
499
571
|
}
|
500
572
|
} else {
|
501
|
-
return make_str(subj->
|
573
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
|
502
574
|
}
|
503
575
|
}
|
504
576
|
|
@@ -627,7 +699,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
|
627
699
|
cmark_node *tmp, *tmpnext, *emph;
|
628
700
|
|
629
701
|
// calculate the actual number of characters used from this closer
|
630
|
-
use_delims = (closer_num_chars >= 2 && opener_num_chars >=2) ? 2 : 1;
|
702
|
+
use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
|
631
703
|
|
632
704
|
// remove used characters from associated inlines.
|
633
705
|
opener_num_chars -= use_delims;
|
@@ -655,6 +727,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
|
655
727
|
}
|
656
728
|
cmark_node_insert_after(opener_inl, emph);
|
657
729
|
|
730
|
+
emph->start_line = emph->end_line = subj->line;
|
731
|
+
emph->start_column = opener_inl->start_column + subj->column_offset;
|
732
|
+
emph->end_column = closer_inl->end_column + subj->column_offset;
|
733
|
+
|
658
734
|
// if opener has 0 characters, remove it and its associated inline
|
659
735
|
if (opener_num_chars == 0) {
|
660
736
|
cmark_node_free(opener_inl);
|
@@ -681,11 +757,11 @@ static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) {
|
|
681
757
|
if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) {
|
682
758
|
// only ascii symbols and newline can be escaped
|
683
759
|
advance(subj);
|
684
|
-
return make_str(subj->
|
760
|
+
return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
|
685
761
|
} else if (!is_eof(subj) && skip_line_end(subj)) {
|
686
762
|
return make_linebreak(subj->mem);
|
687
763
|
} else {
|
688
|
-
return make_str(subj->
|
764
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
|
689
765
|
}
|
690
766
|
}
|
691
767
|
|
@@ -701,14 +777,14 @@ static cmark_node *handle_entity(subject *subj) {
|
|
701
777
|
subj->input.len - subj->pos);
|
702
778
|
|
703
779
|
if (len == 0)
|
704
|
-
return make_str(subj->
|
780
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
|
705
781
|
|
706
782
|
subj->pos += len;
|
707
|
-
return make_str(subj->
|
783
|
+
return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
|
708
784
|
}
|
709
785
|
|
710
|
-
// Clean a URL: remove surrounding whitespace and
|
711
|
-
//
|
786
|
+
// Clean a URL: remove surrounding whitespace, and remove \ that escape
|
787
|
+
// punctuation.
|
712
788
|
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
|
713
789
|
cmark_strbuf buf = CMARK_BUF_INIT(mem);
|
714
790
|
|
@@ -719,11 +795,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
|
|
719
795
|
return result;
|
720
796
|
}
|
721
797
|
|
722
|
-
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
|
723
|
-
houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
|
724
|
-
} else {
|
725
798
|
houdini_unescape_html_f(&buf, url->data, url->len);
|
726
|
-
}
|
727
799
|
|
728
800
|
cmark_strbuf_unescape(&buf);
|
729
801
|
return cmark_chunk_buf_detach(&buf);
|
@@ -755,7 +827,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
|
|
755
827
|
|
756
828
|
// Parse an autolink or HTML tag.
|
757
829
|
// Assumes the subject has a '<' character at the current position.
|
758
|
-
static cmark_node *handle_pointy_brace(subject *subj,
|
830
|
+
static cmark_node *handle_pointy_brace(subject *subj, int options) {
|
759
831
|
bufsize_t matchlen = 0;
|
760
832
|
cmark_chunk contents;
|
761
833
|
|
@@ -767,7 +839,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
|
|
767
839
|
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
768
840
|
subj->pos += matchlen;
|
769
841
|
|
770
|
-
return make_autolink(subj->
|
842
|
+
return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
|
771
843
|
}
|
772
844
|
|
773
845
|
// next try to match an email autolink
|
@@ -776,7 +848,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
|
|
776
848
|
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
777
849
|
subj->pos += matchlen;
|
778
850
|
|
779
|
-
return make_autolink(subj->
|
851
|
+
return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
|
780
852
|
}
|
781
853
|
|
782
854
|
// finally, try to match an html tag
|
@@ -784,20 +856,24 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
|
|
784
856
|
if (matchlen > 0) {
|
785
857
|
contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
|
786
858
|
subj->pos += matchlen;
|
787
|
-
|
859
|
+
cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
|
860
|
+
adjust_subj_node_newlines(subj, node, matchlen, 1, options);
|
861
|
+
return node;
|
788
862
|
}
|
789
863
|
|
790
|
-
if (liberal_html_tag) {
|
864
|
+
if (options & CMARK_OPT_LIBERAL_HTML_TAG) {
|
791
865
|
matchlen = scan_liberal_html_tag(&subj->input, subj->pos);
|
792
866
|
if (matchlen > 0) {
|
793
867
|
contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
|
794
868
|
subj->pos += matchlen;
|
795
|
-
|
869
|
+
cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
|
870
|
+
adjust_subj_node_newlines(subj, node, matchlen, 1, options);
|
871
|
+
return node;
|
796
872
|
}
|
797
873
|
}
|
798
874
|
|
799
875
|
// if nothing matches, just return the opening <:
|
800
|
-
return make_str(subj->
|
876
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
|
801
877
|
}
|
802
878
|
|
803
879
|
// Parse a link label. Returns 1 if successful.
|
@@ -845,24 +921,12 @@ noMatch:
|
|
845
921
|
subj->pos = startpos; // rewind
|
846
922
|
return 0;
|
847
923
|
}
|
848
|
-
|
924
|
+
|
925
|
+
static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
|
926
|
+
cmark_chunk *output) {
|
849
927
|
bufsize_t i = offset;
|
850
928
|
size_t nb_p = 0;
|
851
929
|
|
852
|
-
if (i < input->len && input->data[i] == '<') {
|
853
|
-
++i;
|
854
|
-
while (i < input->len) {
|
855
|
-
if (input->data[i] == '>') {
|
856
|
-
++i;
|
857
|
-
break;
|
858
|
-
} else if (input->data[i] == '\\')
|
859
|
-
i += 2;
|
860
|
-
else if (cmark_isspace(input->data[i]))
|
861
|
-
return -1;
|
862
|
-
else
|
863
|
-
++i;
|
864
|
-
}
|
865
|
-
} else {
|
866
930
|
while (i < input->len) {
|
867
931
|
if (input->data[i] == '\\' &&
|
868
932
|
i + 1 < input-> len &&
|
@@ -883,18 +947,53 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
|
|
883
947
|
else
|
884
948
|
++i;
|
885
949
|
}
|
950
|
+
|
951
|
+
if (i >= input->len)
|
952
|
+
return -1;
|
953
|
+
|
954
|
+
{
|
955
|
+
cmark_chunk result = {input->data + offset, i - offset, 0};
|
956
|
+
*output = result;
|
957
|
+
}
|
958
|
+
return i - offset;
|
959
|
+
}
|
960
|
+
|
961
|
+
static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
|
962
|
+
cmark_chunk *output) {
|
963
|
+
bufsize_t i = offset;
|
964
|
+
|
965
|
+
if (i < input->len && input->data[i] == '<') {
|
966
|
+
++i;
|
967
|
+
while (i < input->len) {
|
968
|
+
if (input->data[i] == '>') {
|
969
|
+
++i;
|
970
|
+
break;
|
971
|
+
} else if (input->data[i] == '\\')
|
972
|
+
i += 2;
|
973
|
+
else if (cmark_isspace(input->data[i]) || input->data[i] == '<')
|
974
|
+
return manual_scan_link_url_2(input, offset, output);
|
975
|
+
else
|
976
|
+
++i;
|
977
|
+
}
|
978
|
+
} else {
|
979
|
+
return manual_scan_link_url_2(input, offset, output);
|
886
980
|
}
|
887
981
|
|
888
982
|
if (i >= input->len)
|
889
983
|
return -1;
|
984
|
+
|
985
|
+
{
|
986
|
+
cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
|
987
|
+
*output = result;
|
988
|
+
}
|
890
989
|
return i - offset;
|
891
990
|
}
|
991
|
+
|
892
992
|
// Return a link, an image, or a literal close bracket.
|
893
993
|
static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
|
894
994
|
bufsize_t initial_pos, after_link_text_pos;
|
895
|
-
bufsize_t starturl, endurl, starttitle, endtitle, endall;
|
896
|
-
bufsize_t n;
|
897
|
-
bufsize_t sps;
|
995
|
+
bufsize_t endurl, starttitle, endtitle, endall;
|
996
|
+
bufsize_t sps, n;
|
898
997
|
cmark_reference *ref = NULL;
|
899
998
|
cmark_chunk url_chunk, title_chunk;
|
900
999
|
cmark_chunk url, title;
|
@@ -912,13 +1011,13 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
|
|
912
1011
|
opener = subj->last_bracket;
|
913
1012
|
|
914
1013
|
if (opener == NULL) {
|
915
|
-
return make_str(subj->
|
1014
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
916
1015
|
}
|
917
1016
|
|
918
1017
|
if (!opener->active) {
|
919
1018
|
// take delimiter off stack
|
920
1019
|
pop_bracket(subj);
|
921
|
-
return make_str(subj->
|
1020
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
922
1021
|
}
|
923
1022
|
|
924
1023
|
// If we got here, we matched a potential link/image text.
|
@@ -930,11 +1029,11 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
|
|
930
1029
|
// First, look for an inline link.
|
931
1030
|
if (peek_char(subj) == '(' &&
|
932
1031
|
((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
|
933
|
-
((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
|
1032
|
+
((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
|
1033
|
+
&url_chunk)) > -1)) {
|
934
1034
|
|
935
1035
|
// try to parse an explicit link:
|
936
|
-
|
937
|
-
endurl = starturl + n;
|
1036
|
+
endurl = subj->pos + 1 + sps + n;
|
938
1037
|
starttitle = endurl + scan_spacechars(&subj->input, endurl);
|
939
1038
|
|
940
1039
|
// ensure there are spaces btw url and title
|
@@ -947,7 +1046,6 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
|
|
947
1046
|
if (peek_at(subj, endall) == ')') {
|
948
1047
|
subj->pos = endall + 1;
|
949
1048
|
|
950
|
-
url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
|
951
1049
|
title_chunk =
|
952
1050
|
cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
|
953
1051
|
url = cmark_clean_url(subj->mem, &url_chunk);
|
@@ -996,12 +1094,15 @@ noMatch:
|
|
996
1094
|
// If we fall through to here, it means we didn't match a link:
|
997
1095
|
pop_bracket(subj); // remove this opener from delimiter list
|
998
1096
|
subj->pos = initial_pos;
|
999
|
-
return make_str(subj->
|
1097
|
+
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
1000
1098
|
|
1001
1099
|
match:
|
1002
1100
|
inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
|
1003
1101
|
inl->as.link.url = url;
|
1004
1102
|
inl->as.link.title = title;
|
1103
|
+
inl->start_line = inl->end_line = subj->line;
|
1104
|
+
inl->start_column = opener->inl_text->start_column;
|
1105
|
+
inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
|
1005
1106
|
cmark_node_insert_before(opener->inl_text, inl);
|
1006
1107
|
// Add link text:
|
1007
1108
|
tmp = opener->inl_text->next;
|
@@ -1048,6 +1149,8 @@ static cmark_node *handle_newline(subject *subj) {
|
|
1048
1149
|
if (peek_at(subj, subj->pos) == '\n') {
|
1049
1150
|
advance(subj);
|
1050
1151
|
}
|
1152
|
+
++subj->line;
|
1153
|
+
subj->column_offset = -subj->pos;
|
1051
1154
|
// skip spaces at beginning of line
|
1052
1155
|
skip_spaces(subj);
|
1053
1156
|
if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
|
@@ -1101,12 +1204,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options) {
|
|
1101
1204
|
return subj->input.len;
|
1102
1205
|
}
|
1103
1206
|
|
1104
|
-
void cmark_inlines_add_special_character(unsigned char c) {
|
1207
|
+
void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
|
1105
1208
|
SPECIAL_CHARS[c] = 1;
|
1209
|
+
if (emphasis)
|
1210
|
+
SKIP_CHARS[c] = 1;
|
1106
1211
|
}
|
1107
1212
|
|
1108
|
-
void cmark_inlines_remove_special_character(unsigned char c) {
|
1213
|
+
void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
|
1109
1214
|
SPECIAL_CHARS[c] = 0;
|
1215
|
+
if (emphasis)
|
1216
|
+
SKIP_CHARS[c] = 0;
|
1110
1217
|
}
|
1111
1218
|
|
1112
1219
|
static cmark_node *try_extensions(cmark_parser *parser,
|
@@ -1133,7 +1240,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1133
1240
|
cmark_node *new_inl = NULL;
|
1134
1241
|
cmark_chunk contents;
|
1135
1242
|
unsigned char c;
|
1136
|
-
bufsize_t endpos;
|
1243
|
+
bufsize_t startpos, endpos;
|
1137
1244
|
c = peek_char(subj);
|
1138
1245
|
if (c == 0) {
|
1139
1246
|
return 0;
|
@@ -1144,7 +1251,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1144
1251
|
new_inl = handle_newline(subj);
|
1145
1252
|
break;
|
1146
1253
|
case '`':
|
1147
|
-
new_inl = handle_backticks(subj);
|
1254
|
+
new_inl = handle_backticks(subj, options);
|
1148
1255
|
break;
|
1149
1256
|
case '\\':
|
1150
1257
|
new_inl = handle_backslash(parser, subj);
|
@@ -1153,7 +1260,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1153
1260
|
new_inl = handle_entity(subj);
|
1154
1261
|
break;
|
1155
1262
|
case '<':
|
1156
|
-
new_inl = handle_pointy_brace(subj,
|
1263
|
+
new_inl = handle_pointy_brace(subj, options);
|
1157
1264
|
break;
|
1158
1265
|
case '*':
|
1159
1266
|
case '_':
|
@@ -1169,7 +1276,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1169
1276
|
break;
|
1170
1277
|
case '[':
|
1171
1278
|
advance(subj);
|
1172
|
-
new_inl = make_str(subj->
|
1279
|
+
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
|
1173
1280
|
push_bracket(subj, false, new_inl);
|
1174
1281
|
break;
|
1175
1282
|
case ']':
|
@@ -1179,10 +1286,10 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1179
1286
|
advance(subj);
|
1180
1287
|
if (peek_char(subj) == '[') {
|
1181
1288
|
advance(subj);
|
1182
|
-
new_inl = make_str(subj->
|
1289
|
+
new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
|
1183
1290
|
push_bracket(subj, true, new_inl);
|
1184
1291
|
} else {
|
1185
|
-
new_inl = make_str(subj->
|
1292
|
+
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
|
1186
1293
|
}
|
1187
1294
|
break;
|
1188
1295
|
default:
|
@@ -1192,6 +1299,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1192
1299
|
|
1193
1300
|
endpos = subject_find_special_char(subj, options);
|
1194
1301
|
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
|
1302
|
+
startpos = subj->pos;
|
1195
1303
|
subj->pos = endpos;
|
1196
1304
|
|
1197
1305
|
// if we're at a newline, strip trailing spaces.
|
@@ -1199,7 +1307,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
|
|
1199
1307
|
cmark_chunk_rtrim(&contents);
|
1200
1308
|
}
|
1201
1309
|
|
1202
|
-
new_inl = make_str(subj
|
1310
|
+
new_inl = make_str(subj, startpos, endpos - 1, contents);
|
1203
1311
|
}
|
1204
1312
|
if (new_inl != NULL) {
|
1205
1313
|
cmark_node_append_child(parent, new_inl);
|
@@ -1214,7 +1322,7 @@ void cmark_parse_inlines(cmark_parser *parser,
|
|
1214
1322
|
cmark_reference_map *refmap,
|
1215
1323
|
int options) {
|
1216
1324
|
subject subj;
|
1217
|
-
subject_from_buf(parser->mem, &subj, &parent->content, refmap);
|
1325
|
+
subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap);
|
1218
1326
|
cmark_chunk_rtrim(&subj.input);
|
1219
1327
|
|
1220
1328
|
while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
|
@@ -1253,7 +1361,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
|
|
1253
1361
|
bufsize_t matchlen = 0;
|
1254
1362
|
bufsize_t beforetitle;
|
1255
1363
|
|
1256
|
-
subject_from_buf(mem, &subj, input, NULL);
|
1364
|
+
subject_from_buf(mem, -1, 0, &subj, input, NULL);
|
1257
1365
|
|
1258
1366
|
// parse label:
|
1259
1367
|
if (!link_label(&subj, &lab) || lab.len == 0)
|
@@ -1268,9 +1376,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
|
|
1268
1376
|
|
1269
1377
|
// parse link url:
|
1270
1378
|
spnl(&subj);
|
1271
|
-
matchlen = manual_scan_link_url(&subj.input, subj.pos)
|
1272
|
-
|
1273
|
-
url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
|
1379
|
+
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
|
1380
|
+
url.len > 0) {
|
1274
1381
|
subj.pos += matchlen;
|
1275
1382
|
} else {
|
1276
1383
|
return 0;
|
@@ -1425,6 +1532,10 @@ void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) {
|
|
1425
1532
|
parser->pos = offset;
|
1426
1533
|
}
|
1427
1534
|
|
1535
|
+
int cmark_inline_parser_get_column(cmark_inline_parser *parser) {
|
1536
|
+
return parser->pos + 1 + parser->column_offset + parser->block_offset;
|
1537
|
+
}
|
1538
|
+
|
1428
1539
|
cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) {
|
1429
1540
|
return &parser->input;
|
1430
1541
|
}
|
@@ -1453,3 +1564,7 @@ void cmark_node_unput(cmark_node *node, int n) {
|
|
1453
1564
|
delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) {
|
1454
1565
|
return parser->last_delim;
|
1455
1566
|
}
|
1567
|
+
|
1568
|
+
int cmark_inline_parser_get_line(cmark_inline_parser *parser) {
|
1569
|
+
return parser->line;
|
1570
|
+
}
|