commonmarker 0.16.8 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/commonmarker/cmark/CMakeLists.txt +3 -3
  4. data/ext/commonmarker/cmark/README.md +2 -2
  5. data/ext/commonmarker/cmark/api_test/CMakeLists.txt +2 -1
  6. data/ext/commonmarker/cmark/api_test/harness.c +27 -0
  7. data/ext/commonmarker/cmark/api_test/main.c +179 -3
  8. data/ext/commonmarker/cmark/changelog.txt +148 -0
  9. data/ext/commonmarker/cmark/extensions/autolink.c +8 -0
  10. data/ext/commonmarker/cmark/extensions/core-extensions.c +11 -1
  11. data/ext/commonmarker/cmark/extensions/core-extensions.h +1 -1
  12. data/ext/commonmarker/cmark/extensions/strikethrough.c +5 -0
  13. data/ext/commonmarker/cmark/extensions/table.c +44 -23
  14. data/ext/commonmarker/cmark/src/blocks.c +3 -2
  15. data/ext/commonmarker/cmark/src/cmark_extension_api.h +9 -0
  16. data/ext/commonmarker/cmark/src/inlines.c +208 -93
  17. data/ext/commonmarker/cmark/src/inlines.h +2 -2
  18. data/ext/commonmarker/cmark/src/iterator.c +1 -0
  19. data/ext/commonmarker/cmark/src/latex.c +11 -11
  20. data/ext/commonmarker/cmark/src/main.c +12 -11
  21. data/ext/commonmarker/cmark/src/node.h +1 -0
  22. data/ext/commonmarker/cmark/src/scanners.c +34 -24
  23. data/ext/commonmarker/cmark/src/scanners.re +1 -1
  24. data/ext/commonmarker/cmark/src/syntax_extension.c +5 -0
  25. data/ext/commonmarker/cmark/src/syntax_extension.h +1 -0
  26. data/ext/commonmarker/cmark/test/CMakeLists.txt +3 -2
  27. data/ext/commonmarker/cmark/test/cmark.py +2 -5
  28. data/ext/commonmarker/cmark/test/regression.txt +35 -1
  29. data/ext/commonmarker/cmark/test/smart_punct.txt +9 -0
  30. data/ext/commonmarker/cmark/test/spec.txt +88 -26
  31. data/ext/commonmarker/commonmarker.c +1 -1
  32. data/ext/commonmarker/extconf.rb +1 -1
  33. data/lib/commonmarker/version.rb +1 -1
  34. data/test/test_attributes.rb +1 -80
  35. metadata +2 -2
@@ -149,6 +149,7 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
149
149
  size_t max_rewind = cmark_inline_parser_get_offset(inline_parser);
150
150
  uint8_t *data = chunk->data + max_rewind;
151
151
  size_t size = chunk->len - max_rewind;
152
+ int start = cmark_inline_parser_get_column(inline_parser);
152
153
 
153
154
  size_t link_end;
154
155
 
@@ -187,6 +188,13 @@ static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
187
188
  cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end);
188
189
  cmark_node_append_child(node, text);
189
190
 
191
+ node->start_line = text->start_line =
192
+ node->end_line = text->end_line =
193
+ cmark_inline_parser_get_line(inline_parser);
194
+
195
+ node->start_column = text->start_column = start - 1;
196
+ node->end_column = text->end_column = cmark_inline_parser_get_column(inline_parser) - 1;
197
+
190
198
  return node;
191
199
  }
192
200
 
@@ -3,8 +3,9 @@
3
3
  #include "strikethrough.h"
4
4
  #include "table.h"
5
5
  #include "tagfilter.h"
6
+ #include "registry.h"
6
7
 
7
- int core_extensions_registration(cmark_plugin *plugin) {
8
+ static int core_extensions_registration(cmark_plugin *plugin) {
8
9
  cmark_plugin_register_syntax_extension(plugin, create_table_extension());
9
10
  cmark_plugin_register_syntax_extension(plugin,
10
11
  create_strikethrough_extension());
@@ -12,3 +13,12 @@ int core_extensions_registration(cmark_plugin *plugin) {
12
13
  cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension());
13
14
  return 1;
14
15
  }
16
+
17
+ void core_extensions_ensure_registered(void) {
18
+ static int registered = 0;
19
+
20
+ if (!registered) {
21
+ cmark_register_plugin(core_extensions_registration);
22
+ registered = 1;
23
+ }
24
+ }
@@ -9,7 +9,7 @@ extern "C" {
9
9
  #include "cmarkextensions_export.h"
10
10
 
11
11
  CMARKEXTENSIONS_EXPORT
12
- int core_extensions_registration(cmark_plugin *plugin);
12
+ void core_extensions_ensure_registered(void);
13
13
 
14
14
  CMARKEXTENSIONS_EXPORT
15
15
  uint16_t cmarkextensions_get_table_columns(cmark_node *node);
@@ -23,6 +23,8 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,
23
23
 
24
24
  res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
25
25
  cmark_node_set_literal(res, buffer);
26
+ res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser);
27
+ res->start_column = cmark_inline_parser_get_column(inline_parser) - delims;
26
28
 
27
29
  if (left_flanking || right_flanking) {
28
30
  cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking,
@@ -58,6 +60,7 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
58
60
  tmp = next;
59
61
  }
60
62
 
63
+ strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
61
64
  cmark_node_free(closer->inl_text);
62
65
 
63
66
  delim = closer;
@@ -147,5 +150,7 @@ cmark_syntax_extension *create_strikethrough_extension(void) {
147
150
  special_chars = cmark_llist_append(mem, special_chars, (void *)'~');
148
151
  cmark_syntax_extension_set_special_inline_chars(ext, special_chars);
149
152
 
153
+ cmark_syntax_extension_set_emphasis(ext, true);
154
+
150
155
  return ext;
151
156
  }
@@ -26,9 +26,16 @@ typedef struct {
26
26
  bool is_header;
27
27
  } node_table_row;
28
28
 
29
+ typedef struct {
30
+ cmark_strbuf *buf;
31
+ int start_offset, end_offset, internal_offset;
32
+ } node_cell;
33
+
29
34
  static void free_table_cell(cmark_mem *mem, void *data) {
30
- cmark_strbuf_free((cmark_strbuf *)data);
31
- mem->free(data);
35
+ node_cell *cell = (node_cell *)data;
36
+ cmark_strbuf_free((cmark_strbuf *)cell->buf);
37
+ mem->free(cell->buf);
38
+ mem->free(cell);
32
39
  }
33
40
 
34
41
  static void free_table_row(cmark_mem *mem, table_row *row) {
@@ -105,14 +112,13 @@ static table_row *row_from_string(cmark_syntax_extension *self,
105
112
  cmark_parser *parser, unsigned char *string,
106
113
  int len) {
107
114
  table_row *row = NULL;
108
- bufsize_t cell_matched, pipe_matched, offset = 0;
115
+ bufsize_t cell_matched, pipe_matched, offset;
109
116
 
110
117
  row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
111
118
  row->n_columns = 0;
112
119
  row->cells = NULL;
113
120
 
114
- if (len > 0 && string[0] == '|')
115
- ++offset;
121
+ offset = scan_table_cell_end(string, len, 0);
116
122
 
117
123
  do {
118
124
  cell_matched = scan_table_cell(string, len, offset);
@@ -122,8 +128,17 @@ static table_row *row_from_string(cmark_syntax_extension *self,
122
128
  cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
123
129
  cell_matched);
124
130
  cmark_strbuf_trim(cell_buf);
131
+
132
+ node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
133
+ cell->buf = cell_buf;
134
+ cell->start_offset = offset;
135
+ cell->end_offset = offset + cell_matched - 1;
136
+ while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
137
+ --cell->start_offset;
138
+ ++cell->internal_offset;
139
+ }
125
140
  row->n_columns += 1;
126
- row->cells = cmark_llist_append(parser->mem, row->cells, cell_buf);
141
+ row->cells = cmark_llist_append(parser->mem, row->cells, cell);
127
142
  }
128
143
 
129
144
  offset += cell_matched + pipe_matched;
@@ -202,15 +217,14 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
202
217
 
203
218
  parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
204
219
 
205
-
206
220
  set_n_table_columns(parent_container, header_row->n_columns);
207
221
 
208
222
  uint8_t *alignments =
209
223
  (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t));
210
224
  cmark_llist *it = marker_row->cells;
211
225
  for (i = 0; it; it = it->next, ++i) {
212
- cmark_strbuf *node = (cmark_strbuf *)it->data;
213
- bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':';
226
+ node_cell *node = (node_cell *)it->data;
227
+ bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':';
214
228
 
215
229
  if (left && right)
216
230
  alignments[i] = 'c';
@@ -223,8 +237,10 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
223
237
 
224
238
  table_header =
225
239
  cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
226
- cmark_parser_get_offset(parser));
240
+ parent_container->start_column);
227
241
  cmark_node_set_syntax_extension(table_header, self);
242
+ table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2;
243
+ table_header->start_line = table_header->end_line = parent_container->start_line;
228
244
 
229
245
  table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
230
246
  ntr->is_header = true;
@@ -233,10 +249,13 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
233
249
  cmark_llist *tmp;
234
250
 
235
251
  for (tmp = header_row->cells; tmp; tmp = tmp->next) {
236
- cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
252
+ node_cell *cell = (node_cell *) tmp->data;
237
253
  cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
238
- CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
239
- cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr);
254
+ CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
255
+ header_cell->start_line = header_cell->end_line = parent_container->start_line;
256
+ header_cell->internal_offset = cell->internal_offset;
257
+ header_cell->end_column = parent_container->start_column + cell->end_offset;
258
+ cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr);
240
259
  cmark_node_set_syntax_extension(header_cell, self);
241
260
  }
242
261
  }
@@ -262,9 +281,9 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
262
281
 
263
282
  table_row_block =
264
283
  cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
265
- cmark_parser_get_offset(parser));
266
-
284
+ parent_container->start_column);
267
285
  cmark_node_set_syntax_extension(table_row_block, self);
286
+ table_row_block->end_column = parent_container->end_column;
268
287
  table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row));
269
288
 
270
289
  row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser),
@@ -275,17 +294,19 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
275
294
  int i, table_columns = get_n_table_columns(parent_container);
276
295
 
277
296
  for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) {
278
- cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
279
- cmark_node *cell = cmark_parser_add_child(parser, table_row_block,
280
- CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
281
- cmark_node_set_string_content(cell, (char *) cell_buf->ptr);
282
- cmark_node_set_syntax_extension(cell, self);
297
+ node_cell *cell = (node_cell *) tmp->data;
298
+ cmark_node *node = cmark_parser_add_child(parser, table_row_block,
299
+ CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
300
+ node->internal_offset = cell->internal_offset;
301
+ node->end_column = parent_container->start_column + cell->end_offset;
302
+ cmark_node_set_string_content(node, (char *) cell->buf->ptr);
303
+ cmark_node_set_syntax_extension(node, self);
283
304
  }
284
305
 
285
306
  for (; i < table_columns; ++i) {
286
- cmark_node *cell = cmark_parser_add_child(
287
- parser, table_row_block, CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
288
- cmark_node_set_syntax_extension(cell, self);
307
+ cmark_node *node = cmark_parser_add_child(
308
+ parser, table_row_block, CMARK_NODE_TABLE_CELL, 0);
309
+ cmark_node_set_syntax_extension(node, self);
289
310
  }
290
311
  }
291
312
 
@@ -394,9 +394,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add)
394
394
  for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
395
395
  unsigned char c = (unsigned char)(size_t)tmp_char->data;
396
396
  if (add)
397
- cmark_inlines_add_special_character(c);
397
+ cmark_inlines_add_special_character(c, ext->emphasis);
398
398
  else
399
- cmark_inlines_remove_special_character(c);
399
+ cmark_inlines_remove_special_character(c, ext->emphasis);
400
400
  }
401
401
  }
402
402
  }
@@ -980,6 +980,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
980
980
 
981
981
  (*container)->as.heading.level = level;
982
982
  (*container)->as.heading.setext = false;
983
+ (*container)->internal_offset = matched;
983
984
 
984
985
  } else if (!indented && (matched = scan_open_code_fence(
985
986
  input, parser->first_nonspace))) {
@@ -269,6 +269,9 @@ cmark_syntax_extension *cmark_syntax_extension_new (const char *name);
269
269
  CMARK_EXPORT
270
270
  cmark_node_type cmark_syntax_extension_add_node(int is_inline);
271
271
 
272
+ CMARK_EXPORT
273
+ void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, bool emphasis);
274
+
272
275
  /** See the documentation for 'cmark_syntax_extension'
273
276
  */
274
277
  CMARK_EXPORT
@@ -666,6 +669,12 @@ void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter
666
669
  CMARK_EXPORT
667
670
  delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser);
668
671
 
672
+ CMARK_EXPORT
673
+ int cmark_inline_parser_get_line(cmark_inline_parser *parser);
674
+
675
+ CMARK_EXPORT
676
+ int cmark_inline_parser_get_column(cmark_inline_parser *parser);
677
+
669
678
  /** Convenience function to scan a given delimiter.
670
679
  *
671
680
  * 'left_flanking' and 'right_flanking' will be set to true if they
@@ -23,9 +23,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
23
23
  static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
24
24
 
25
25
  // Macros for creating various kinds of simple.
26
- #define make_str(mem, s) make_literal(mem, CMARK_NODE_TEXT, s)
27
- #define make_code(mem, s) make_literal(mem, CMARK_NODE_CODE, s)
28
- #define make_raw_html(mem, s) make_literal(mem, CMARK_NODE_HTML_INLINE, s)
26
+ #define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
27
+ #define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
28
+ #define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
29
29
  #define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
30
30
  #define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
31
31
  #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
@@ -46,7 +46,10 @@ typedef struct bracket {
46
46
  typedef struct subject{
47
47
  cmark_mem *mem;
48
48
  cmark_chunk input;
49
+ int line;
49
50
  bufsize_t pos;
51
+ int block_offset;
52
+ int column_offset;
50
53
  cmark_reference_map *refmap;
51
54
  delimiter *last_delim;
52
55
  bracket *last_bracket;
@@ -54,6 +57,9 @@ typedef struct subject{
54
57
  bool scanned_for_backticks;
55
58
  } subject;
56
59
 
60
+ // Extensions may populate this.
61
+ static int8_t SKIP_CHARS[256];
62
+
57
63
  static CMARK_INLINE bool S_is_line_end_char(char c) {
58
64
  return (c == '\n' || c == '\r');
59
65
  }
@@ -63,17 +69,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
63
69
 
64
70
  static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options);
65
71
 
66
- static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
67
- cmark_reference_map *refmap);
72
+ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
73
+ cmark_strbuf *buffer, cmark_reference_map *refmap);
68
74
  static bufsize_t subject_find_special_char(subject *subj, int options);
69
75
 
70
76
  // Create an inline with a literal string value.
71
- static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t,
77
+ static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
78
+ int start_column, int end_column,
72
79
  cmark_chunk s) {
73
- cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
74
- cmark_strbuf_init(mem, &e->content, 0);
80
+ cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
81
+ cmark_strbuf_init(subj->mem, &e->content, 0);
75
82
  e->type = (uint16_t)t;
76
83
  e->as.literal = s;
84
+ e->start_line = e->end_line = subj->line;
85
+ // columns are 1 based.
86
+ e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
87
+ e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
77
88
  return e;
78
89
  }
79
90
 
@@ -86,14 +97,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
86
97
  }
87
98
 
88
99
  // Like make_str, but parses entities.
89
- static cmark_node *make_str_with_entities(cmark_mem *mem,
100
+ static cmark_node *make_str_with_entities(subject *subj,
101
+ int start_column, int end_column,
90
102
  cmark_chunk *content) {
91
- cmark_strbuf unescaped = CMARK_BUF_INIT(mem);
103
+ cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
92
104
 
93
105
  if (houdini_unescape_html(&unescaped, content->data, content->len)) {
94
- return make_str(mem, cmark_chunk_buf_detach(&unescaped));
106
+ return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
95
107
  } else {
96
- return make_str(mem, *content);
108
+ return make_str(subj, start_column, end_column, *content);
97
109
  }
98
110
  }
99
111
 
@@ -107,7 +119,7 @@ static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
107
119
  c.data = (unsigned char *)mem->calloc(len + 1, 1);
108
120
  c.alloc = 1;
109
121
  if (len)
110
- memcpy(c.data, src->data, len);
122
+ memcpy(c.data, src->data, len);
111
123
  c.data[len] = '\0';
112
124
 
113
125
  return c;
@@ -131,23 +143,30 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
131
143
  return cmark_chunk_buf_detach(&buf);
132
144
  }
133
145
 
134
- static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url,
135
- int is_email) {
136
- cmark_node *link = make_simple(mem, CMARK_NODE_LINK);
137
- link->as.link.url = cmark_clean_autolink(mem, &url, is_email);
146
+ static CMARK_INLINE cmark_node *make_autolink(subject *subj,
147
+ int start_column, int end_column,
148
+ cmark_chunk url, int is_email) {
149
+ cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
150
+ link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
138
151
  link->as.link.title = cmark_chunk_literal("");
139
- cmark_node_append_child(link, make_str_with_entities(mem, &url));
152
+ link->start_line = link->end_line = subj->line;
153
+ link->start_column = start_column + 1;
154
+ link->end_column = end_column + 1;
155
+ cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
140
156
  return link;
141
157
  }
142
158
 
143
- static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
144
- cmark_reference_map *refmap) {
159
+ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
160
+ cmark_strbuf *buffer, cmark_reference_map *refmap) {
145
161
  int i;
146
162
  e->mem = mem;
147
163
  e->input.data = buffer->ptr;
148
164
  e->input.len = buffer->size;
149
165
  e->input.alloc = 0;
166
+ e->line = line_number;
150
167
  e->pos = 0;
168
+ e->block_offset = block_offset;
169
+ e->column_offset = 0;
151
170
  e->refmap = refmap;
152
171
  e->last_delim = NULL;
153
172
  e->last_bracket = NULL;
@@ -214,6 +233,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
214
233
  return cmark_chunk_dup(&subj->input, startpos, len);
215
234
  }
216
235
 
236
+ // Return the number of newlines in a given span of text in a subject. If
237
+ // the number is greater than zero, also return the number of characters
238
+ // between the last newline and the end of the span in `since_newline`.
239
+ static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
240
+ int nls = 0;
241
+ int since_nl = 0;
242
+
243
+ while (len--) {
244
+ if (subj->input.data[from++] == '\n') {
245
+ ++nls;
246
+ since_nl = 0;
247
+ } else {
248
+ ++since_nl;
249
+ }
250
+ }
251
+
252
+ if (!nls)
253
+ return 0;
254
+
255
+ *since_newline = since_nl;
256
+ return nls;
257
+ }
258
+
259
+ // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
260
+ // `column_offset` according to the number of newlines in a just-matched span
261
+ // of text in `subj`.
262
+ static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
263
+ if (!(options & CMARK_OPT_SOURCEPOS)) {
264
+ return;
265
+ }
266
+
267
+ int since_newline;
268
+ int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
269
+ if (newlines) {
270
+ subj->line += newlines;
271
+ node->end_line += newlines;
272
+ node->end_column = since_newline;
273
+ subj->column_offset = -subj->pos + since_newline + extra;
274
+ }
275
+ }
276
+
217
277
  // Try to process a backtick code span that began with a
218
278
  // span of ticks of length openticklength length (already
219
279
  // parsed). Return 0 if you don't find matching closing
@@ -261,14 +321,14 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
261
321
 
262
322
  // Parse backtick code section or raw backticks, return an inline.
263
323
  // Assumes that the subject has a backtick at the current position.
264
- static cmark_node *handle_backticks(subject *subj) {
324
+ static cmark_node *handle_backticks(subject *subj, int options) {
265
325
  cmark_chunk openticks = take_while(subj, isbacktick);
266
326
  bufsize_t startpos = subj->pos;
267
327
  bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
268
328
 
269
329
  if (endpos == 0) { // not found
270
330
  subj->pos = startpos; // rewind
271
- return make_str(subj->mem, openticks);
331
+ return make_str(subj, subj->pos, subj->pos, openticks);
272
332
  } else {
273
333
  cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
274
334
 
@@ -277,7 +337,9 @@ static cmark_node *handle_backticks(subject *subj) {
277
337
  cmark_strbuf_trim(&buf);
278
338
  cmark_strbuf_normalize_whitespace(&buf);
279
339
 
280
- return make_code(subj->mem, cmark_chunk_buf_detach(&buf));
340
+ cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
341
+ adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
342
+ return node;
281
343
  }
282
344
  }
283
345
 
@@ -286,7 +348,7 @@ static cmark_node *handle_backticks(subject *subj) {
286
348
  static int scan_delims(subject *subj, unsigned char c, bool *can_open,
287
349
  bool *can_close) {
288
350
  int numdelims = 0;
289
- bufsize_t before_char_pos;
351
+ bufsize_t before_char_pos, after_char_pos;
290
352
  int32_t after_char = 0;
291
353
  int32_t before_char = 0;
292
354
  int len;
@@ -297,12 +359,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
297
359
  } else {
298
360
  before_char_pos = subj->pos - 1;
299
361
  // walk back to the beginning of the UTF_8 sequence:
300
- while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
362
+ while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
301
363
  before_char_pos -= 1;
302
364
  }
303
365
  len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
304
366
  subj->pos - before_char_pos, &before_char);
305
- if (len == -1) {
367
+ if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
306
368
  before_char = 10;
307
369
  }
308
370
  }
@@ -317,11 +379,20 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
317
379
  }
318
380
  }
319
381
 
320
- len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
321
- subj->input.len - subj->pos, &after_char);
322
- if (len == -1) {
382
+ if (subj->pos == subj->input.len) {
383
+ after_char = 10;
384
+ } else {
385
+ after_char_pos = subj->pos;
386
+ while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
387
+ after_char_pos += 1;
388
+ }
389
+ len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
390
+ subj->input.len - after_char_pos, &after_char);
391
+ if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
323
392
  after_char = 10;
324
393
  }
394
+ }
395
+
325
396
  left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
326
397
  (!cmark_utf8proc_is_punctuation(after_char) ||
327
398
  cmark_utf8proc_is_space(before_char) ||
@@ -336,7 +407,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
336
407
  *can_close = right_flanking &&
337
408
  (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
338
409
  } else if (c == '\'' || c == '"') {
339
- *can_open = left_flanking && !right_flanking;
410
+ *can_open = left_flanking && !right_flanking &&
411
+ before_char != ']' && before_char != ')';
340
412
  *can_close = right_flanking;
341
413
  } else {
342
414
  *can_open = left_flanking;
@@ -434,7 +506,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
434
506
  contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
435
507
  }
436
508
 
437
- inl_text = make_str(subj->mem, contents);
509
+ inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
438
510
 
439
511
  if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
440
512
  push_delimiter(subj, c, can_open, can_close, inl_text);
@@ -450,7 +522,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
450
522
  advance(subj);
451
523
 
452
524
  if (!smart || peek_char(subj) != '-') {
453
- return make_str(subj->mem, cmark_chunk_literal("-"));
525
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
454
526
  }
455
527
 
456
528
  while (smart && peek_char(subj) == '-') {
@@ -483,7 +555,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
483
555
  cmark_strbuf_puts(&buf, ENDASH);
484
556
  }
485
557
 
486
- return make_str(subj->mem, cmark_chunk_buf_detach(&buf));
558
+ return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
487
559
  }
488
560
 
489
561
  // Assumes we have a period at the current position.
@@ -493,12 +565,12 @@ static cmark_node *handle_period(subject *subj, bool smart) {
493
565
  advance(subj);
494
566
  if (peek_char(subj) == '.') {
495
567
  advance(subj);
496
- return make_str(subj->mem, cmark_chunk_literal(ELLIPSES));
568
+ return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
497
569
  } else {
498
- return make_str(subj->mem, cmark_chunk_literal(".."));
570
+ return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
499
571
  }
500
572
  } else {
501
- return make_str(subj->mem, cmark_chunk_literal("."));
573
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
502
574
  }
503
575
  }
504
576
 
@@ -627,7 +699,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
627
699
  cmark_node *tmp, *tmpnext, *emph;
628
700
 
629
701
  // calculate the actual number of characters used from this closer
630
- use_delims = (closer_num_chars >= 2 && opener_num_chars >=2) ? 2 : 1;
702
+ use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
631
703
 
632
704
  // remove used characters from associated inlines.
633
705
  opener_num_chars -= use_delims;
@@ -655,6 +727,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
655
727
  }
656
728
  cmark_node_insert_after(opener_inl, emph);
657
729
 
730
+ emph->start_line = emph->end_line = subj->line;
731
+ emph->start_column = opener_inl->start_column + subj->column_offset;
732
+ emph->end_column = closer_inl->end_column + subj->column_offset;
733
+
658
734
  // if opener has 0 characters, remove it and its associated inline
659
735
  if (opener_num_chars == 0) {
660
736
  cmark_node_free(opener_inl);
@@ -681,11 +757,11 @@ static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) {
681
757
  if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) {
682
758
  // only ascii symbols and newline can be escaped
683
759
  advance(subj);
684
- return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
760
+ return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
685
761
  } else if (!is_eof(subj) && skip_line_end(subj)) {
686
762
  return make_linebreak(subj->mem);
687
763
  } else {
688
- return make_str(subj->mem, cmark_chunk_literal("\\"));
764
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
689
765
  }
690
766
  }
691
767
 
@@ -701,14 +777,14 @@ static cmark_node *handle_entity(subject *subj) {
701
777
  subj->input.len - subj->pos);
702
778
 
703
779
  if (len == 0)
704
- return make_str(subj->mem, cmark_chunk_literal("&"));
780
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
705
781
 
706
782
  subj->pos += len;
707
- return make_str(subj->mem, cmark_chunk_buf_detach(&ent));
783
+ return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
708
784
  }
709
785
 
710
- // Clean a URL: remove surrounding whitespace and surrounding <>,
711
- // and remove \ that escape punctuation.
786
+ // Clean a URL: remove surrounding whitespace, and remove \ that escape
787
+ // punctuation.
712
788
  cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
713
789
  cmark_strbuf buf = CMARK_BUF_INIT(mem);
714
790
 
@@ -719,11 +795,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
719
795
  return result;
720
796
  }
721
797
 
722
- if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
723
- houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
724
- } else {
725
798
  houdini_unescape_html_f(&buf, url->data, url->len);
726
- }
727
799
 
728
800
  cmark_strbuf_unescape(&buf);
729
801
  return cmark_chunk_buf_detach(&buf);
@@ -755,7 +827,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
755
827
 
756
828
  // Parse an autolink or HTML tag.
757
829
  // Assumes the subject has a '<' character at the current position.
758
- static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
830
+ static cmark_node *handle_pointy_brace(subject *subj, int options) {
759
831
  bufsize_t matchlen = 0;
760
832
  cmark_chunk contents;
761
833
 
@@ -767,7 +839,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
767
839
  contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
768
840
  subj->pos += matchlen;
769
841
 
770
- return make_autolink(subj->mem, contents, 0);
842
+ return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
771
843
  }
772
844
 
773
845
  // next try to match an email autolink
@@ -776,7 +848,7 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
776
848
  contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
777
849
  subj->pos += matchlen;
778
850
 
779
- return make_autolink(subj->mem, contents, 1);
851
+ return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
780
852
  }
781
853
 
782
854
  // finally, try to match an html tag
@@ -784,20 +856,24 @@ static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) {
784
856
  if (matchlen > 0) {
785
857
  contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
786
858
  subj->pos += matchlen;
787
- return make_raw_html(subj->mem, contents);
859
+ cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
860
+ adjust_subj_node_newlines(subj, node, matchlen, 1, options);
861
+ return node;
788
862
  }
789
863
 
790
- if (liberal_html_tag) {
864
+ if (options & CMARK_OPT_LIBERAL_HTML_TAG) {
791
865
  matchlen = scan_liberal_html_tag(&subj->input, subj->pos);
792
866
  if (matchlen > 0) {
793
867
  contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
794
868
  subj->pos += matchlen;
795
- return make_raw_html(subj->mem, contents);
869
+ cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
870
+ adjust_subj_node_newlines(subj, node, matchlen, 1, options);
871
+ return node;
796
872
  }
797
873
  }
798
874
 
799
875
  // if nothing matches, just return the opening <:
800
- return make_str(subj->mem, cmark_chunk_literal("<"));
876
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
801
877
  }
802
878
 
803
879
  // Parse a link label. Returns 1 if successful.
@@ -845,24 +921,12 @@ noMatch:
845
921
  subj->pos = startpos; // rewind
846
922
  return 0;
847
923
  }
848
- static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
924
+
925
+ static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
926
+ cmark_chunk *output) {
849
927
  bufsize_t i = offset;
850
928
  size_t nb_p = 0;
851
929
 
852
- if (i < input->len && input->data[i] == '<') {
853
- ++i;
854
- while (i < input->len) {
855
- if (input->data[i] == '>') {
856
- ++i;
857
- break;
858
- } else if (input->data[i] == '\\')
859
- i += 2;
860
- else if (cmark_isspace(input->data[i]))
861
- return -1;
862
- else
863
- ++i;
864
- }
865
- } else {
866
930
  while (i < input->len) {
867
931
  if (input->data[i] == '\\' &&
868
932
  i + 1 < input-> len &&
@@ -883,18 +947,53 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
883
947
  else
884
948
  ++i;
885
949
  }
950
+
951
+ if (i >= input->len)
952
+ return -1;
953
+
954
+ {
955
+ cmark_chunk result = {input->data + offset, i - offset, 0};
956
+ *output = result;
957
+ }
958
+ return i - offset;
959
+ }
960
+
961
+ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
962
+ cmark_chunk *output) {
963
+ bufsize_t i = offset;
964
+
965
+ if (i < input->len && input->data[i] == '<') {
966
+ ++i;
967
+ while (i < input->len) {
968
+ if (input->data[i] == '>') {
969
+ ++i;
970
+ break;
971
+ } else if (input->data[i] == '\\')
972
+ i += 2;
973
+ else if (cmark_isspace(input->data[i]) || input->data[i] == '<')
974
+ return manual_scan_link_url_2(input, offset, output);
975
+ else
976
+ ++i;
977
+ }
978
+ } else {
979
+ return manual_scan_link_url_2(input, offset, output);
886
980
  }
887
981
 
888
982
  if (i >= input->len)
889
983
  return -1;
984
+
985
+ {
986
+ cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
987
+ *output = result;
988
+ }
890
989
  return i - offset;
891
990
  }
991
+
892
992
  // Return a link, an image, or a literal close bracket.
893
993
  static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
894
994
  bufsize_t initial_pos, after_link_text_pos;
895
- bufsize_t starturl, endurl, starttitle, endtitle, endall;
896
- bufsize_t n;
897
- bufsize_t sps;
995
+ bufsize_t endurl, starttitle, endtitle, endall;
996
+ bufsize_t sps, n;
898
997
  cmark_reference *ref = NULL;
899
998
  cmark_chunk url_chunk, title_chunk;
900
999
  cmark_chunk url, title;
@@ -912,13 +1011,13 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
912
1011
  opener = subj->last_bracket;
913
1012
 
914
1013
  if (opener == NULL) {
915
- return make_str(subj->mem, cmark_chunk_literal("]"));
1014
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
916
1015
  }
917
1016
 
918
1017
  if (!opener->active) {
919
1018
  // take delimiter off stack
920
1019
  pop_bracket(subj);
921
- return make_str(subj->mem, cmark_chunk_literal("]"));
1020
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
922
1021
  }
923
1022
 
924
1023
  // If we got here, we matched a potential link/image text.
@@ -930,11 +1029,11 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
930
1029
  // First, look for an inline link.
931
1030
  if (peek_char(subj) == '(' &&
932
1031
  ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
933
- ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
1032
+ ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
1033
+ &url_chunk)) > -1)) {
934
1034
 
935
1035
  // try to parse an explicit link:
936
- starturl = subj->pos + 1 + sps; // after (
937
- endurl = starturl + n;
1036
+ endurl = subj->pos + 1 + sps + n;
938
1037
  starttitle = endurl + scan_spacechars(&subj->input, endurl);
939
1038
 
940
1039
  // ensure there are spaces btw url and title
@@ -947,7 +1046,6 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
947
1046
  if (peek_at(subj, endall) == ')') {
948
1047
  subj->pos = endall + 1;
949
1048
 
950
- url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
951
1049
  title_chunk =
952
1050
  cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
953
1051
  url = cmark_clean_url(subj->mem, &url_chunk);
@@ -996,12 +1094,15 @@ noMatch:
996
1094
  // If we fall through to here, it means we didn't match a link:
997
1095
  pop_bracket(subj); // remove this opener from delimiter list
998
1096
  subj->pos = initial_pos;
999
- return make_str(subj->mem, cmark_chunk_literal("]"));
1097
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1000
1098
 
1001
1099
  match:
1002
1100
  inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1003
1101
  inl->as.link.url = url;
1004
1102
  inl->as.link.title = title;
1103
+ inl->start_line = inl->end_line = subj->line;
1104
+ inl->start_column = opener->inl_text->start_column;
1105
+ inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1005
1106
  cmark_node_insert_before(opener->inl_text, inl);
1006
1107
  // Add link text:
1007
1108
  tmp = opener->inl_text->next;
@@ -1048,6 +1149,8 @@ static cmark_node *handle_newline(subject *subj) {
1048
1149
  if (peek_at(subj, subj->pos) == '\n') {
1049
1150
  advance(subj);
1050
1151
  }
1152
+ ++subj->line;
1153
+ subj->column_offset = -subj->pos;
1051
1154
  // skip spaces at beginning of line
1052
1155
  skip_spaces(subj);
1053
1156
  if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
@@ -1101,12 +1204,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options) {
1101
1204
  return subj->input.len;
1102
1205
  }
1103
1206
 
1104
- void cmark_inlines_add_special_character(unsigned char c) {
1207
+ void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
1105
1208
  SPECIAL_CHARS[c] = 1;
1209
+ if (emphasis)
1210
+ SKIP_CHARS[c] = 1;
1106
1211
  }
1107
1212
 
1108
- void cmark_inlines_remove_special_character(unsigned char c) {
1213
+ void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
1109
1214
  SPECIAL_CHARS[c] = 0;
1215
+ if (emphasis)
1216
+ SKIP_CHARS[c] = 0;
1110
1217
  }
1111
1218
 
1112
1219
  static cmark_node *try_extensions(cmark_parser *parser,
@@ -1133,7 +1240,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1133
1240
  cmark_node *new_inl = NULL;
1134
1241
  cmark_chunk contents;
1135
1242
  unsigned char c;
1136
- bufsize_t endpos;
1243
+ bufsize_t startpos, endpos;
1137
1244
  c = peek_char(subj);
1138
1245
  if (c == 0) {
1139
1246
  return 0;
@@ -1144,7 +1251,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1144
1251
  new_inl = handle_newline(subj);
1145
1252
  break;
1146
1253
  case '`':
1147
- new_inl = handle_backticks(subj);
1254
+ new_inl = handle_backticks(subj, options);
1148
1255
  break;
1149
1256
  case '\\':
1150
1257
  new_inl = handle_backslash(parser, subj);
@@ -1153,7 +1260,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1153
1260
  new_inl = handle_entity(subj);
1154
1261
  break;
1155
1262
  case '<':
1156
- new_inl = handle_pointy_brace(subj, (options & CMARK_OPT_LIBERAL_HTML_TAG) != 0);
1263
+ new_inl = handle_pointy_brace(subj, options);
1157
1264
  break;
1158
1265
  case '*':
1159
1266
  case '_':
@@ -1169,7 +1276,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1169
1276
  break;
1170
1277
  case '[':
1171
1278
  advance(subj);
1172
- new_inl = make_str(subj->mem, cmark_chunk_literal("["));
1279
+ new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
1173
1280
  push_bracket(subj, false, new_inl);
1174
1281
  break;
1175
1282
  case ']':
@@ -1179,10 +1286,10 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1179
1286
  advance(subj);
1180
1287
  if (peek_char(subj) == '[') {
1181
1288
  advance(subj);
1182
- new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
1289
+ new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
1183
1290
  push_bracket(subj, true, new_inl);
1184
1291
  } else {
1185
- new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
1292
+ new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
1186
1293
  }
1187
1294
  break;
1188
1295
  default:
@@ -1192,6 +1299,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1192
1299
 
1193
1300
  endpos = subject_find_special_char(subj, options);
1194
1301
  contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1302
+ startpos = subj->pos;
1195
1303
  subj->pos = endpos;
1196
1304
 
1197
1305
  // if we're at a newline, strip trailing spaces.
@@ -1199,7 +1307,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
1199
1307
  cmark_chunk_rtrim(&contents);
1200
1308
  }
1201
1309
 
1202
- new_inl = make_str(subj->mem, contents);
1310
+ new_inl = make_str(subj, startpos, endpos - 1, contents);
1203
1311
  }
1204
1312
  if (new_inl != NULL) {
1205
1313
  cmark_node_append_child(parent, new_inl);
@@ -1214,7 +1322,7 @@ void cmark_parse_inlines(cmark_parser *parser,
1214
1322
  cmark_reference_map *refmap,
1215
1323
  int options) {
1216
1324
  subject subj;
1217
- subject_from_buf(parser->mem, &subj, &parent->content, refmap);
1325
+ subject_from_buf(parser->mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &parent->content, refmap);
1218
1326
  cmark_chunk_rtrim(&subj.input);
1219
1327
 
1220
1328
  while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options))
@@ -1253,7 +1361,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
1253
1361
  bufsize_t matchlen = 0;
1254
1362
  bufsize_t beforetitle;
1255
1363
 
1256
- subject_from_buf(mem, &subj, input, NULL);
1364
+ subject_from_buf(mem, -1, 0, &subj, input, NULL);
1257
1365
 
1258
1366
  // parse label:
1259
1367
  if (!link_label(&subj, &lab) || lab.len == 0)
@@ -1268,9 +1376,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
1268
1376
 
1269
1377
  // parse link url:
1270
1378
  spnl(&subj);
1271
- matchlen = manual_scan_link_url(&subj.input, subj.pos);
1272
- if (matchlen > 0) {
1273
- url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1379
+ if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
1380
+ url.len > 0) {
1274
1381
  subj.pos += matchlen;
1275
1382
  } else {
1276
1383
  return 0;
@@ -1425,6 +1532,10 @@ void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) {
1425
1532
  parser->pos = offset;
1426
1533
  }
1427
1534
 
1535
+ int cmark_inline_parser_get_column(cmark_inline_parser *parser) {
1536
+ return parser->pos + 1 + parser->column_offset + parser->block_offset;
1537
+ }
1538
+
1428
1539
  cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) {
1429
1540
  return &parser->input;
1430
1541
  }
@@ -1453,3 +1564,7 @@ void cmark_node_unput(cmark_node *node, int n) {
1453
1564
  delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) {
1454
1565
  return parser->last_delim;
1455
1566
  }
1567
+
1568
+ int cmark_inline_parser_get_line(cmark_inline_parser *parser) {
1569
+ return parser->line;
1570
+ }