markly 0.6.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/markly/table.c CHANGED
@@ -114,60 +114,87 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi
114
114
  static table_row *row_from_string(cmark_syntax_extension *self,
115
115
  cmark_parser *parser, unsigned char *string,
116
116
  int len) {
117
+ // Parses a single table row. It has the following form:
118
+ // `delim? table_cell (delim table_cell)* delim? newline`
119
+ // Note that cells are allowed to be empty.
120
+ //
121
+ // From the GitHub-flavored Markdown specification:
122
+ //
123
+ // > Each row consists of cells containing arbitrary text, in which inlines
124
+ // > are parsed, separated by pipes (|). A leading and trailing pipe is also
125
+ // > recommended for clarity of reading, and if there’s otherwise parsing
126
+ // > ambiguity.
127
+
117
128
  table_row *row = NULL;
118
129
  bufsize_t cell_matched = 1, pipe_matched = 1, offset;
119
- int cell_end_offset;
130
+ int expect_more_cells = 1;
131
+ int row_end_offset = 0;
120
132
 
121
133
  row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
122
134
  row->n_columns = 0;
123
135
  row->cells = NULL;
124
136
 
137
+ // Scan past the (optional) leading pipe.
125
138
  offset = scan_table_cell_end(string, len, 0);
126
139
 
127
140
  // Parse the cells of the row. Stop if we reach the end of the input, or if we
128
141
  // cannot detect any more cells.
129
- while (offset < len && (cell_matched || pipe_matched)) {
142
+ while (offset < len && expect_more_cells) {
130
143
  cell_matched = scan_table_cell(string, len, offset);
131
144
  pipe_matched = scan_table_cell_end(string, len, offset + cell_matched);
132
145
 
133
146
  if (cell_matched || pipe_matched) {
134
- cell_end_offset = offset + cell_matched - 1;
147
+ // We are guaranteed to have a cell, since (1) either we found some
148
+ // content and cell_matched, or (2) we found an empty cell followed by a
149
+ // pipe.
150
+ cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
151
+ cell_matched);
152
+ cmark_strbuf_trim(cell_buf);
153
+
154
+ node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
155
+ cell->buf = cell_buf;
156
+ cell->start_offset = offset;
157
+ cell->end_offset = offset + cell_matched - 1;
158
+
159
+ while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
160
+ --cell->start_offset;
161
+ ++cell->internal_offset;
162
+ }
163
+
164
+ row->n_columns += 1;
165
+ row->cells = cmark_llist_append(parser->mem, row->cells, cell);
166
+ }
167
+
168
+ offset += cell_matched + pipe_matched;
169
+
170
+ if (pipe_matched) {
171
+ expect_more_cells = 1;
172
+ } else {
173
+ // We've scanned the last cell. Check if we have reached the end of the row
174
+ row_end_offset = scan_table_row_end(string, len, offset);
175
+ offset += row_end_offset;
135
176
 
136
- if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') {
137
- row->paragraph_offset = cell_end_offset;
177
+ // If the end of the row is not the end of the input,
178
+ // the row is not a real row but potentially part of the paragraph
179
+ // preceding the table.
180
+ if (row_end_offset && offset != len) {
181
+ row->paragraph_offset = offset;
138
182
 
139
183
  cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell);
140
184
  row->cells = NULL;
141
185
  row->n_columns = 0;
142
- } else {
143
- cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
144
- cell_matched);
145
- cmark_strbuf_trim(cell_buf);
146
-
147
- node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
148
- cell->buf = cell_buf;
149
- cell->start_offset = offset;
150
- cell->end_offset = cell_end_offset;
151
-
152
- while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
153
- --cell->start_offset;
154
- ++cell->internal_offset;
155
- }
156
186
 
157
- row->n_columns += 1;
158
- row->cells = cmark_llist_append(parser->mem, row->cells, cell);
159
- }
160
- }
187
+ // Scan past the (optional) leading pipe.
188
+ offset += scan_table_cell_end(string, len, offset);
161
189
 
162
- offset += cell_matched + pipe_matched;
163
-
164
- if (!pipe_matched) {
165
- pipe_matched = scan_table_row_end(string, len, offset);
166
- offset += pipe_matched;
190
+ expect_more_cells = 1;
191
+ } else {
192
+ expect_more_cells = 0;
193
+ }
167
194
  }
168
195
  }
169
196
 
170
- if (offset != len || !row->n_columns) {
197
+ if (offset != len || row->n_columns == 0) {
171
198
  free_table_row(parser->mem, row);
172
199
  row = NULL;
173
200
  }
@@ -199,8 +226,6 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
199
226
  cmark_parser *parser,
200
227
  cmark_node *parent_container,
201
228
  unsigned char *input, int len) {
202
- bufsize_t matched =
203
- scan_table_start(input, len, cmark_parser_get_first_nonspace(parser));
204
229
  cmark_node *table_header;
205
230
  table_row *header_row = NULL;
206
231
  table_row *marker_row = NULL;
@@ -208,41 +233,37 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
208
233
  const char *parent_string;
209
234
  uint16_t i;
210
235
 
211
- if (!matched)
212
- return parent_container;
213
-
214
- parent_string = cmark_node_get_string_content(parent_container);
215
-
216
- cmark_arena_push();
217
-
218
- header_row = row_from_string(self, parser, (unsigned char *)parent_string,
219
- (int)strlen(parent_string));
220
-
221
- if (!header_row) {
222
- free_table_row(parser->mem, header_row);
223
- cmark_arena_pop();
236
+ if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
224
237
  return parent_container;
225
238
  }
226
239
 
240
+ // Since scan_table_start was successful, we must have a marker row.
227
241
  marker_row = row_from_string(self, parser,
228
242
  input + cmark_parser_get_first_nonspace(parser),
229
243
  len - cmark_parser_get_first_nonspace(parser));
230
-
231
244
  assert(marker_row);
232
245
 
233
- if (header_row->n_columns != marker_row->n_columns) {
234
- free_table_row(parser->mem, header_row);
246
+ cmark_arena_push();
247
+
248
+ // Check for a matching header row. We call `row_from_string` with the entire
249
+ // (potentially long) parent container as input, but this should be safe since
250
+ // `row_from_string` bails out early if it does not find a row.
251
+ parent_string = cmark_node_get_string_content(parent_container);
252
+ header_row = row_from_string(self, parser, (unsigned char *)parent_string,
253
+ (int)strlen(parent_string));
254
+ if (!header_row || header_row->n_columns != marker_row->n_columns) {
235
255
  free_table_row(parser->mem, marker_row);
256
+ free_table_row(parser->mem, header_row);
236
257
  cmark_arena_pop();
237
258
  return parent_container;
238
259
  }
239
260
 
240
261
  if (cmark_arena_pop()) {
262
+ marker_row = row_from_string(
263
+ self, parser, input + cmark_parser_get_first_nonspace(parser),
264
+ len - cmark_parser_get_first_nonspace(parser));
241
265
  header_row = row_from_string(self, parser, (unsigned char *)parent_string,
242
266
  (int)strlen(parent_string));
243
- marker_row = row_from_string(self, parser,
244
- input + cmark_parser_get_first_nonspace(parser),
245
- len - cmark_parser_get_first_nonspace(parser));
246
267
  }
247
268
 
248
269
  if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) {
@@ -257,9 +278,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
257
278
  }
258
279
 
259
280
  cmark_node_set_syntax_extension(parent_container, self);
260
-
261
281
  parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
262
-
263
282
  set_n_table_columns(parent_container, header_row->n_columns);
264
283
 
265
284
  uint8_t *alignments =
Binary file
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'stringio'
5
+
6
module Markly
  module Renderer
    # Base class for renderers that walk a node tree and append output to an
    # in-memory string stream. For every non-document node, #render dispatches
    # to a method named after the node's type (e.g. :paragraph -> #paragraph),
    # so subclasses supply one method per node type they support.
    class Generic
      # flags:      bitmask tested by #flag_enabled?.
      # extensions: list of enabled extension names; only :tagfilter is
      #             consulted by this class.
      def initialize(flags: DEFAULT, extensions: [])
        @flags = flags
        @stream = StringIO.new(+'')
        @need_blocksep = false
        @in_tight = false
        @in_plain = false
        @tagfilter = extensions.include?(:tagfilter)
      end

      attr_accessor :in_tight
      attr_accessor :in_plain

      # Writes each argument to the output stream:
      # * :children  - renders every child of the node currently being rendered
      # * an Array   - renders each element as a node
      # * a Node     - renders that node
      # * other      - written to the stream as-is
      def out(*args)
        args.each do |arg|
          if arg == :children
            @node.each { |child| out(child) }
          elsif arg.is_a?(Array)
            arg.each { |x| render(x) }
          elsif arg.is_a?(Node)
            render(arg)
          else
            @stream.write(arg)
          end
        end
      end

      # Renders +node+. For a :document node the accumulated output string is
      # returned. While in "plain" mode (see #plain) only :text and :softbreak
      # nodes are dispatched; any other node is replaced by its children.
      def render(node)
        # Remember the current node: out(:children) and the escape helpers
        # operate on it.
        @node = node
        if node.type == :document
          document(node)
          @stream.string
        elsif @in_plain && node.type != :text && node.type != :softbreak
          node.each { |child| render(child) }
        else
          send(node.type, node)
        end
      end

      def document(_node)
        out(:children)
      end

      # Must be overridden by subclasses. The previous body called itself
      # unconditionally, so any use of the base implementation recursed until
      # the stack overflowed; fail fast with a descriptive error instead.
      def code_block(_node)
        raise NotImplementedError, "#{self.class} must implement #code_block"
      end

      # Reference definitions produce no output.
      def reference_def(_node); end

      # Emits a newline unless the output is empty or already ends with one.
      def cr
        return if @stream.string.empty? || @stream.string[-1] == "\n"

        out("\n")
      end

      def blocksep
        out("\n")
      end

      def containersep
        cr unless @in_tight
      end

      # Runs the given block with the output positioned at the start of a
      # line before it and after it.
      def block
        cr
        yield
        cr
      end

      # Wraps the output of the given block between +starter+ and +ender+.
      def container(starter, ender)
        out(starter)
        yield
        out(ender)
      end

      # Runs the given block in "plain" mode and restores the previous mode
      # afterwards, even if the block raises.
      def plain
        old_in_plain = @in_plain
        @in_plain = true
        yield
      ensure
        @in_plain = old_in_plain
      end

      private

      def escape_href(str)
        @node.html_escape_href(str)
      end

      def escape_html(str)
        @node.html_escape_html(str)
      end

      # When the :tagfilter extension is enabled, replaces the leading "<" of
      # the listed tags with "&lt;" (case-insensitively); otherwise returns
      # +str+ unchanged.
      def tagfilter(str)
        if @tagfilter
          str.gsub(
            %r{
              <
              (
                title|textarea|style|xmp|iframe|
                noembed|noframes|script|plaintext
              )
              (?=\s|>|/>)
            }xi,
            '&lt;\1'
          )
        else
          str
        end
      end

      # Returns a ` data-sourcepos="..."` attribute string for +node+, or an
      # empty string when the SOURCE_POSITION flag is not set.
      def source_position(node)
        return '' unless flag_enabled?(SOURCE_POSITION)

        s = node.source_position
        " data-sourcepos=\"#{s[:start_line]}:#{s[:start_column]}-" \
          "#{s[:end_line]}:#{s[:end_column]}\""
      end

      def flag_enabled?(flag)
        (@flags & flag) != 0
      end
    end
  end
end
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'generic'
4
+ require 'cgi'
5
+
6
module Markly
  module Renderer
    # HTML renderer. Each public method named after a node type emits the
    # markup for that node through the Generic output helpers.
    class HTML < Generic
      # ids:   when true, every header opens a <section> whose id is derived
      #        from the header text.
      # tight: initial "tight" state; paragraphs outside blockquotes are
      #        rendered without <p> wrappers while it is set.
      # Remaining options are forwarded to Generic#initialize.
      def initialize(ids: false, tight: false, **options)
        super(**options)

        @ids = ids
        @section = nil
        @tight = tight
      end

      # Renders the whole document, then closes the footnotes list and any
      # header <section> that is still open.
      def document(_)
        @section = false
        super
        out("</ol>\n</section>\n") if @written_footnote_ix
        out("</section>") if @section
      end

      # Returns an ` id="..."` attribute built from the node's plain text
      # (lower-cased, whitespace runs replaced by "-"), or nil when ids are
      # disabled.
      def id_for(node)
        if @ids
          id = node.to_plaintext.chomp.downcase.gsub(/\s+/, '-')

          return " id=\"#{CGI.escape_html id}\""
        end
      end

      def header(node)
        block do
          if @ids
            # Each header closes the previous section before opening its own.
            out('</section>') if @section
            @section = true
            out("<section#{id_for(node)}>")
          end

          out('<h', node.header_level, "#{source_position(node)}>", :children,
              '</h', node.header_level, '>')
        end
      end

      def paragraph(node)
        if @tight && node.parent.type != :blockquote
          out(:children)
        else
          block do
            container("<p#{source_position(node)}>", '</p>') do
              out(:children)
              # The last paragraph of a footnote definition carries the
              # back-reference link.
              if node.parent.type == :footnote_definition && node.next.nil?
                out(' ')
                out_footnote_backref
              end
            end
          end
        end
      end

      # Renders a bullet or ordered list. The list's own tightness applies to
      # its children; the previous value is restored afterwards, even if
      # rendering raises.
      def list(node)
        old_tight = @tight
        @tight = node.list_tight

        block do
          if node.list_type == :bullet_list
            container("<ul#{source_position(node)}>\n", '</ul>') do
              out(:children)
            end
          else
            start = if node.list_start == 1
                      "<ol#{source_position(node)}>\n"
                    else
                      "<ol start=\"#{node.list_start}\"#{source_position(node)}>\n"
                    end
            container(start, '</ol>') do
              out(:children)
            end
          end
        end
      ensure
        @tight = old_tight
      end

      def list_item(node)
        block do
          tasklist_data = tasklist(node)
          container("<li#{source_position(node)}#{tasklist_data}>#{' ' if tasklist?(node)}", '</li>') do
            out(:children)
          end
        end
      end

      # Returns the checkbox markup for a task-list item, or '' for any other
      # node. The fragment deliberately starts with ">" and ends with "/":
      # #list_item splices it in just before the ">" that would close "<li",
      # so the result reads `<li><input ... />`.
      def tasklist(node)
        return '' unless tasklist?(node)

        state = if checked?(node)
                  'checked="" disabled=""'
                else
                  'disabled=""'
                end
        "><input type=\"checkbox\" #{state} /"
      end

      def blockquote(node)
        block do
          container("<blockquote#{source_position(node)}>\n", '</blockquote>') do
            out(:children)
          end
        end
      end

      def hrule(node)
        block do
          out("<hr#{source_position(node)} />")
        end
      end

      # Renders a code block. The first whitespace-separated token of the
      # fence info string becomes `lang="..."` on <pre> (GITHUB_PRE_LANG) or
      # `class="language-..."` on <code>. The token comes straight from the
      # document, so it is HTML-escaped before being placed in the attribute.
      def code_block(node)
        block do
          info = node.fence_info
          language = escape_html(info.split(/\s+/)[0].to_s) if info && !info.empty?

          if flag_enabled?(GITHUB_PRE_LANG)
            out("<pre#{source_position(node)}")
            out(' lang="', language, '"') if language
            out('><code>')
          else
            out("<pre#{source_position(node)}><code")
            if language
              out(' class="language-', language, '">')
            else
              out('>')
            end
          end
          out(escape_html(node.string_content))
          out('</code></pre>')
        end
      end

      # Raw HTML blocks are emitted (after tag filtering) only when the UNSAFE
      # flag is set; otherwise a placeholder comment is written.
      def html(node)
        block do
          if flag_enabled?(UNSAFE)
            out(tagfilter(node.string_content))
          else
            out('<!-- raw HTML omitted -->')
          end
        end
      end

      def inline_html(node)
        if flag_enabled?(UNSAFE)
          out(tagfilter(node.string_content))
        else
          out('<!-- raw HTML omitted -->')
        end
      end

      def emph(_)
        out('<em>', :children, '</em>')
      end

      def strong(_)
        out('<strong>', :children, '</strong>')
      end

      def link(node)
        out('<a href="', node.url.nil? ? '' : escape_href(node.url), '"')
        out(' title="', escape_html(node.title), '"') if node.title && !node.title.empty?
        out('>', :children, '</a>')
      end

      def image(node)
        # Guard against a missing URL the same way #link does.
        out('<img src="', node.url.nil? ? '' : escape_href(node.url), '"')
        # The alt text is the plain-text rendering of the image's children.
        plain do
          out(' alt="', :children, '"')
        end
        out(' title="', escape_html(node.title), '"') if node.title && !node.title.empty?
        out(' />')
      end

      def text(node)
        out(escape_html(node.string_content))
      end

      def code(node)
        out('<code>')
        out(escape_html(node.string_content))
        out('</code>')
      end

      def linebreak(_node)
        out("<br />\n")
      end

      def softbreak(_)
        if flag_enabled?(HARD_BREAKS)
          out("<br />\n")
        elsif flag_enabled?(NO_BREAKS)
          out(' ')
        else
          out("\n")
        end
      end

      def table(node)
        @alignments = node.table_alignments
        @needs_close_tbody = false
        out("<table#{source_position(node)}>\n", :children)
        out("</tbody>\n") if @needs_close_tbody
        out("</table>\n")
      end

      def table_header(node)
        @column_index = 0

        @in_header = true
        out("<thead>\n<tr#{source_position(node)}>\n", :children, "</tr>\n</thead>\n")
        @in_header = false
      end

      def table_row(node)
        @column_index = 0
        # The first body row opens <tbody>; #table closes it.
        if !@in_header && !@needs_close_tbody
          @needs_close_tbody = true
          out("<tbody>\n")
        end
        out("<tr#{source_position(node)}>\n", :children, "</tr>\n")
      end

      def table_cell(node)
        align = case @alignments[@column_index]
                when :left then ' align="left"'
                when :right then ' align="right"'
                when :center then ' align="center"'
                else ''
                end
        tag = @in_header ? 'th' : 'td'
        out("<#{tag}#{align}#{source_position(node)}>", :children, "</#{tag}>\n")
        @column_index += 1
      end

      def strikethrough(_)
        out('<del>', :children, '</del>')
      end

      def footnote_reference(node)
        out("<sup class=\"footnote-ref\"><a href=\"#fn#{node.string_content}\" id=\"fnref#{node.string_content}\">#{node.string_content}</a></sup>")
        # NOTE(review): this additionally writes the node's own to_html output
        # after the hand-built <sup>, which looks like it may emit the
        # reference twice - confirm whether this call is intended.
        out(node.to_html)
      end

      def footnote_definition(_)
        # The first definition opens the footnotes section; #document closes it.
        unless @footnote_ix
          out("<section class=\"footnotes\" data-footnotes>\n<ol>\n")
          @footnote_ix = 0
        end

        @footnote_ix += 1
        out("<li id=\"fn#{@footnote_ix}\">\n", :children)
        out("\n") if out_footnote_backref
        out("</li>\n")
      end

      private

      # Writes the back-reference link for the current footnote unless it has
      # already been written. Returns true when the link was emitted.
      def out_footnote_backref
        return false if @written_footnote_ix == @footnote_ix

        @written_footnote_ix = @footnote_ix

        out("<a href=\"#fnref#{@footnote_ix}\" class=\"footnote-backref\">↩</a>")
        true
      end

      def tasklist?(node)
        node.type_string == 'tasklist'
      end

      def checked?(node)
        node.tasklist_item_checked?
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Markly
4
- VERSION = '0.6.1'
4
+ VERSION = '0.7.0'
5
5
  end
data/lib/markly.rb CHANGED
@@ -6,8 +6,8 @@ require 'markly/markly'
6
6
 
7
7
  require_relative 'markly/flags'
8
8
  require_relative 'markly/node'
9
- require_relative 'markly/renderer'
10
- require_relative 'markly/renderer/html_renderer'
9
+ require_relative 'markly/renderer/html'
10
+
11
11
  require_relative 'markly/version'
12
12
 
13
13
  module Markly
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2021-09-03 00:00:00.000000000 Z
13
+ date: 2022-01-17 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bake
@@ -150,11 +150,11 @@ files:
150
150
  - ext/markly/xml.c
151
151
  - lib/markly.rb
152
152
  - lib/markly/flags.rb
153
- - lib/markly/markly.so
153
+ - lib/markly/markly.bundle
154
154
  - lib/markly/node.rb
155
155
  - lib/markly/node/inspect.rb
156
- - lib/markly/renderer.rb
157
- - lib/markly/renderer/html_renderer.rb
156
+ - lib/markly/renderer/generic.rb
157
+ - lib/markly/renderer/html.rb
158
158
  - lib/markly/version.rb
159
159
  homepage: https://github.com/ioquatix/markly
160
160
  licenses:
@@ -176,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
176
176
  - !ruby/object:Gem::Version
177
177
  version: '0'
178
178
  requirements: []
179
- rubygems_version: 3.1.6
179
+ rubygems_version: 3.2.32
180
180
  signing_key:
181
181
  specification_version: 4
182
182
  summary: CommonMark parser and renderer. Written in C, wrapped in Ruby.
data/lib/markly/markly.so DELETED
Binary file