markly 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/markly/table.c CHANGED
@@ -114,60 +114,87 @@ static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsi
114
114
  static table_row *row_from_string(cmark_syntax_extension *self,
115
115
  cmark_parser *parser, unsigned char *string,
116
116
  int len) {
117
+ // Parses a single table row. It has the following form:
118
+ // `delim? table_cell (delim table_cell)* delim? newline`
119
+ // Note that cells are allowed to be empty.
120
+ //
121
+ // From the GitHub-flavored Markdown specification:
122
+ //
123
+ // > Each row consists of cells containing arbitrary text, in which inlines
124
+ // > are parsed, separated by pipes (|). A leading and trailing pipe is also
125
+ // > recommended for clarity of reading, and if there’s otherwise parsing
126
+ // > ambiguity.
127
+
117
128
  table_row *row = NULL;
118
129
  bufsize_t cell_matched = 1, pipe_matched = 1, offset;
119
- int cell_end_offset;
130
+ int expect_more_cells = 1;
131
+ int row_end_offset = 0;
120
132
 
121
133
  row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
122
134
  row->n_columns = 0;
123
135
  row->cells = NULL;
124
136
 
137
+ // Scan past the (optional) leading pipe.
125
138
  offset = scan_table_cell_end(string, len, 0);
126
139
 
127
140
  // Parse the cells of the row. Stop if we reach the end of the input, or if we
128
141
  // cannot detect any more cells.
129
- while (offset < len && (cell_matched || pipe_matched)) {
142
+ while (offset < len && expect_more_cells) {
130
143
  cell_matched = scan_table_cell(string, len, offset);
131
144
  pipe_matched = scan_table_cell_end(string, len, offset + cell_matched);
132
145
 
133
146
  if (cell_matched || pipe_matched) {
134
- cell_end_offset = offset + cell_matched - 1;
147
+ // We are guaranteed to have a cell, since (1) either we found some
148
+ // content and cell_matched, or (2) we found an empty cell followed by a
149
+ // pipe.
150
+ cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
151
+ cell_matched);
152
+ cmark_strbuf_trim(cell_buf);
153
+
154
+ node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
155
+ cell->buf = cell_buf;
156
+ cell->start_offset = offset;
157
+ cell->end_offset = offset + cell_matched - 1;
158
+
159
+ while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
160
+ --cell->start_offset;
161
+ ++cell->internal_offset;
162
+ }
163
+
164
+ row->n_columns += 1;
165
+ row->cells = cmark_llist_append(parser->mem, row->cells, cell);
166
+ }
167
+
168
+ offset += cell_matched + pipe_matched;
169
+
170
+ if (pipe_matched) {
171
+ expect_more_cells = 1;
172
+ } else {
173
+ // We've scanned the last cell. Check if we have reached the end of the row
174
+ row_end_offset = scan_table_row_end(string, len, offset);
175
+ offset += row_end_offset;
135
176
 
136
- if (string[cell_end_offset] == '\n' || string[cell_end_offset] == '\r') {
137
- row->paragraph_offset = cell_end_offset;
177
+ // If the end of the row is not the end of the input,
178
+ // the row is not a real row but potentially part of the paragraph
179
+ // preceding the table.
180
+ if (row_end_offset && offset != len) {
181
+ row->paragraph_offset = offset;
138
182
 
139
183
  cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell);
140
184
  row->cells = NULL;
141
185
  row->n_columns = 0;
142
- } else {
143
- cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
144
- cell_matched);
145
- cmark_strbuf_trim(cell_buf);
146
-
147
- node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell));
148
- cell->buf = cell_buf;
149
- cell->start_offset = offset;
150
- cell->end_offset = cell_end_offset;
151
-
152
- while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') {
153
- --cell->start_offset;
154
- ++cell->internal_offset;
155
- }
156
186
 
157
- row->n_columns += 1;
158
- row->cells = cmark_llist_append(parser->mem, row->cells, cell);
159
- }
160
- }
187
+ // Scan past the (optional) leading pipe.
188
+ offset += scan_table_cell_end(string, len, offset);
161
189
 
162
- offset += cell_matched + pipe_matched;
163
-
164
- if (!pipe_matched) {
165
- pipe_matched = scan_table_row_end(string, len, offset);
166
- offset += pipe_matched;
190
+ expect_more_cells = 1;
191
+ } else {
192
+ expect_more_cells = 0;
193
+ }
167
194
  }
168
195
  }
169
196
 
170
- if (offset != len || !row->n_columns) {
197
+ if (offset != len || row->n_columns == 0) {
171
198
  free_table_row(parser->mem, row);
172
199
  row = NULL;
173
200
  }
@@ -199,8 +226,6 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
199
226
  cmark_parser *parser,
200
227
  cmark_node *parent_container,
201
228
  unsigned char *input, int len) {
202
- bufsize_t matched =
203
- scan_table_start(input, len, cmark_parser_get_first_nonspace(parser));
204
229
  cmark_node *table_header;
205
230
  table_row *header_row = NULL;
206
231
  table_row *marker_row = NULL;
@@ -208,41 +233,37 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
208
233
  const char *parent_string;
209
234
  uint16_t i;
210
235
 
211
- if (!matched)
212
- return parent_container;
213
-
214
- parent_string = cmark_node_get_string_content(parent_container);
215
-
216
- cmark_arena_push();
217
-
218
- header_row = row_from_string(self, parser, (unsigned char *)parent_string,
219
- (int)strlen(parent_string));
220
-
221
- if (!header_row) {
222
- free_table_row(parser->mem, header_row);
223
- cmark_arena_pop();
236
+ if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
224
237
  return parent_container;
225
238
  }
226
239
 
240
+ // Since scan_table_start was successful, we must have a marker row.
227
241
  marker_row = row_from_string(self, parser,
228
242
  input + cmark_parser_get_first_nonspace(parser),
229
243
  len - cmark_parser_get_first_nonspace(parser));
230
-
231
244
  assert(marker_row);
232
245
 
233
- if (header_row->n_columns != marker_row->n_columns) {
234
- free_table_row(parser->mem, header_row);
246
+ cmark_arena_push();
247
+
248
+ // Check for a matching header row. We call `row_from_string` with the entire
249
+ // (potentially long) parent container as input, but this should be safe since
250
+ // `row_from_string` bails out early if it does not find a row.
251
+ parent_string = cmark_node_get_string_content(parent_container);
252
+ header_row = row_from_string(self, parser, (unsigned char *)parent_string,
253
+ (int)strlen(parent_string));
254
+ if (!header_row || header_row->n_columns != marker_row->n_columns) {
235
255
  free_table_row(parser->mem, marker_row);
256
+ free_table_row(parser->mem, header_row);
236
257
  cmark_arena_pop();
237
258
  return parent_container;
238
259
  }
239
260
 
240
261
  if (cmark_arena_pop()) {
262
+ marker_row = row_from_string(
263
+ self, parser, input + cmark_parser_get_first_nonspace(parser),
264
+ len - cmark_parser_get_first_nonspace(parser));
241
265
  header_row = row_from_string(self, parser, (unsigned char *)parent_string,
242
266
  (int)strlen(parent_string));
243
- marker_row = row_from_string(self, parser,
244
- input + cmark_parser_get_first_nonspace(parser),
245
- len - cmark_parser_get_first_nonspace(parser));
246
267
  }
247
268
 
248
269
  if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) {
@@ -257,9 +278,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
257
278
  }
258
279
 
259
280
  cmark_node_set_syntax_extension(parent_container, self);
260
-
261
281
  parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
262
-
263
282
  set_n_table_columns(parent_container, header_row->n_columns);
264
283
 
265
284
  uint8_t *alignments =
Binary file
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require 'stringio'
5
+
6
+ module Markly
7
+ module Renderer
8
+ class Generic
9
+ def initialize(flags: DEFAULT, extensions: [])
10
+ @flags = flags
11
+ @stream = StringIO.new(+'')
12
+ @need_blocksep = false
13
+ @in_tight = false
14
+ @in_plain = false
15
+ @tagfilter = extensions.include?(:tagfilter)
16
+ end
17
+
18
+ attr_accessor :in_tight
19
+ attr_accessor :in_plain
20
+
21
+ def out(*args)
22
+ args.each do |arg|
23
+ if arg == :children
24
+ @node.each { |child| out(child) }
25
+ elsif arg.is_a?(Array)
26
+ arg.each { |x| render(x) }
27
+ elsif arg.is_a?(Node)
28
+ render(arg)
29
+ else
30
+ @stream.write(arg)
31
+ end
32
+ end
33
+ end
34
+
35
+ def render(node)
36
+ @node = node
37
+ if node.type == :document
38
+ document(node)
39
+ @stream.string
40
+ elsif @in_plain && node.type != :text && node.type != :softbreak
41
+ node.each { |child| render(child) }
42
+ else
43
+ send(node.type, node)
44
+ end
45
+ end
46
+
47
+ def document(_node)
48
+ out(:children)
49
+ end
50
+
51
+ def code_block(node)
52
+ code_block(node)
53
+ end
54
+
55
+ def reference_def(_node); end
56
+
57
+ def cr
58
+ return if @stream.string.empty? || @stream.string[-1] == "\n"
59
+
60
+ out("\n")
61
+ end
62
+
63
+ def blocksep
64
+ out("\n")
65
+ end
66
+
67
+ def containersep
68
+ cr unless @in_tight
69
+ end
70
+
71
+ def block
72
+ cr
73
+ yield
74
+ cr
75
+ end
76
+
77
+ def container(starter, ender)
78
+ out(starter)
79
+ yield
80
+ out(ender)
81
+ end
82
+
83
+ def plain
84
+ old_in_plain = @in_plain
85
+ @in_plain = true
86
+ yield
87
+ @in_plain = old_in_plain
88
+ end
89
+
90
+ private
91
+
92
+ def escape_href(str)
93
+ @node.html_escape_href(str)
94
+ end
95
+
96
+ def escape_html(str)
97
+ @node.html_escape_html(str)
98
+ end
99
+
100
+ def tagfilter(str)
101
+ if @tagfilter
102
+ str.gsub(
103
+ %r{
104
+ <
105
+ (
106
+ title|textarea|style|xmp|iframe|
107
+ noembed|noframes|script|plaintext
108
+ )
109
+ (?=\s|>|/>)
110
+ }xi,
111
+ '&lt;\1'
112
+ )
113
+ else
114
+ str
115
+ end
116
+ end
117
+
118
+ def source_position(node)
119
+ return '' unless flag_enabled?(SOURCE_POSITION)
120
+
121
+ s = node.source_position
122
+ " data-sourcepos=\"#{s[:start_line]}:#{s[:start_column]}-" \
123
+ "#{s[:end_line]}:#{s[:end_column]}\""
124
+ end
125
+
126
+ def flag_enabled?(flag)
127
+ (@flags & flag) != 0
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'generic'
4
+ require 'cgi'
5
+
6
+ module Markly
7
+ module Renderer
8
+ class HTML < Generic
9
+ def initialize(ids: false, tight: false, **options)
10
+ super(**options)
11
+
12
+ @ids = ids
13
+ @section = nil
14
+ @tight = tight
15
+ end
16
+
17
+ def document(_)
18
+ @section = false
19
+ super
20
+ out("</ol>\n</section>\n") if @written_footnote_ix
21
+ out("</section>") if @section
22
+ end
23
+
24
+ def id_for(node)
25
+ if @ids
26
+ id = node.to_plaintext.chomp.downcase.gsub(/\s+/, '-')
27
+
28
+ return " id=\"#{CGI.escape_html id}\""
29
+ end
30
+ end
31
+
32
+ def header(node)
33
+ block do
34
+ if @ids
35
+ out('</section>') if @section
36
+ @section = true
37
+ out("<section#{id_for(node)}>")
38
+ end
39
+
40
+ out('<h', node.header_level, "#{source_position(node)}>", :children,
41
+ '</h', node.header_level, '>')
42
+ end
43
+ end
44
+
45
+ def paragraph(node)
46
+ if @tight && node.parent.type != :blockquote
47
+ out(:children)
48
+ else
49
+ block do
50
+ container("<p#{source_position(node)}>", '</p>') do
51
+ out(:children)
52
+ if node.parent.type == :footnote_definition && node.next.nil?
53
+ out(' ')
54
+ out_footnote_backref
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+
61
+ def list(node)
62
+ old_tight = @tight
63
+ @tight = node.list_tight
64
+
65
+ block do
66
+ if node.list_type == :bullet_list
67
+ container("<ul#{source_position(node)}>\n", '</ul>') do
68
+ out(:children)
69
+ end
70
+ else
71
+ start = if node.list_start == 1
72
+ "<ol#{source_position(node)}>\n"
73
+ else
74
+ "<ol start=\"#{node.list_start}\"#{source_position(node)}>\n"
75
+ end
76
+ container(start, '</ol>') do
77
+ out(:children)
78
+ end
79
+ end
80
+ end
81
+
82
+ @tight = old_tight
83
+ end
84
+
85
+ def list_item(node)
86
+ block do
87
+ tasklist_data = tasklist(node)
88
+ container("<li#{source_position(node)}#{tasklist_data}>#{' ' if tasklist?(node)}", '</li>') do
89
+ out(:children)
90
+ end
91
+ end
92
+ end
93
+
94
+ def tasklist(node)
95
+ return '' unless tasklist?(node)
96
+
97
+ state = if checked?(node)
98
+ 'checked="" disabled=""'
99
+ else
100
+ 'disabled=""'
101
+ end
102
+ "><input type=\"checkbox\" #{state} /"
103
+ end
104
+
105
+ def blockquote(node)
106
+ block do
107
+ container("<blockquote#{source_position(node)}>\n", '</blockquote>') do
108
+ out(:children)
109
+ end
110
+ end
111
+ end
112
+
113
+ def hrule(node)
114
+ block do
115
+ out("<hr#{source_position(node)} />")
116
+ end
117
+ end
118
+
119
+ def code_block(node)
120
+ block do
121
+ if flag_enabled?(GITHUB_PRE_LANG)
122
+ out("<pre#{source_position(node)}")
123
+ out(' lang="', node.fence_info.split(/\s+/)[0], '"') if node.fence_info && !node.fence_info.empty?
124
+ out('><code>')
125
+ else
126
+ out("<pre#{source_position(node)}><code")
127
+ if node.fence_info && !node.fence_info.empty?
128
+ out(' class="language-', node.fence_info.split(/\s+/)[0], '">')
129
+ else
130
+ out('>')
131
+ end
132
+ end
133
+ out(escape_html(node.string_content))
134
+ out('</code></pre>')
135
+ end
136
+ end
137
+
138
+ def html(node)
139
+ block do
140
+ if flag_enabled?(UNSAFE)
141
+ out(tagfilter(node.string_content))
142
+ else
143
+ out('<!-- raw HTML omitted -->')
144
+ end
145
+ end
146
+ end
147
+
148
+ def inline_html(node)
149
+ if flag_enabled?(UNSAFE)
150
+ out(tagfilter(node.string_content))
151
+ else
152
+ out('<!-- raw HTML omitted -->')
153
+ end
154
+ end
155
+
156
+ def emph(_)
157
+ out('<em>', :children, '</em>')
158
+ end
159
+
160
+ def strong(_)
161
+ out('<strong>', :children, '</strong>')
162
+ end
163
+
164
+ def link(node)
165
+ out('<a href="', node.url.nil? ? '' : escape_href(node.url), '"')
166
+ out(' title="', escape_html(node.title), '"') if node.title && !node.title.empty?
167
+ out('>', :children, '</a>')
168
+ end
169
+
170
+ def image(node)
171
+ out('<img src="', escape_href(node.url), '"')
172
+ plain do
173
+ out(' alt="', :children, '"')
174
+ end
175
+ out(' title="', escape_html(node.title), '"') if node.title && !node.title.empty?
176
+ out(' />')
177
+ end
178
+
179
+ def text(node)
180
+ out(escape_html(node.string_content))
181
+ end
182
+
183
+ def code(node)
184
+ out('<code>')
185
+ out(escape_html(node.string_content))
186
+ out('</code>')
187
+ end
188
+
189
+ def linebreak(_node)
190
+ out("<br />\n")
191
+ end
192
+
193
+ def softbreak(_)
194
+ if flag_enabled?(HARD_BREAKS)
195
+ out("<br />\n")
196
+ elsif flag_enabled?(NO_BREAKS)
197
+ out(' ')
198
+ else
199
+ out("\n")
200
+ end
201
+ end
202
+
203
+ def table(node)
204
+ @alignments = node.table_alignments
205
+ @needs_close_tbody = false
206
+ out("<table#{source_position(node)}>\n", :children)
207
+ out("</tbody>\n") if @needs_close_tbody
208
+ out("</table>\n")
209
+ end
210
+
211
+ def table_header(node)
212
+ @column_index = 0
213
+
214
+ @in_header = true
215
+ out("<thead>\n<tr#{source_position(node)}>\n", :children, "</tr>\n</thead>\n")
216
+ @in_header = false
217
+ end
218
+
219
+ def table_row(node)
220
+ @column_index = 0
221
+ if !@in_header && !@needs_close_tbody
222
+ @needs_close_tbody = true
223
+ out("<tbody>\n")
224
+ end
225
+ out("<tr#{source_position(node)}>\n", :children, "</tr>\n")
226
+ end
227
+
228
+ def table_cell(node)
229
+ align = case @alignments[@column_index]
230
+ when :left then ' align="left"'
231
+ when :right then ' align="right"'
232
+ when :center then ' align="center"'
233
+ else; ''
234
+ end
235
+ out(@in_header ? "<th#{align}#{source_position(node)}>" : "<td#{align}#{source_position(node)}>", :children, @in_header ? "</th>\n" : "</td>\n")
236
+ @column_index += 1
237
+ end
238
+
239
+ def strikethrough(_)
240
+ out('<del>', :children, '</del>')
241
+ end
242
+
243
+ def footnote_reference(node)
244
+ out("<sup class=\"footnote-ref\"><a href=\"#fn#{node.string_content}\" id=\"fnref#{node.string_content}\">#{node.string_content}</a></sup>")
245
+ out(node.to_html)
246
+ end
247
+
248
+ def footnote_definition(_)
249
+ unless @footnote_ix
250
+ out("<section class=\"footnotes\" data-footnotes>\n<ol>\n")
251
+ @footnote_ix = 0
252
+ end
253
+
254
+ @footnote_ix += 1
255
+ out("<li id=\"fn#{@footnote_ix}\">\n", :children)
256
+ out("\n") if out_footnote_backref
257
+ out("</li>\n")
258
+ # </ol>
259
+ # </section>
260
+ end
261
+
262
+ private
263
+
264
+ def out_footnote_backref
265
+ return false if @written_footnote_ix == @footnote_ix
266
+
267
+ @written_footnote_ix = @footnote_ix
268
+
269
+ out("<a href=\"#fnref#{@footnote_ix}\" class=\"footnote-backref\">↩</a>")
270
+ true
271
+ end
272
+
273
+ def tasklist?(node)
274
+ node.type_string == 'tasklist'
275
+ end
276
+
277
+ def checked?(node)
278
+ node.tasklist_item_checked?
279
+ end
280
+ end
281
+ end
282
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Markly
4
- VERSION = '0.5.1'
4
+ VERSION = '0.7.0'
5
5
  end
data/lib/markly.rb CHANGED
@@ -2,12 +2,12 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  # The compiled library.
5
- require_relative 'markly/markly'
5
+ require 'markly/markly'
6
6
 
7
7
  require_relative 'markly/flags'
8
8
  require_relative 'markly/node'
9
- require_relative 'markly/renderer'
10
- require_relative 'markly/renderer/html_renderer'
9
+ require_relative 'markly/renderer/html'
10
+
11
11
  require_relative 'markly/version'
12
12
 
13
13
  module Markly
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Garen Torikian
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-07-31 00:00:00.000000000 Z
13
+ date: 2022-01-17 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bake
@@ -150,10 +150,11 @@ files:
150
150
  - ext/markly/xml.c
151
151
  - lib/markly.rb
152
152
  - lib/markly/flags.rb
153
+ - lib/markly/markly.bundle
153
154
  - lib/markly/node.rb
154
155
  - lib/markly/node/inspect.rb
155
- - lib/markly/renderer.rb
156
- - lib/markly/renderer/html_renderer.rb
156
+ - lib/markly/renderer/generic.rb
157
+ - lib/markly/renderer/html.rb
157
158
  - lib/markly/version.rb
158
159
  homepage: https://github.com/ioquatix/markly
159
160
  licenses:
@@ -175,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
176
  - !ruby/object:Gem::Version
176
177
  version: '0'
177
178
  requirements: []
178
- rubygems_version: 3.1.2
179
+ rubygems_version: 3.2.32
179
180
  signing_key:
180
181
  specification_version: 4
181
182
  summary: CommonMark parser and renderer. Written in C, wrapped in Ruby.