hamdown_core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'error'
3
+
4
module HamdownCore
  # Tracks the indentation of successive template lines as a stack of
  # indent widths and notifies the owner, through the `on_enter` and
  # `on_leave` callbacks, whenever a line is indented deeper or shallower
  # than the current level.
  class IndentTracker
    # Raised when a line dedents to a width that is not on the indent stack.
    class IndentMismatch < Error
      attr_reader :current_level, :indent_levels

      def initialize(current_level, indent_levels, lineno)
        super("Unexpected indent level: #{current_level}: indent_level=#{indent_levels}", lineno)
        @current_level = current_level
        @indent_levels = indent_levels
      end
    end

    # Raised when the width of the newest indent step differs from the
    # width of the step before it.
    class InconsistentIndent < Error
      attr_reader :previous_size, :current_size

      def initialize(previous_size, current_size, lineno)
        super("Inconsistent indentation: #{current_size} spaces used for indentation, but the rest of the document was indented using #{previous_size} spaces.", lineno)
        @previous_size = previous_size
        @current_size = current_size
      end
    end

    # Raised when a processed line contains a tab character.
    class HardTabNotAllowed < Error
      def initialize(lineno)
        super('Indentation with hard tabs are not allowed :-p', lineno)
      end
    end

    # @param on_enter [#call, nil] called with (indent_level, text) when a
    #   deeper level is pushed; defaults to a no-op
    # @param on_leave [#call, nil] called with (indent_level, text) once per
    #   level popped; defaults to a no-op
    def initialize(on_enter: nil, on_leave: nil)
      @indent_levels = [0]
      @on_enter = on_enter || ->(_level, _text) {}
      @on_leave = on_leave || ->(_level, _text) {}
      @comment_level = nil
    end

    # Splits +line+ into indentation and content and updates the indent
    # stack for non-blank lines.
    #
    # @return [Array(String, String)] the pair [text, indent]
    # @raise [HardTabNotAllowed] if the line contains a tab anywhere
    def process(line, lineno)
      raise HardTabNotAllowed.new(lineno) if line.include?("\t")

      indent, text = split(line)
      # Blank lines never change the indentation level.
      track(indent.size, text, lineno) unless text.empty?
      [text, indent]
    end

    # @return [Array(String, String)] leading spaces and the remainder
    def split(line)
      line.match(/\A( *)(.*)\z/).captures
    end

    # Unwinds every open level back to column 0 (firing `on_leave` for each).
    def finish
      indent_leave(0, '', -1)
    end

    # @return [Integer] width of the innermost open indent level
    def current_level
      @indent_levels.last
    end

    # Switches to comment mode, remembering the level that encloses the
    # innermost one; deeper lines are ignored until a line dedents to that
    # level or shallower.
    def enter_comment!
      @comment_level = @indent_levels[-2]
    end

    # Verifies that the two most recent indent steps have the same width.
    #
    # @raise [InconsistentIndent] when they differ
    def check_indent_level!(lineno)
      return if @indent_levels.size < 3

      outer, middle, inner = @indent_levels.last(3)
      previous_size = middle - outer
      current_size = inner - middle
      raise InconsistentIndent.new(previous_size, current_size, lineno) unless previous_size == current_size
    end

    private

    # Dispatches on whether the line is deeper, shallower or level with
    # the innermost open indent.
    def track(indent_level, text, lineno)
      case indent_level <=> @indent_levels.last
      when 1 then indent_enter(indent_level, text, lineno)
      when -1 then indent_leave(indent_level, text, lineno)
      end
    end

    def indent_enter(indent_level, text, _lineno)
      # Indentation inside a comment body is not tracked.
      return if @comment_level

      @indent_levels.push(indent_level)
      @on_enter.call(indent_level, text)
    end

    def indent_leave(indent_level, text, lineno)
      if @comment_level
        # Still deeper than the comment's enclosing level: stay in comment mode.
        return if indent_level > @comment_level

        @comment_level = nil
      end

      until indent_level >= @indent_levels.last
        @indent_levels.pop
        @on_leave.call(indent_level, text)
      end

      # After unwinding, the line must sit exactly on a known level.
      return if indent_level == @indent_levels.last

      raise IndentMismatch.new(indent_level, @indent_levels.dup, lineno)
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
module HamdownCore
  # Line-oriented cursor over a template string. Lines are right-stripped
  # once at construction; `lineno` is the number of lines consumed so far.
  class LineParser
    attr_reader :filename, :lineno

    # A line ending in " |" continues onto the following " |" lines.
    MULTILINE_SUFFIX = ' |'

    # Regex to check for blocks with spaces around arguments. Not to be confused
    # with multiline script.
    # For example:
    #     foo.each do | bar |
    #       = bar
    #
    BLOCK_WITH_SPACES = /do\s*\|\s*[^\|]*\s+\|\z/o

    # @param filename [String, nil] name reported alongside parsed nodes
    # @param template_str [String] full template source
    def initialize(filename, template_str)
      @filename = filename
      # rstrip also removes the line terminator, so no separate chomp is needed.
      @lines = template_str.each_line.map(&:rstrip)
      @lineno = 0
    end

    # Consumes and returns the next logical line. Outside of filters,
    # consecutive lines ending in " |" are merged into one string joined
    # with "\n" (the trailing "|" of each is removed).
    #
    # @param in_filter [Boolean] when true the raw line is returned as-is
    # @return [String, nil] the line, or nil when the input is exhausted
    def next_line(in_filter: false)
      # Past the end there is nothing to consume; previously this fell
      # through to `nil.lstrip` and raised NoMethodError.
      return nil unless has_next?

      line = move_next
      return line if in_filter || !is_multiline?(line)

      next_multiline(line)
    end

    # @return [Boolean] whether unconsumed lines remain
    def has_next?
      @lineno < @lines.size
    end

    private

    def is_multiline?(line)
      stripped = line.lstrip
      stripped.end_with?(MULTILINE_SUFFIX) && stripped !~ BLOCK_WITH_SPACES
    end

    # Returns the current line and advances the cursor.
    def move_next
      line = @lines[@lineno]
      @lineno += 1
      line
    end

    # Collects the continuation lines that follow +line+. The first line
    # keeps its indentation; continuation lines are left-stripped.
    def next_multiline(line)
      buf = [line[0, line.size - 1]]
      # Peek before consuming so the first non-continuation line stays unread.
      while has_next? && is_multiline?(@lines[@lineno])
        continuation = move_next
        buf << continuation[0, continuation.size - 1].lstrip
      end
      buf.join("\n")
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'ast'
3
+ require_relative 'element_parser'
4
+ require_relative 'error'
5
+ require_relative 'filter_parser'
6
+ require_relative 'indent_tracker'
7
+ require_relative 'line_parser'
8
+ require_relative 'ruby_multiline'
9
+ require_relative 'script_parser'
10
+ require_relative 'utils'
11
+ require 'pry'
12
+
13
module HamdownCore
  # Turns a template string into an AST. Lines are read through LineParser,
  # nesting is driven by IndentTracker callbacks, and each line is
  # classified either as one of the markdown constructs in MARKDOWN or as a
  # regular Haml-style line dispatched on its first character.
  class Parser
    DOCTYPE_PREFIX = '!'
    ELEMENT_PREFIX = '%'
    COMMENT_PREFIX = '/'
    SILENT_SCRIPT_PREFIX = '-'
    DIV_ID_PREFIX = '#'
    DIV_CLASS_PREFIX = '.'
    FILTER_PREFIX = ':'
    ESCAPE_PREFIX = '\\'

    # Markdown line patterns, tried in insertion order by #markdown_kind.
    #
    # NOTE(review): in 'list_item' the bracket expression is a character
    # class, so it matches ONE character out of digits, '{', ',', '}', '|',
    # '*' and '+' rather than "1-3 digits or * or +" - confirm intent.
    # NOTE(review): the leading `[^!]` in the link patterns consumes one
    # character, so a link at the very start of a line (`[a](b)`) does not
    # match - confirm whether that is intended.
    MARKDOWN = {
      'headers' => /^\#{1,6} .+$/,
      'list_item' => /((^[\d{1,3}|\*|\+]\.? .*\n?)+)/, # not nesting
      'quotes' => /((^>{1,4} .+\n?)+)/,
      'image' => /^!\[[^\[\]]*?\]\([^\s]*?\)/,
      'image_title' => /^!\[[^\[\]]*?\]\([^\s]*\s\".*\"\)/,
      'link' => /^[^!]\[[^\[\]]*?\]\([^\s]*?\)/,
      'link_with_title' => /^[^!]\[[^\[\]]*?\]\([^\s]*\s\".*\"\)/
      # codeblock # nesting within plain text is illegal
    }.freeze
    # bold, italic, b_italic, monospace
    # paragraphs !

    CONDITIONAL_COMMENT_REGEX = /[\[\]]/o

    MID_BLOCK_KEYWORDS = %w[else elsif rescue ensure end when].freeze
    START_BLOCK_KEYWORDS = %w[if begin case unless].freeze
    # Try to parse assignments to block starters as best as possible
    START_BLOCK_KEYWORD_REGEX = /(?:\w+(?:,\s*\w+)*\s*=\s*)?(#{Regexp.union(START_BLOCK_KEYWORDS)})/
    BLOCK_KEYWORD_REGEX = /^-?\s*(?:(#{Regexp.union(MID_BLOCK_KEYWORDS)})|#{START_BLOCK_KEYWORD_REGEX.source})\b/

    # @param options [Hash] only :filename is read; it is attached to nodes
    #   and prepended to the backtrace of parse errors
    def initialize(options = {})
      @filename = options[:filename]
    end

    # Parses +template_str+ and returns the root AST node.
    #
    # @param template_str [String]
    # @return [Ast::Root]
    # @raise [Error] on syntax or indentation errors; when both a filename
    #   and a line number are known, "filename:lineno" is put at the top of
    #   the backtrace before re-raising
    def call(template_str)
      @ast = Ast::Root.new
      @stack = []
      @line_parser = LineParser.new(@filename, template_str)
      @indent_tracker = IndentTracker.new(on_enter: method(:indent_enter), on_leave: method(:indent_leave))
      @filter_parser = FilterParser.new(@indent_tracker)

      while @line_parser.has_next?
        in_filter = !@ast.is_a?(Ast::HamlComment) && @filter_parser.enabled?
        line = @line_parser.next_line(in_filter: in_filter)
        if in_filter
          ast = @filter_parser.append(line)
          @ast << ast if ast
        end
        # The filter parser may have just been disabled by `append`; in that
        # case the same line is parsed as a regular line.
        next if @filter_parser.enabled?

        # A merged multiline is parsed as one line, then padded with Empty
        # nodes, one per newline it contained.
        line_count = line.count("\n")
        line.delete!("\n")
        parse_line(line)
        line_count.times { @ast << create_node(Ast::Empty) }
      end

      ast = @filter_parser.finish
      @ast << ast if ast
      @indent_tracker.finish
      @ast
    rescue Error => e
      e.backtrace.unshift "#{@filename}:#{e.lineno}" if @filename && e.lineno
      raise e
    end

    private

    # Classifies one physical line and appends the resulting node(s) to the
    # current AST node.
    def parse_line(line)
      text, indent = @indent_tracker.process(line, @line_parser.lineno)

      if text.empty?
        @ast << create_node(Ast::Empty)
        return
      end

      # Inside a Haml comment everything is kept as raw text.
      if @ast.is_a?(Ast::HamlComment)
        @ast << create_node(Ast::Text) { |t| t.text = text }
        return
      end

      case markdown_kind(text)
      when 'headers'         then parse_md_header(text)
      when 'list_item'       then parse_md_list(text)
      when 'quotes'          then parse_md_quote(text)
      when 'image'           then parse_md_image(text)
      when 'image_title'     then parse_md_image(text, true)
      when 'link'            then parse_md_link(text)
      # The table key is 'link_with_title'; the old lookup used
      # 'link_title', which returned nil and made this branch unreachable.
      when 'link_with_title' then parse_md_link(text, true)
      else std_parse_line(text, indent)
      end
    end

    # @return [String, nil] key of the first MARKDOWN pattern matching
    #   +text+ (in table order), or nil when none matches
    def markdown_kind(text)
      MARKDOWN.find { |_kind, pattern| pattern =~ text }&.first
    end

    # Dispatches a non-markdown line on its first character.
    def std_parse_line(text, indent)
      case text[0]
      when ESCAPE_PREFIX
        parse_plain(text[1..-1])
      when ELEMENT_PREFIX
        parse_element(text)
      when DOCTYPE_PREFIX
        if text.start_with?('!!!')
          parse_doctype(text)
        else
          parse_script(text)
        end
      when COMMENT_PREFIX
        parse_comment(text)
      when SILENT_SCRIPT_PREFIX
        parse_silent_script(text)
      when DIV_ID_PREFIX, DIV_CLASS_PREFIX
        if text.start_with?('#{')
          # Leading interpolation, not an id shorthand.
          parse_script(text)
        else
          # `#id` / `.class` shorthand: re-parse as an explicit %div.
          parse_line("#{indent}%div#{text}")
        end
      when FILTER_PREFIX
        parse_filter(text)
      else
        parse_script(text)
      end
    end

    def parse_doctype(text)
      @ast << create_node(Ast::Doctype) { |d| d.doctype = text[3..-1].strip }
    end

    def parse_comment(text)
      text = text[1, text.size - 1].strip
      comment = create_node(Ast::HtmlComment)
      comment.comment = text
      if text[0] == '['
        comment.conditional, rest = parse_conditional_comment(text)
        # Mutates the string assigned to `comment.comment` above, so the
        # comment keeps only what follows the conditional (this assumes the
        # setter stores the same String object).
        text.replace(rest)
      end
      @ast << comment
    end

    # @return [Array(String, String)] the bracketed condition (without the
    #   outer brackets) and the left-stripped remainder
    def parse_conditional_comment(text)
      s = StringScanner.new(text[1..-1])
      depth = Utils.balance(s, '[', ']')
      if depth == 0
        [s.pre_match, s.rest.lstrip]
      else
        syntax_error!('Unmatched brackets in conditional comment')
      end
    end

    def parse_plain(text)
      @ast << create_node(Ast::Text) { |t| t.text = text }
    end

    def parse_md_header(text)
      @ast << create_node(Ast::MdHeader) { |t| t.text = text }
    end

    def parse_md_list(text)
      @ast << create_node(Ast::MdList) { |t| t.text = text }
    end

    def parse_md_quote(text)
      @ast << create_node(Ast::MdQuote) { |t| t.text = text }
    end

    def parse_md_image(text, title = false)
      if title == true
        @ast << create_node(Ast::MdImageTitle) { |t| t.text = text }
      else
        @ast << create_node(Ast::MdImage) { |t| t.text = text }
      end
    end

    def parse_md_link(text, title = false)
      if title == true
        @ast << create_node(Ast::MdLinkTitle) { |t| t.text = text }
      else
        @ast << create_node(Ast::MdLink) { |t| t.text = text }
      end
    end

    def parse_element(text)
      @ast << ElementParser.new(@line_parser).parse(text)
    end

    def parse_script(text)
      node = ScriptParser.new(@line_parser).parse(text)
      node.keyword = block_keyword(node.script) if node.is_a?(Ast::Script)
      @ast << node
    end

    def parse_silent_script(text)
      if text.start_with?('-#')
        @ast << create_node(Ast::HamlComment)
        return
      end
      node = create_node(Ast::SilentScript)
      script = text[/\A- *(.*)\z/, 1]
      node.script = [script, *RubyMultiline.read(@line_parser, script)].join("\n")
      node.keyword = block_keyword(node.script)
      @ast << node
    end

    def parse_filter(text)
      filter_name = text[/\A#{FILTER_PREFIX}(\w+)\z/, 1]
      syntax_error!("Invalid filter name: #{text}") unless filter_name
      @filter_parser.start(filter_name, @line_parser.filename, @line_parser.lineno)
    end

    # IndentTracker callback: the last non-empty child of the current node
    # becomes the new current node. Trailing Empty nodes are moved into it.
    def indent_enter(_, _text)
      empty_lines = []
      while @ast.children.last.is_a?(Ast::Empty)
        empty_lines << @ast.children.pop
      end
      @stack.push(@ast)
      @ast = @ast.children.last
      case @ast
      when Ast::Text
        syntax_error!('nesting within plain text is illegal')
      when Ast::Doctype
        syntax_error!('nesting within a header command is illegal')
      when nil
        syntax_error!('Indenting at the beginning of the document is illegal')
      end
      @ast.children = empty_lines
      if @ast.is_a?(Ast::Element) && @ast.self_closing
        syntax_error!('Illegal nesting: nesting within a self-closing tag is illegal')
      end
      if @ast.is_a?(Ast::HtmlComment) && !@ast.comment.empty?
        syntax_error!('Illegal nesting: nesting within a html comment that already has content is illegal.')
      end
      if @ast.is_a?(Ast::HamlComment)
        @indent_tracker.enter_comment!
      else
        @indent_tracker.check_indent_level!(@line_parser.lineno)
      end
      nil
    end

    # IndentTracker callback: return to the parent node (called once per
    # level that is closed).
    def indent_leave(_indent_level, _text)
      @ast = @stack.pop
      nil
    end

    # @return [String, nil] the block keyword (e.g. "if", "else") the
    #   script starts with, optionally after an assignment; nil if none
    def block_keyword(text)
      m = text.match(BLOCK_KEYWORD_REGEX)
      m && (m[1] || m[2])
    end

    def syntax_error!(message)
      raise Error.new(message, @line_parser.lineno)
    end

    # Instantiates +klass+, stamps it with the current filename and line
    # number, and yields it for further setup when a block is given.
    def create_node(klass, &block)
      klass.new.tap do |node|
        node.filename = @line_parser.filename
        node.lineno = @line_parser.lineno
        yield(node) if block
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
module HamdownCore
  # Helper for Ruby code that continues over several template lines
  # because a line ends with a comma.
  module RubyMultiline
    # Reads continuation lines from +line_parser+ for as long as the most
    # recently seen text ends with a comma.
    #
    # @param line_parser [#next_line, #has_next?] source of further lines
    # @param current_text [String, nil] the Ruby code read so far
    # @return [Array<String>] the continuation lines (empty if none)
    def self.read(line_parser, current_text)
      buf = []
      # Stop at end of input: a trailing comma on the last line used to
      # request one more line than exists, which crashed inside the line
      # parser (or appended nil to the buffer).
      while is_ruby_multiline?(current_text) && line_parser.has_next?
        current_text = line_parser.next_line
        buf << current_text
      end
      buf
    end

    # `text' is a Ruby multiline block if it:
    # - ends with a comma
    # - but not "?," which is a character literal
    #   (however, "x?," is a method call and not a literal)
    # - and not "?\," which is a character literal
    def self.is_ruby_multiline?(text)
      return false unless text && text.length > 1 && text[-1] == ','

      # The two characters before the trailing comma; nil when +text+ is
      # only two characters long.
      tail = text[-3, 2]
      !((tail =~ /\W\?/) || tail == '?\\')
    end
    private_class_method :is_ruby_multiline?
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'ast'
3
+ require_relative 'error'
4
+ require_relative 'ruby_multiline'
5
+
6
module HamdownCore
  # Parses a line that is either a script (`=`, `~`, optionally prefixed
  # with `&` or `!`) or plain text, and returns the matching AST node.
  class ScriptParser
    # @param line_parser [#filename, #lineno] also handed to RubyMultiline
    #   to read continuation lines
    def initialize(line_parser)
      @line_parser = line_parser
    end

    # @param text [String] the line content without indentation
    # @return [Ast::Script, Ast::Text, nil] nil only when the text is blank
    def parse(text)
      marker = text[0]
      return parse_script(text) if marker == '=' || marker == '~'
      return parse_sanitized(text) if marker == '&'
      return parse_unescape(text) if marker == '!'

      parse_text(text)
    end

    private

    # `== text` is plain text; `= code` / `~ code` is a script.
    def parse_script(text)
      return create_node(Ast::Text) { |t| t.text = text[2..-1].strip } if text[1] == '='

      script_node(text, 1, text[0])
    end

    # `&== text`, `&= code` / `&~ code`, or `& text`.
    def parse_sanitized(text)
      if text.start_with?('&==')
        create_node(Ast::Text) { |t| t.text = text[3..-1].lstrip }
      elsif ['=', '~'].include?(text[1])
        script_node(text, 2, text[1])
      else
        create_node(Ast::Text) { |t| t.text = text[1..-1].strip }
      end
    end

    # `!== text`, `!= code` / `!~ code`, or `! text`; every variant
    # turns HTML escaping off on the node.
    def parse_unescape(text)
      if text.start_with?('!==')
        unescaped_text(text[3..-1].lstrip)
      elsif ['=', '~'].include?(text[1])
        script_node(text, 2, text[1], unescaped: true)
      else
        unescaped_text(text[1..-1].lstrip)
      end
    end

    def parse_text(text)
      stripped = text.lstrip
      return nil if stripped.empty?

      create_node(Ast::Text) { |t| t.text = stripped }
    end

    # Builds a Text node with HTML escaping disabled.
    def unescaped_text(content)
      create_node(Ast::Text) do |t|
        t.text = content
        t.escape_html = false
      end
    end

    # Builds a Script node from the code starting at +offset+ in +text+.
    # +marker+ is the script sigil; '~' turns on `preserve`.
    def script_node(text, offset, marker, unescaped: false)
      node = create_node(Ast::Script)
      node.escape_html = false if unescaped
      code = text[offset..-1].lstrip
      syntax_error!('No Ruby code to evaluate') if code.empty?
      node.script = [code, *RubyMultiline.read(@line_parser, code)].join("\n")
      node.preserve = (marker == '~')
      node
    end

    def syntax_error!(message)
      raise Error.new(message, @line_parser.lineno)
    end

    # Instantiates +klass+ stamped with the current filename and line
    # number; yields the node when a block is given.
    def create_node(klass)
      node = klass.new
      node.filename = @line_parser.filename
      node.lineno = @line_parser.lineno
      yield(node) if block_given?
      node
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module HamdownCore
  # Rewrites an AST so that every run of consecutive sibling nodes that
  # report `markdownable?` is replaced by a single 'markdown' Filter node
  # holding their texts. The input tree is not modified: each visited
  # node is cloned and given a fresh children list.
  module Transformer
    class << self
      # @param ast [#children, #<<, #clone] root of the tree to transform
      # @return the transformed copy of +ast+
      def call(ast)
        transform(ast)
      end

      private

      def transform(root_node)
        result = root_node.clone
        result.children = []
        pending = []

        root_node.children.each do |child|
          if child.respond_to?(:markdownable?) && child.markdownable?
            pending << child
          else
            # A non-markdown node ends the current run.
            flush(pending, result)
            has_children = child.respond_to?(:children) && child.children.size > 0
            result << (has_children ? transform(child) : child)
          end
        end

        # A run may extend to the last child.
        flush(pending, result)
        result
      end

      # Appends a filter built from +pending+ to +target+ and empties
      # +pending+; does nothing when there is no pending run.
      def flush(pending, target)
        return unless pending.size > 0

        target << create_filter(pending)
        pending.clear
      end

      def create_filter(nodes)
        HamdownCore::Ast::Filter.new.tap do |filter|
          filter.name = 'markdown'
          nodes.each { |md_node| filter.texts << md_node.text }
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
module HamdownCore
  # Stateless helpers shared by the parsers.
  module Utils
    module_function

    # Advances +scanner+ past delimiters until the nesting depth reaches
    # zero or no further delimiter is found.
    #
    # @param scanner [StringScanner] positioned after the opening delimiter(s)
    # @param start [String] opening delimiter (raises depth by one)
    # @param finish [String] closing delimiter (lowers depth by one)
    # @param depth [Integer] number of delimiters already open
    # @return [Integer] remaining depth; 0 means the delimiters balanced
    def balance(scanner, start, finish, depth = 1)
      delimiter = /(#{Regexp.escape(start)}|#{Regexp.escape(finish)})/
      loop do
        break unless depth > 0 && scanner.scan_until(delimiter)

        depth += scanner.matched == start ? 1 : -1
      end
      depth
    end
  end
end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
module HamdownCore
  # Version string of the hamdown_core gem.
  VERSION = '0.5.0'
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'hamdown_core/version'
3
+ require_relative 'hamdown_core/cli'
4
+ require_relative 'hamdown_core/engine'
5
+
6
# Top-level namespace of the gem. This file only loads the version, CLI
# and engine files; the module body itself defines nothing.
module HamdownCore
end