hamdown_core 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'error'
3
+
4
+ module HamdownCore
5
+ class IndentTracker
6
+ class IndentMismatch < Error
7
+ attr_reader :current_level, :indent_levels
8
+
9
+ def initialize(current_level, indent_levels, lineno)
10
+ super("Unexpected indent level: #{current_level}: indent_level=#{indent_levels}", lineno)
11
+ @current_level = current_level
12
+ @indent_levels = indent_levels
13
+ end
14
+ end
15
+
16
+ class InconsistentIndent < Error
17
+ attr_reader :previous_size, :current_size
18
+
19
+ def initialize(previous_size, current_size, lineno)
20
+ super("Inconsistent indentation: #{current_size} spaces used for indentation, but the rest of the document was indented using #{previous_size} spaces.", lineno)
21
+ @previous_size = previous_size
22
+ @current_size = current_size
23
+ end
24
+ end
25
+
26
+ class HardTabNotAllowed < Error
27
+ def initialize(lineno)
28
+ super('Indentation with hard tabs are not allowed :-p', lineno)
29
+ end
30
+ end
31
+
32
+ def initialize(on_enter: nil, on_leave: nil)
33
+ @indent_levels = [0]
34
+ @on_enter = on_enter || lambda { |_level, _text| }
35
+ @on_leave = on_leave || lambda { |_level, _text| }
36
+ @comment_level = nil
37
+ end
38
+
39
+ def process(line, lineno)
40
+ if line.include?("\t")
41
+ raise HardTabNotAllowed.new(lineno)
42
+ end
43
+ indent, text = split(line)
44
+ indent_level = indent.size
45
+
46
+ unless text.empty?
47
+ track(indent_level, text, lineno)
48
+ end
49
+ [text, indent]
50
+ end
51
+
52
+ def split(line)
53
+ m = line.match(/\A( *)(.*)\z/)
54
+ [m[1], m[2]]
55
+ end
56
+
57
+ def finish
58
+ indent_leave(0, '', -1)
59
+ end
60
+
61
+ def current_level
62
+ @indent_levels.last
63
+ end
64
+
65
+ def enter_comment!
66
+ @comment_level = @indent_levels[-2]
67
+ end
68
+
69
+ def check_indent_level!(lineno)
70
+ if @indent_levels.size >= 3
71
+ previous_size = @indent_levels[-2] - @indent_levels[-3]
72
+ current_size = @indent_levels[-1] - @indent_levels[-2]
73
+ if previous_size != current_size
74
+ raise InconsistentIndent.new(previous_size, current_size, lineno)
75
+ end
76
+ end
77
+ end
78
+
79
+ private
80
+
81
+ def track(indent_level, text, lineno)
82
+ if indent_level > @indent_levels.last
83
+ indent_enter(indent_level, text, lineno)
84
+ elsif indent_level < @indent_levels.last
85
+ indent_leave(indent_level, text, lineno)
86
+ end
87
+ end
88
+
89
+ def indent_enter(indent_level, text, _lineno)
90
+ unless @comment_level
91
+ @indent_levels.push(indent_level)
92
+ @on_enter.call(indent_level, text)
93
+ end
94
+ end
95
+
96
+ def indent_leave(indent_level, text, lineno)
97
+ if @comment_level
98
+ if indent_level <= @comment_level
99
+ # finish comment mode
100
+ @comment_level = nil
101
+ else
102
+ # still in comment
103
+ return
104
+ end
105
+ end
106
+
107
+ while indent_level < @indent_levels.last
108
+ @indent_levels.pop
109
+ @on_leave.call(indent_level, text)
110
+ end
111
+
112
+ if indent_level != @indent_levels.last
113
+ raise IndentMismatch.new(indent_level, @indent_levels.dup, lineno)
114
+ end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+ module HamdownCore
3
+ class LineParser
4
+ attr_reader :filename, :lineno
5
+
6
+ def initialize(filename, template_str)
7
+ @filename = filename
8
+ @lines = template_str.each_line.map { |line| line.chomp.rstrip }
9
+ @lineno = 0
10
+ end
11
+
12
+ def next_line(in_filter: false)
13
+ line = move_next
14
+ if !in_filter && is_multiline?(line)
15
+ next_multiline(line)
16
+ else
17
+ line
18
+ end
19
+ end
20
+
21
+ def has_next?
22
+ @lineno < @lines.size
23
+ end
24
+
25
+ private
26
+
27
+ MULTILINE_SUFFIX = ' |'
28
+
29
+ # Regex to check for blocks with spaces around arguments. Not to be confused
30
+ # with multiline script.
31
+ # For example:
32
+ # foo.each do | bar |
33
+ # = bar
34
+ #
35
+ BLOCK_WITH_SPACES = /do\s*\|\s*[^\|]*\s+\|\z/o
36
+
37
+ def is_multiline?(line)
38
+ line = line.lstrip
39
+ line.end_with?(MULTILINE_SUFFIX) && line !~ BLOCK_WITH_SPACES
40
+ end
41
+
42
+ def move_next
43
+ @lines[@lineno].tap do
44
+ @lineno += 1
45
+ end
46
+ end
47
+
48
+ def move_back
49
+ @lineno -= 1
50
+ end
51
+
52
+ def next_multiline(line)
53
+ buf = [line[0, line.size - 1]]
54
+ while @lineno < @lines.size
55
+ line = move_next
56
+
57
+ if is_multiline?(line)
58
+ line = line[0, line.size - 1]
59
+ buf << line.lstrip
60
+ else
61
+ move_back
62
+ break
63
+ end
64
+ end
65
+ buf.join("\n")
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'ast'
3
+ require_relative 'element_parser'
4
+ require_relative 'error'
5
+ require_relative 'filter_parser'
6
+ require_relative 'indent_tracker'
7
+ require_relative 'line_parser'
8
+ require_relative 'ruby_multiline'
9
+ require_relative 'script_parser'
10
+ require_relative 'utils'
11
+ require 'pry'
12
+
13
+ module HamdownCore
14
+ class Parser
15
+ def initialize(options = {})
16
+ @filename = options[:filename]
17
+ end
18
+
19
+ def call(template_str)
20
+ @ast = Ast::Root.new
21
+ @stack = []
22
+ @line_parser = LineParser.new(@filename, template_str)
23
+ @indent_tracker = IndentTracker.new(on_enter: method(:indent_enter), on_leave: method(:indent_leave))
24
+ @filter_parser = FilterParser.new(@indent_tracker)
25
+
26
+ while @line_parser.has_next?
27
+ in_filter = !@ast.is_a?(Ast::HamlComment) && @filter_parser.enabled?
28
+ line = @line_parser.next_line(in_filter: in_filter)
29
+ if in_filter
30
+ ast = @filter_parser.append(line)
31
+ if ast
32
+ @ast << ast
33
+ end
34
+ end
35
+ unless @filter_parser.enabled?
36
+ line_count = line.count("\n")
37
+ line.delete!("\n")
38
+ parse_line(line)
39
+ line_count.times do
40
+ @ast << create_node(Ast::Empty)
41
+ end
42
+ end
43
+ end
44
+
45
+ ast = @filter_parser.finish
46
+ if ast
47
+ @ast << ast
48
+ end
49
+ @indent_tracker.finish
50
+ @ast
51
+ rescue Error => e
52
+ if @filename && e.lineno
53
+ e.backtrace.unshift "#{@filename}:#{e.lineno}"
54
+ end
55
+ raise e
56
+ end
57
+
58
+ private
59
+
60
+ DOCTYPE_PREFIX = '!'
61
+ ELEMENT_PREFIX = '%'
62
+ COMMENT_PREFIX = '/'
63
+ SILENT_SCRIPT_PREFIX = '-'
64
+ DIV_ID_PREFIX = '#'
65
+ DIV_CLASS_PREFIX = '.'
66
+ FILTER_PREFIX = ':'
67
+ ESCAPE_PREFIX = '\\'
68
+
69
+ MARKDOWN = {
70
+ 'headers' => /^\#{1,6} .+$/,
71
+ 'list_item' => /((^[\d{1,3}|\*|\+]\.? .*\n?)+)/, # not nesting
72
+ 'quotes' => /((^>{1,4} .+\n?)+)/,
73
+ 'image' => /^!\[[^\[\]]*?\]\([^\s]*?\)/,
74
+ 'image_title' => /^!\[[^\[\]]*?\]\([^\s]*\s\".*\"\)/,
75
+ 'link' => /^[^!]\[[^\[\]]*?\]\([^\s]*?\)/,
76
+ 'link_with_title' => /^[^!]\[[^\[\]]*?\]\([^\s]*\s\".*\"\)/
77
+ # codeblock # nesting within plain text is illegal
78
+ }
79
+ # bold, italic, b_italic, monospace
80
+ # paragraphs !
81
+
82
+ def parse_line(line)
83
+ text, indent = @indent_tracker.process(line, @line_parser.lineno)
84
+
85
+ if text.empty?
86
+ @ast << create_node(Ast::Empty)
87
+ return
88
+ end
89
+
90
+ if @ast.is_a?(Ast::HamlComment)
91
+ @ast << create_node(Ast::Text) { |t| t.text = text }
92
+ return
93
+ end
94
+
95
+ case text
96
+ when MARKDOWN['headers']
97
+ parse_md_header(text)
98
+ when MARKDOWN['list_item']
99
+ parse_md_list(text)
100
+ when MARKDOWN['quotes']
101
+ parse_md_quote(text)
102
+ when MARKDOWN['image']
103
+ parse_md_image(text)
104
+ when MARKDOWN['image_title']
105
+ parse_md_image(text, true)
106
+ when MARKDOWN['link']
107
+ parse_md_link(text)
108
+ when MARKDOWN['link_title']
109
+ parse_md_link(text, true)
110
+ else
111
+ std_parse_line(text, indent)
112
+ end
113
+ end
114
+
115
+ def std_parse_line(text, indent)
116
+ case text[0]
117
+ when ESCAPE_PREFIX
118
+ parse_plain(text[1..-1])
119
+ when ELEMENT_PREFIX
120
+ parse_element(text)
121
+ when DOCTYPE_PREFIX
122
+ if text.start_with?('!!!')
123
+ parse_doctype(text)
124
+ else
125
+ parse_script(text)
126
+ end
127
+ when COMMENT_PREFIX
128
+ parse_comment(text)
129
+ when SILENT_SCRIPT_PREFIX
130
+ parse_silent_script(text)
131
+ when DIV_ID_PREFIX, DIV_CLASS_PREFIX
132
+ if text.start_with?('#{')
133
+ parse_script(text)
134
+ else
135
+ parse_line("#{indent}%div#{text}")
136
+ end
137
+ when FILTER_PREFIX
138
+ parse_filter(text)
139
+ else
140
+ parse_script(text)
141
+ end
142
+ end
143
+
144
+ def parse_doctype(text)
145
+ @ast << create_node(Ast::Doctype) { |d| d.doctype = text[3..-1].strip }
146
+ end
147
+
148
+ def parse_comment(text)
149
+ text = text[1, text.size - 1].strip
150
+ comment = create_node(Ast::HtmlComment)
151
+ comment.comment = text
152
+ if text[0] == '['
153
+ comment.conditional, rest = parse_conditional_comment(text)
154
+ text.replace(rest)
155
+ end
156
+ @ast << comment
157
+ end
158
+
159
+ CONDITIONAL_COMMENT_REGEX = /[\[\]]/o
160
+
161
+ def parse_conditional_comment(text)
162
+ s = StringScanner.new(text[1..-1])
163
+ depth = Utils.balance(s, '[', ']')
164
+ if depth == 0
165
+ [s.pre_match, s.rest.lstrip]
166
+ else
167
+ syntax_error!('Unmatched brackets in conditional comment')
168
+ end
169
+ end
170
+
171
+ def parse_plain(text)
172
+ @ast << create_node(Ast::Text) { |t| t.text = text }
173
+ end
174
+
175
+ def parse_md_header(text)
176
+ @ast << create_node(Ast::MdHeader) { |t| t.text = text }
177
+ end
178
+
179
+ def parse_md_list(text)
180
+ @ast << create_node(Ast::MdList) { |t| t.text = text }
181
+ end
182
+
183
+ def parse_md_quote(text)
184
+ @ast << create_node(Ast::MdQuote) { |t| t.text = text }
185
+ end
186
+
187
+ def parse_md_image(text, title = false)
188
+ if title == true
189
+ @ast << create_node(Ast::MdImageTitle) { |t| t.text = text }
190
+ else
191
+ @ast << create_node(Ast::MdImage) { |t| t.text = text }
192
+ end
193
+ end
194
+
195
+ def parse_md_link(text, title = false)
196
+ if title == true
197
+ @ast << create_node(Ast::MdLinkTitle) { |t| t.text = text }
198
+ else
199
+ @ast << create_node(Ast::MdLink) { |t| t.text = text }
200
+ end
201
+ end
202
+
203
+ def parse_element(text)
204
+ @ast << ElementParser.new(@line_parser).parse(text)
205
+ end
206
+
207
+ def parse_script(text)
208
+ node = ScriptParser.new(@line_parser).parse(text)
209
+ if node.is_a?(Ast::Script)
210
+ node.keyword = block_keyword(node.script)
211
+ end
212
+ @ast << node
213
+ end
214
+
215
+ def parse_silent_script(text)
216
+ if text.start_with?('-#')
217
+ @ast << create_node(Ast::HamlComment)
218
+ return
219
+ end
220
+ node = create_node(Ast::SilentScript)
221
+ script = text[/\A- *(.*)\z/, 1]
222
+ node.script = [script, *RubyMultiline.read(@line_parser, script)].join("\n")
223
+ node.keyword = block_keyword(node.script)
224
+ @ast << node
225
+ end
226
+
227
+ def parse_filter(text)
228
+ filter_name = text[/\A#{FILTER_PREFIX}(\w+)\z/, 1]
229
+ unless filter_name
230
+ syntax_error!("Invalid filter name: #{text}")
231
+ end
232
+ @filter_parser.start(filter_name, @line_parser.filename, @line_parser.lineno)
233
+ end
234
+
235
+ def indent_enter(_, _text)
236
+ empty_lines = []
237
+ while @ast.children.last.is_a?(Ast::Empty)
238
+ empty_lines << @ast.children.pop
239
+ end
240
+ @stack.push(@ast)
241
+ @ast = @ast.children.last
242
+ case @ast
243
+ when Ast::Text
244
+ syntax_error!('nesting within plain text is illegal')
245
+ when Ast::Doctype
246
+ syntax_error!('nesting within a header command is illegal')
247
+ when nil
248
+ syntax_error!('Indenting at the beginning of the document is illegal')
249
+ end
250
+ @ast.children = empty_lines
251
+ if @ast.is_a?(Ast::Element) && @ast.self_closing
252
+ syntax_error!('Illegal nesting: nesting within a self-closing tag is illegal')
253
+ end
254
+ if @ast.is_a?(Ast::HtmlComment) && !@ast.comment.empty?
255
+ syntax_error!('Illegal nesting: nesting within a html comment that already has content is illegal.')
256
+ end
257
+ if @ast.is_a?(Ast::HamlComment)
258
+ @indent_tracker.enter_comment!
259
+ else
260
+ @indent_tracker.check_indent_level!(@line_parser.lineno)
261
+ end
262
+ nil
263
+ end
264
+
265
+ def indent_leave(_indent_level, _text)
266
+ parent_ast = @stack.pop
267
+ @ast = parent_ast
268
+ nil
269
+ end
270
+
271
+ MID_BLOCK_KEYWORDS = %w[else elsif rescue ensure end when].freeze
272
+ START_BLOCK_KEYWORDS = %w[if begin case unless].freeze
273
+ # Try to parse assignments to block starters as best as possible
274
+ START_BLOCK_KEYWORD_REGEX = /(?:\w+(?:,\s*\w+)*\s*=\s*)?(#{Regexp.union(START_BLOCK_KEYWORDS)})/
275
+ BLOCK_KEYWORD_REGEX = /^-?\s*(?:(#{Regexp.union(MID_BLOCK_KEYWORDS)})|#{START_BLOCK_KEYWORD_REGEX.source})\b/
276
+
277
+ def block_keyword(text)
278
+ m = text.match(BLOCK_KEYWORD_REGEX)
279
+ if m
280
+ m[1] || m[2]
281
+ end
282
+ end
283
+
284
+ def syntax_error!(message)
285
+ raise Error.new(message, @line_parser.lineno)
286
+ end
287
+
288
+ def create_node(klass, &block)
289
+ klass.new.tap do |node|
290
+ node.filename = @line_parser.filename
291
+ node.lineno = @line_parser.lineno
292
+ if block
293
+ yield(node)
294
+ end
295
+ end
296
+ end
297
+ end
298
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+ module HamdownCore
3
+ module RubyMultiline
4
+ def self.read(line_parser, current_text)
5
+ buf = []
6
+ while is_ruby_multiline?(current_text)
7
+ current_text = line_parser.next_line
8
+ buf << current_text
9
+ end
10
+ buf
11
+ end
12
+
13
+ # `text' is a Ruby multiline block if it:
14
+ # - ends with a comma
15
+ # - but not "?," which is a character literal
16
+ # (however, "x?," is a method call and not a literal)
17
+ # - and not "?\," which is a character literal
18
+ def self.is_ruby_multiline?(text)
19
+ text && text.length > 1 && text[-1] == ',' &&
20
+ !((text[-3, 2] =~ /\W\?/) || text[-3, 2] == '?\\')
21
+ end
22
+ private_class_method :is_ruby_multiline?
23
+ end
24
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'ast'
3
+ require_relative 'error'
4
+ require_relative 'ruby_multiline'
5
+
6
+ module HamdownCore
7
+ class ScriptParser
8
+ def initialize(line_parser)
9
+ @line_parser = line_parser
10
+ end
11
+
12
+ def parse(text)
13
+ case text[0]
14
+ when '=', '~'
15
+ parse_script(text)
16
+ when '&'
17
+ parse_sanitized(text)
18
+ when '!'
19
+ parse_unescape(text)
20
+ else
21
+ parse_text(text)
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def parse_script(text)
28
+ if text[1] == '='
29
+ create_node(Ast::Text) { |t| t.text = text[2..-1].strip }
30
+ else
31
+ node = create_node(Ast::Script)
32
+ script = text[1..-1].lstrip
33
+ if script.empty?
34
+ syntax_error!('No Ruby code to evaluate')
35
+ end
36
+ node.script = [script, *RubyMultiline.read(@line_parser, script)].join("\n")
37
+ node.preserve = text[0] == '~'
38
+ node
39
+ end
40
+ end
41
+
42
+ def parse_sanitized(text)
43
+ if text.start_with?('&==')
44
+ create_node(Ast::Text) { |t| t.text = text[3..-1].lstrip }
45
+ elsif text[1] == '=' || text[1] == '~'
46
+ node = create_node(Ast::Script)
47
+ script = text[2..-1].lstrip
48
+ if script.empty?
49
+ syntax_error!('No Ruby code to evaluate')
50
+ end
51
+ node.script = [script, *RubyMultiline.read(@line_parser, script)].join("\n")
52
+ node.preserve = text[1] == '~'
53
+ node
54
+ else
55
+ create_node(Ast::Text) { |t| t.text = text[1..-1].strip }
56
+ end
57
+ end
58
+
59
+ def parse_unescape(text)
60
+ if text.start_with?('!==')
61
+ create_node(Ast::Text) do |t|
62
+ t.text = text[3..-1].lstrip
63
+ t.escape_html = false
64
+ end
65
+ elsif text[1] == '=' || text[1] == '~'
66
+ node = create_node(Ast::Script)
67
+ node.escape_html = false
68
+ script = text[2..-1].lstrip
69
+ if script.empty?
70
+ syntax_error!('No Ruby code to evaluate')
71
+ end
72
+ node.script = [script, *RubyMultiline.read(@line_parser, script)].join("\n")
73
+ node.preserve = text[1] == '~'
74
+ node
75
+ else
76
+ create_node(Ast::Text) do |t|
77
+ t.text = text[1..-1].lstrip
78
+ t.escape_html = false
79
+ end
80
+ end
81
+ end
82
+
83
+ def parse_text(text)
84
+ text = text.lstrip
85
+ if text.empty?
86
+ nil
87
+ else
88
+ create_node(Ast::Text) { |t| t.text = text }
89
+ end
90
+ end
91
+
92
+ def syntax_error!(message)
93
+ raise Error.new(message, @line_parser.lineno)
94
+ end
95
+
96
+ def create_node(klass, &block)
97
+ klass.new.tap do |node|
98
+ node.filename = @line_parser.filename
99
+ node.lineno = @line_parser.lineno
100
+ if block
101
+ block.call(node)
102
+ end
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HamdownCore
4
+ module Transformer
5
+ class << self
6
+ def call(ast)
7
+ transform(ast)
8
+ end
9
+
10
+ private
11
+
12
+ def transform(root_node)
13
+ collected_nodes = []
14
+ new_root_node = root_node.clone
15
+ new_root_node.children = []
16
+ root_node.children.each do |node|
17
+ if node.respond_to?(:markdownable?) && node.markdownable?
18
+ collected_nodes << node
19
+ next
20
+ end
21
+
22
+ if collected_nodes.size > 0
23
+ filter = create_filter(collected_nodes)
24
+ collected_nodes = []
25
+ new_root_node << filter
26
+ end
27
+
28
+ if node.respond_to?(:children) && node.children.size > 0
29
+ node = transform(node)
30
+ end
31
+
32
+ new_root_node << node
33
+ end
34
+ if collected_nodes.size > 0
35
+ filter = create_filter(collected_nodes)
36
+ collected_nodes = []
37
+ new_root_node << filter
38
+ end
39
+ new_root_node
40
+ end
41
+
42
+ def create_filter(nodes)
43
+ filter = HamdownCore::Ast::Filter.new
44
+ filter.name = 'markdown'
45
+ nodes.each do |collected_node|
46
+ filter.texts << collected_node.text
47
+ end
48
+ filter
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+ module HamdownCore
3
+ module Utils
4
+ module_function
5
+
6
+ def balance(scanner, start, finish, depth = 1)
7
+ re = /(#{Regexp.escape(start)}|#{Regexp.escape(finish)})/
8
+ while depth > 0 && scanner.scan_until(re)
9
+ if scanner.matched == start
10
+ depth += 1
11
+ else
12
+ depth -= 1
13
+ end
14
+ end
15
+ depth
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+ module HamdownCore
3
+   VERSION = '0.5.0' # version string of the hamdown_core gem
4
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+ require_relative 'hamdown_core/version'
3
+ require_relative 'hamdown_core/cli'
4
+ require_relative 'hamdown_core/engine'
5
+
6
+ module HamdownCore
7
+   # Top-level namespace of the gem; the version, CLI and engine are loaded by the requires above.
8
+ end