mdlint 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,258 @@
1
+ # frozen_string_literal: true
2
+
3
module Mdlint
  module Parser
    # Splits the inline content of a Markdown block into a flat list of tokens
    # (text, emphasis, links, images, code spans, autolinks, raw HTML and line
    # breaks).
    #
    # Nested constructs (the text of a link, the inside of emphasis) are parsed
    # recursively and appended to the same flat token list, bracketed by
    # `*_open` / `*_close` tokens.
    class InlineParser
      ESCAPE_CHARS = '!"#$%&\'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~'
      ESCAPE_REGEXP = /\\([#{Regexp.escape(ESCAPE_CHARS)}])/
      BACKTICK_REGEXP = /(`+)(.+?)\1(?!`)/
      AUTOLINK_REGEXP = %r{<((?:https?|ftp)://[^>]+)>}
      EMAIL_AUTOLINK_REGEXP = /<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/
      HTML_INLINE_REGEXP = %r{</?[a-zA-Z][a-zA-Z0-9]*(?:\s+[^>]*)?>}

      # Ordered dispatch table: [prefix, [[kind, anchored pattern], ...]].
      #
      # Only the FIRST entry whose prefix matches the remaining input is
      # consulted, and its patterns are tried in order. This gating is
      # deliberate: when "**" is present but no strong span closes, the leading
      # "*" becomes literal text instead of being retried as single-star
      # emphasis at the same position.
      #
      # The patterns are compiled once here rather than on every character.
      RULES = [
        ["\\", [[:escape, /\A\\([\\`*_\[\]()#+\-.!{}<>])/]]],
        ["![", [[:image, /\A!\[([^\]]*)\]\(([^)\s]*)(?:\s+"([^"]*)")?\)/]]],
        ["[", [
          # Inline link: [text](url "title")
          [:inline_link, /\A\[([^\]]*)\]\(([^)\s]*)(?:\s+"([^"]*)")?\)/],
          # Full reference link: [text][label]
          [:reference_link, /\A\[([^\]]*)\]\[([^\]]*)\]/],
          # Shortcut reference link: [label]
          [:shortcut_link, /\A\[([^\]]+)\](?!\(|\[)/]
        ]],
        ["`", [[:code, /\A(`+)(.+?)\1(?!`)/m]]],
        ["**", [[:strong, /\A\*\*(?!\s)(.+?)(?<!\s)\*\*/m]]],
        ["__", [[:strong, /\A__(?!\s)(.+?)(?<!\s)__/m]]],
        ["*", [[:em, /\A\*(?!\s)(.+?)(?<!\s)\*/m]]],
        ["_", [[:em, /\A_(?!\s)(.+?)(?<!\s)_/m]]],
        ["<", [
          [:autolink, /\A#{AUTOLINK_REGEXP}/],
          [:email_autolink, /\A#{EMAIL_AUTOLINK_REGEXP}/],
          [:html_inline, /\A#{HTML_INLINE_REGEXP}/]
        ]],
        # Hard break: two or more spaces followed by a newline.
        [" ", [[:hardbreak, /\A {2,}\n/]]],
        ["\n", [[:softbreak, /\A\n/]]]
      ].freeze
      private_constant :RULES

      # @param options [Hash] stored as-is; not read by this class
      def initialize(options = {})
        @options = options
      end

      # Tokenizes +content+ and returns the resulting token list.
      #
      # @param content [String, nil] inline Markdown; nil is treated as empty
      # @return [Array<Token>]
      def parse(content)
        tokens = []
        parse_inline(content.to_s, tokens)
        tokens
      end

      # Tokenizes +content+, appending the tokens to +tokens+ in document order.
      #
      # @param content [String] inline Markdown
      # @param tokens [Array<Token>] accumulator, mutated in place
      def parse_inline(content, tokens)
        pos = 0
        pending = +""

        while pos < content.length
          remaining = content[pos..]
          kind, match = match_construct(remaining)

          if kind
            # Literal text seen so far must precede the construct's tokens.
            flush_text(pending, tokens)
            # Start a fresh buffer: the flushed string now belongs to a token.
            pending = +""
            emit(kind, match, tokens)
            # Advance by what was actually matched, never a hard-coded width.
            pos += match[0].length
          else
            pending << remaining[0]
            pos += 1
          end
        end

        flush_text(pending, tokens)
      end

      private

      # Finds the construct starting at the beginning of +remaining+.
      #
      # @return [Array(Symbol, MatchData), nil] nil when the first character
      #   should be treated as literal text
      def match_construct(remaining)
        _prefix, candidates = RULES.find { |prefix, _| remaining.start_with?(prefix) }
        return unless candidates

        candidates.each do |kind, pattern|
          match = pattern.match(remaining)
          return [kind, match] if match
        end
        nil
      end

      # Appends the token(s) for a matched construct.
      def emit(kind, match, tokens)
        case kind
        when :escape
          tokens << text_token(match[1])
        when :image
          tokens << image_token(match)
        when :inline_link
          emit_link(tokens, { href: match[2], title: match[3] }.compact) do
            parse_inline(match[1], tokens)
          end
        when :reference_link
          # A collapsed reference ([text][]) uses the link text as its label.
          label = match[2].empty? ? match[1] : match[2]
          emit_link(tokens, { reference_label: label.downcase }, markup: "reference") do
            parse_inline(match[1], tokens)
          end
        when :shortcut_link
          emit_link(tokens, { reference_label: match[1].downcase }, markup: "reference") do
            tokens << text_token(match[1])
          end
        when :code
          tokens << code_token(match)
        when :strong
          emit_span(tokens, :strong, match[0][0, 2], match[1])
        when :em
          emit_span(tokens, :em, match[0][0], match[1])
        when :autolink
          emit_link(tokens, { href: match[1] }, markup: "autolink") do
            tokens << text_token(match[1])
          end
        when :email_autolink
          emit_link(tokens, { href: "mailto:#{match[1]}" }, markup: "autolink") do
            tokens << text_token(match[1])
          end
        when :html_inline
          tokens << Token.new(type: :html_inline, content: match[0])
        when :hardbreak
          tokens << Token.new(type: :hardbreak, tag: "br")
        when :softbreak
          tokens << Token.new(type: :softbreak)
        end
      end

      # Emits link_open, whatever the block appends, then link_close.
      # +markup+ is only passed to Token when given, so plain inline links
      # keep Token's own default for that field.
      def emit_link(tokens, attrs, markup: nil)
        shared = markup ? { markup: markup } : {}
        tokens << Token.new(type: :link_open, tag: "a", nesting: 1, attrs: attrs, **shared)
        yield
        tokens << Token.new(type: :link_close, tag: "a", nesting: -1, **shared)
      end

      # Emits an emphasis pair (+name+ is :strong or :em) around the
      # recursively parsed +inner+ text.
      def emit_span(tokens, name, markup, inner)
        tag = name.to_s
        tokens << Token.new(type: :"#{name}_open", tag: tag, nesting: 1, markup: markup)
        parse_inline(inner, tokens)
        tokens << Token.new(type: :"#{name}_close", tag: tag, nesting: -1, markup: markup)
      end

      # Builds the token for ![alt](src "title"); the title is omitted when absent.
      def image_token(match)
        Token.new(
          type: :image,
          tag: "img",
          attrs: { src: match[2], alt: match[1], title: match[3] }.compact,
          content: match[1]
        )
      end

      # Builds a code span token; surrounding whitespace is stripped only when
      # the content both starts and ends with a space.
      def code_token(match)
        code = match[2]
        code = code.strip if code.start_with?(" ") && code.end_with?(" ")
        Token.new(type: :code_inline, tag: "code", content: code, markup: match[1])
      end

      def text_token(content)
        Token.new(type: :text, content: content)
      end

      # Appends +buffer+ as a text token unless it is empty.
      def flush_text(buffer, tokens)
        return if buffer.empty?

        tokens << text_token(buffer)
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module Mdlint
  module Parser
    # Line-oriented cursor over a Markdown source string.
    #
    # The source is split on "\n" (keeping trailing empty lines) and the
    # current position is tracked as a zero-based index into +lines+.
    class State
      # +line_offsets[i]+ is the character offset in +src+ at which line +i+
      # starts; the array has one extra trailing entry past the last line.
      attr_reader :src, :lines, :line_offsets
      attr_accessor :line, :pos, :tokens, :level

      # @param src [String] full document source
      def initialize(src)
        @src = src
        @lines = src.split("\n", -1)
        @line_offsets = build_line_offsets
        @line = 0
        @pos = 0
        @tokens = []
        @level = 0
      end

      # @return [Boolean] true once the cursor is past the last line
      def eof?
        @line >= @lines.length
      end

      # @return [String, nil] the line under the cursor, nil at EOF
      def current_line
        @lines[@line]
      end

      # Advances the cursor by one line and returns the new line index.
      def next_line
        @line += 1
      end

      # Moves the cursor forward to the first non-blank line (or to EOF).
      def skip_blank_lines
        # blank_line? is true past the end, so the eof? check stops the loop.
        @line += 1 while !eof? && blank_line?(@line)
      end

      # @param line_num [Integer] line index to test, defaults to the cursor
      # @return [Boolean] true for whitespace-only lines and for any index at
      #   or beyond the end of the document
      def blank_line?(line_num = @line)
        return true if line_num >= @lines.length

        @lines[line_num].match?(/\A\s*\z/)
      end

      # @return [Array<String>] lines from the cursor to the end; empty at or
      #   past EOF
      def remaining_lines
        # Array#[] with a start beyond the length yields nil; callers always
        # get an Array so they can iterate without a nil check.
        @lines[@line..] || []
      end

      # Returns the line +offset+ lines away from the cursor without moving it.
      #
      # @param offset [Integer] relative line distance (may be negative)
      # @return [String, nil] nil when the target lies outside the document
      def peek_line(offset = 1)
        index = @line + offset
        # A negative index would wrap around to the end of the array.
        return nil if index.negative?

        @lines[index]
      end

      private

      # Each line contributes its length plus one for the "\n" separator.
      def build_line_offsets
        @lines.each_with_object([0]) do |text, offsets|
          offsets << offsets.last + text.length + 1
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "parser/state"
4
+ require_relative "parser/block_parser"
5
+ require_relative "parser/inline_parser"
6
+
7
module Mdlint
  # Two-pass Markdown parser: block structure first, then inline content.
  module Parser
    class << self
      # Parses +src+ into block tokens and fills in the children of every
      # inline token.
      #
      # @param src [String] Markdown source
      # @return [Array] the block-level token list
      def parse(src)
        BlockParser.new.parse(src).tap do |block_tokens|
          parse_inline_tokens(block_tokens)
        end
      end

      private

      # Runs the inline parser over each token of type :inline and stores the
      # result in that token's +children+. Other tokens are left untouched.
      def parse_inline_tokens(tokens)
        inline = InlineParser.new
        tokens.each do |tok|
          tok.children = inline.parse(tok.content) if tok.type == :inline
        end
      end
    end
  end
end