d-mark 0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -7
- data/Gemfile.lock +16 -12
- data/Guardfile +3 -0
- data/NEWS.md +11 -3
- data/README.adoc +218 -0
- data/Rakefile +13 -2
- data/d-mark.gemspec +5 -4
- data/lib/d-mark.rb +2 -0
- data/lib/d-mark/cli.rb +28 -0
- data/lib/d-mark/parser.rb +460 -0
- data/lib/{dmark → d-mark}/translator.rb +5 -3
- data/lib/d-mark/version.rb +3 -0
- data/samples/identifiers-and-patterns.dmark +418 -1
- data/samples/trivial.dmark +1 -0
- data/samples/trivial.rb +20 -0
- data/spec/d-mark/parser_spec.rb +271 -0
- data/spec/spec_helper.rb +2 -0
- metadata +30 -18
- data/README.md +0 -70
- data/lib/dmark.rb +0 -9
- data/lib/dmark/lexer.rb +0 -235
- data/lib/dmark/nodes.rb +0 -76
- data/lib/dmark/parser.rb +0 -28
- data/lib/dmark/tokens.rb +0 -49
- data/lib/dmark/version.rb +0 -3
- data/samples/identifiers-and-patterns.html +0 -59
- data/scripts/translate-to-html.rb +0 -46
- data/tasks/doc.rake +0 -13
- data/tasks/rubocop.rake +0 -6
- data/tasks/test.rake +0 -6
data/lib/dmark.rb
DELETED
data/lib/dmark/lexer.rb
DELETED
@@ -1,235 +0,0 @@
|
|
1
|
-
module DMark
  # Line-oriented lexer: turns a D-Mark source string into a flat list of
  # DMark::Tokens (tag begin / tag end / text).
  #
  # Block structure is derived from leading whitespace (INDENTATION columns per
  # nesting level); inline markup (`%name[attrs]{...}`, `%%`, `%}`) is handled
  # by #lex_inline one line at a time.
  class Lexer
    # Number of columns of leading whitespace per nesting level.
    INDENTATION = 2

    # Raised for malformed input. Carries the offending line plus 1-based
    # line/column numbers and can render itself with or without ANSI colours.
    class LexerError < StandardError
      # @param message [String] human-readable description of the problem
      # @param line [String] the text the column number refers to
      # @param line_nr [Integer] 1-based line number
      # @param col_nr [Integer] 1-based column number within +line+
      def initialize(message, line, line_nr, col_nr)
        super(message)

        @message = message
        @line = line
        @line_nr = line_nr
        @col_nr = col_nr
      end

      # Emits ANSI escape sequences for terminal output.
      class Coloriser
        def red
          "\e[31m".freeze
        end

        def bold
          "\e[1m".freeze
        end

        def reset
          "\e[0m".freeze
        end
      end

      # Same interface as Coloriser, but emits nothing.
      class NullColoriser
        def red
          ''.freeze
        end

        def bold
          ''.freeze
        end

        def reset
          ''.freeze
        end
      end

      # @return [String] the formatted error without colour codes
      def message
        formatted_message(NullColoriser.new)
      end

      # @return [String] the formatted error with ANSI colour codes
      def message_for_tty
        formatted_message(Coloriser.new)
      end

      # Builds the multi-line report: a header, an excerpt of the offending
      # line (at most 38 characters either side of the column) and a caret
      # pointing at the column.
      #
      # @param coloriser [#red, #bold, #reset]
      # @return [String]
      def formatted_message(coloriser)
        excerpt_start = [@col_nr - 38, 0].max
        excerpt_end = @col_nr + 38
        # Work on a private copy so the ellipsis substitution never touches @line.
        excerpt = (@line[excerpt_start..excerpt_end] || '').dup

        unless excerpt.empty?
          excerpt[0] = '…' if excerpt_start > 0
          # The slice is inclusive, so text is only cut off when the last
          # excerpt index lies before the last index of the line.
          excerpt[-1] = '…' if excerpt_end < @line.size - 1
        end

        caret_offset = [@col_nr - 1 - excerpt_start, 0].max

        [
          "#{coloriser.red}#{coloriser.bold}ERROR#{coloriser.reset} (line #{@line_nr}, col #{@col_nr}): #{coloriser.red}#{@message}#{coloriser.reset}",
          '',
          excerpt,
          coloriser.red + ' ' * caret_offset + '^' + coloriser.reset,
          '',
        ].join("\n")
      end
    end

    # @param string [String] the complete source text to lex
    def initialize(string)
      @string = string

      @element_stack = []
      @tokens = []
      @pending_blanks = 0
    end

    # Lexes the whole input.
    #
    # @return [Array] the accumulated tokens (the same array on every call;
    #   state is not reset between calls)
    # @raise [LexerError] on raw data at root level or malformed inline markup
    def run
      @string.lines.each_with_index do |line, index|
        # All reported line numbers are 1-based.
        line_nr = index + 1

        case line
        when /^\s+$/
          # blank line: remembered and flushed as text before the next content
          @pending_blanks += 1
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\.\s*$/
          # empty element: opens a block that stays open until dedent
          indentation = Regexp.last_match(1)
          element = Regexp.last_match(2)
          attributes = parse_attributes(Regexp.last_match(4))

          unwind_stack_until(indentation.size)

          @element_stack << element
          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\. (.*)$/
          # element with inline content: opened and closed on the same line
          indentation = Regexp.last_match(1)
          element = Regexp.last_match(2)
          attributes = parse_attributes(Regexp.last_match(4))
          data = Regexp.last_match(5)

          unwind_stack_until(indentation.size)

          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
          @tokens.concat(lex_inline(data, line_nr))
          @tokens << DMark::Tokens::TagEndToken.new(name: element)
        when /^(\s*)(.*)$/
          # other line (e.g. data)
          indentation = Regexp.last_match(1)
          data = Regexp.last_match(2)

          unwind_stack_until(indentation.size)

          if @element_stack.empty?
            # FIXME: unify format of messages (uppercase, lowercase, …)
            raise LexerError.new("Can’t insert raw data at root level", line, line_nr, 1)
          end

          # Whitespace beyond the enclosing element's indentation is content.
          extra_indentation = [indentation.size - INDENTATION * @element_stack.size, 0].max

          @tokens.concat(lex_inline(' ' * extra_indentation + data + "\n", line_nr))
        end
      end

      unwind_stack_until(0)

      @tokens
    end

    private

    # Parses a comma-separated `key=value` list into a Hash of Strings.
    # A bare key (or a key with an empty value) maps to itself; only the first
    # `=` separates key and value, so values may themselves contain `=`.
    # Empty parts (as in `a,,b`) are ignored.
    #
    # @param data [String, nil] text between `[` and `]`, or nil when absent
    # @return [Hash{String => String}]
    def parse_attributes(data)
      # FIXME: write a proper parser

      (data || '').split(',').each_with_object({}) do |part, res|
        next if part.empty?

        key, value = part.split('=', 2)
        value = key if value.nil? || value.empty?
        res[key] = value
      end
    end

    # Closes every open block element nested deeper than +num+ columns, then
    # flushes the blank lines seen since the last content as newline text.
    #
    # @param num [Integer] indentation (in columns) of the current line
    def unwind_stack_until(num)
      while @element_stack.size * INDENTATION > num
        elem = @element_stack.pop

        @tokens << DMark::Tokens::TagEndToken.new(name: elem)
      end

      append_text(@tokens, "\n" * @pending_blanks)
      @pending_blanks = 0
    end

    # Appends +text+ to +out+, merging it into a trailing text token if present.
    #
    # @param out [Array] token list to extend
    # @param text [String]
    def append_text(out, text)
      if out.empty? || !out.last.is_a?(DMark::Tokens::TextToken)
        # Hand the token its own copy: later appends mutate the token's text
        # in place and must not touch the caller's string.
        out << DMark::Tokens::TextToken.new(text: text.dup)
      else
        out.last.text << text
      end
    end

    # Builds the error for a character that is not valid after `%`.
    def unexpected_after_pct(char, string, line_nr, col_nr)
      LexerError.new("unexpected `#{char}` after `%`", string, line_nr, col_nr)
    end

    # Lexes inline markup within a single line.
    #
    # Recognised forms: `%%` and `%}` (escapes), `%name{...}` and
    # `%name[attrs]{...}` (inline elements, may nest). Inline elements must be
    # closed within the same string.
    #
    # @param string [String] the text to lex
    # @param line_nr [Integer] 1-based line number, used for error reporting
    # @return [Array] tokens for this string
    # @raise [LexerError] on an unexpected `}`, an invalid character after `%`,
    #   or markup left unterminated at the end of the string
    def lex_inline(string, line_nr)
      open_elements = []
      state = :root
      tokens = []
      name = ''.dup
      attributes = ''.dup
      col_nr = 0

      string.each_char do |char|
        # col_nr always is the 1-based position of +char+ within +string+.
        col_nr += 1

        case state
        when :root
          case char
          when '%'
            state = :after_pct
          when '}'
            if open_elements.empty?
              message = 'Unexpected `}`. Try escaping it as `%}`.'
              raise LexerError.new(message, string, line_nr, col_nr)
            end

            tokens << DMark::Tokens::TagEndToken.new(name: open_elements.pop)
          else
            append_text(tokens, char)
          end
        when :after_pct
          case char
          when 'a'..'z', '0'..'9', '-'
            name << char
          when '%', '}' # escaped
            # Only an escape directly after `%`; otherwise the partial name
            # would leak into the next element.
            raise unexpected_after_pct(char, string, line_nr, col_nr) unless name.empty?

            state = :root
            append_text(tokens, char)
          when '['
            raise unexpected_after_pct(char, string, line_nr, col_nr) if name.empty?

            state = :after_lbracket
          when '{'
            raise unexpected_after_pct(char, string, line_nr, col_nr) if name.empty?

            state = :root
            open_elements << name
            tokens << DMark::Tokens::TagBeginToken.new(name: name, attributes: parse_attributes(attributes))
            name = ''.dup
            attributes = ''.dup
          else
            raise unexpected_after_pct(char, string, line_nr, col_nr)
          end
        when :after_lbracket
          case char
          when ']'
            # FIXME: might make sense to have after_rbracket instead (to prevent %foo[a][b]{…})
            state = :after_pct
          else
            attributes << char
          end
        else
          raise "Unexpected state: #{state.inspect}"
        end
      end

      # Anything still pending here would yield unbalanced or dropped tokens.
      unless state == :root
        raise LexerError.new('Unexpected end of line after `%`', string, line_nr, col_nr)
      end

      unless open_elements.empty?
        message = "Unclosed `%#{open_elements.last}{`. Add a closing `}`."
        raise LexerError.new(message, string, line_nr, col_nr)
      end

      tokens
    end
  end
end
|
data/lib/dmark/nodes.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
module DMark
  # Tree node classes produced by the parser.
  module Nodes
    # Base class: a node with an ordered list of child nodes.
    class Node
      # @return [Array<Node>] child nodes, in document order
      attr_reader :children

      def initialize
        @children = []
      end

      # @return [String] placeholder representation; subclasses render a tree
      def inspect(_indent = 0)
        'Node()'
      end

      private

      # Shared tree renderer used by the subclasses' #inspect.
      #
      # Produces `<pad><head>)` + newline for a leaf, or `<pad><head>`, the
      # children (one level deeper), and `<pad>)` on separate lines otherwise.
      # The result is assembled without mutating any string literal.
      #
      # @param indent [Integer] nesting depth; one space of padding per level
      # @param head [String] opening text, e.g. `Root(` or `Text("hi"`
      # @return [String]
      def inspect_tree(indent, head)
        padding = ' ' * indent
        return "#{padding}#{head})\n" unless children.any?

        nested = children.map { |child| child.inspect(indent + 1) }.join
        "#{padding}#{head}\n#{nested}#{padding})\n"
      end
    end

    # The document root.
    class RootNode < Node
      # @param indent [Integer] nesting depth
      # @return [String] multi-line tree dump
      def inspect(indent = 0)
        inspect_tree(indent, 'Root(')
      end
    end

    # A run of text.
    class TextNode < Node
      # @return [String]
      attr_reader :text

      # @param text [String]
      def initialize(text:)
        super()
        @text = text
      end

      # @param indent [Integer] nesting depth
      # @return [String] multi-line tree dump
      def inspect(indent = 0)
        inspect_tree(indent, "Text(#{@text.inspect}")
      end
    end

    # A named element with attributes.
    class ElementNode < Node
      # @return [String]
      attr_reader :name
      # @return [Hash]
      attr_reader :attributes

      # @param name [String]
      # @param attributes [Hash]
      def initialize(name:, attributes:)
        super()
        @name = name
        @attributes = attributes
      end

      # @param indent [Integer] nesting depth
      # @return [String] multi-line tree dump; attributes are shown only when
      #   there are any
      def inspect(indent = 0)
        head = "Element(#{@name}"
        head = "#{head},#{@attributes.inspect}" unless @attributes.empty?
        inspect_tree(indent, head)
      end
    end
  end
end
|
data/lib/dmark/parser.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module DMark
  # Builds a node tree (DMark::Nodes) from a flat token list (DMark::Tokens).
  class Parser
    # Raised when the token stream is not properly nested.
    class ParserError < StandardError; end

    # @param tokens [Array] tokens in document order
    def initialize(tokens)
      @tokens = tokens

      @root_node = DMark::Nodes::RootNode.new
    end

    # Walks the tokens, nesting nodes between matching begin/end tag tokens.
    # Tokens of any other class are ignored.
    #
    # @return [DMark::Nodes::RootNode] the root of the resulting tree
    # @raise [ParserError] if an end tag appears with no element open
    def run
      node_stack = [@root_node]

      @tokens.each do |token|
        case token
        when DMark::Tokens::TextToken
          node_stack.last.children << DMark::Nodes::TextNode.new(text: token.text)
        when DMark::Tokens::TagBeginToken
          new_node = DMark::Nodes::ElementNode.new(name: token.name, attributes: token.attributes)
          node_stack.last.children << new_node
          node_stack.push(new_node)
        when DMark::Tokens::TagEndToken
          # The root must never be popped: doing so would leave the stack
          # empty and make the next token fail on nil.
          if node_stack.size == 1
            raise ParserError, "Unexpected end tag `#{token.name}` without a matching begin tag"
          end

          node_stack.pop
        end
      end

      @root_node
    end
  end
end
|
data/lib/dmark/tokens.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
module DMark
  # Token classes emitted by the lexer.
  module Tokens
    # Abstract base class for all tokens.
    class Token
      # @raise [NotImplementedError] always; subclasses provide the real text
      def to_s
        raise NotImplementedError
      end
    end

    # A run of literal text.
    class TextToken < Token
      # @return [String] the token's text; may be appended to in place
      attr_reader :text

      # @param text [String] initial text. A private copy is stored, so
      #   in-place appends to #text neither affect the caller's string nor
      #   fail when that string is frozen.
      def initialize(text:)
        @text = text.dup
      end

      def to_s
        "Text(#{@text.inspect})"
      end
    end

    # Common base for tag tokens: carries the element name.
    class AbstractTagToken < Token
      # @return [String]
      attr_reader :name

      # @param name [String] element name
      def initialize(name:)
        @name = name
      end
    end

    # Marks the start of an element.
    class TagBeginToken < AbstractTagToken
      # @return [Hash]
      attr_reader :attributes

      # @param name [String] element name
      # @param attributes [Hash] element attributes
      def initialize(name:, attributes:)
        super(name: name)

        @attributes = attributes
      end

      def to_s
        "TagBegin(#{name.inspect}, #{attributes.inspect})"
      end
    end

    # Marks the end of an element.
    class TagEndToken < AbstractTagToken
      def to_s
        "TagEnd(#{name.inspect})"
      end
    end
  end
end
|
data/lib/dmark/version.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
<p>In Nanoc, every item (page or asset) and every layout has a unique <i>identifier</i>: a string derived from the file’s path. A <i>pattern</i> is an expression that is used to select items or layouts based on their identifier.</p>
|
2
|
-
<h2>Identifiers</h2>
|
3
|
-
<p>Identifiers come in two types: the <i>full</i> type, new in Nanoc 4, and the <i>legacy</i> type, used in Nanoc 3.</p>
|
4
|
-
<dl><dt>full</dt><dd>An identifier with the full type is the filename, with the path to the content directory removed. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the full identifier <i>/about.md</i>.</dd>
|
5
|
-
<dt>legacy</dt><dd>An identifier with the legacy type is the filename, with the path to the content directory removed, the extension removed, and a slash appended. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the legacy identifier <i>/about/</i>. This corresponds closely with paths in clean URLs.</dd></dl>
|
6
|
-
<p>The following methods are useful for full identifiers:</p>
|
7
|
-
<dl><dt><code>identifier.without_ext</code> → <i>String</i></dt><dd>identifier with the last extension removed</dd>
|
8
|
-
<dt><code>identifier.without_exts</code> → <i>String</i></dt><dd>identifier with all extensions removed</dd>
|
9
|
-
<dt><code donkey="true">identifier.ext</code> → <i>String</i></dt><dd>the last extension of this identifier</dd>
|
10
|
-
<dt><code>identifier.exts</code> → <i>String</i></dt><dd>all extensions of this identifier</dd>
|
11
|
-
<dt><code>identifier + string</code> → <i>String</i></dt><dd>identifier with the given string appended</dd></dl>
|
12
|
-
<p>Here are some &lt; examples:</p>
|
13
|
-
<pre>identifier = Nanoc::Identifier.new('/about.md')
|
14
|
-
|
15
|
-
identifier.without_ext
|
16
|
-
# => "/about"
|
17
|
-
|
18
|
-
identifier.ext
|
19
|
-
# => "md"
|
20
|
-
</pre>
|
21
|
-
<p>The following method is useful for legacy identifiers:</p>
|
22
|
-
<dl><dt><code>identifier.chop</code> → <i>String</i></dt><dd>identifier with the last character removed</dd></dl>
|
23
|
-
<p>Here are some examples:</p>
|
24
|
-
<pre>identifier = Nanoc::Identifier.new('/about/', type: :legacy)
|
25
|
-
|
26
|
-
identifier.chop
|
27
|
-
# => "/about"
|
28
|
-
|
29
|
-
identifier.chop + '.html'
|
30
|
-
# => "/about.html"
|
31
|
-
|
32
|
-
identifier + 'index.html'
|
33
|
-
# => "/about/index.html"
|
34
|
-
</pre>
|
35
|
-
<h2>Patterns</h2>
|
36
|
-
<p>Patterns are used to find items and layouts based on their identifier. They come in three varieties:</p>
|
37
|
-
<ul><li>glob patterns</li><li>regular expression patterns</li><li>legacy patterns</li></ul>
|
38
|
-
<h3>Glob patterns</h3>
|
39
|
-
<p>Glob patterns are strings that contain wildcard characters. Wildcard characters are characters that can be substituted for other characters in an identifier. An example of a glob pattern is <i>/projects/*.md</i>, which matches all files with a <i>md</i> extension in the <i>/projects</i> directory.</p>
|
40
|
-
<p>Globs are commonplace in Unix-like environments. For example, the Unix command for listing all files with the <i>md</i> extension in the current directory is <code>ls *.md</code>. In this example, the argument to the <code>ls</code> command is a wildcard.</p>
|
41
|
-
<p>Nanoc supports the following wildcards in glob patterns:</p>
|
42
|
-
<dl><dt><code>*</code></dt><dd>Matches any file or directory name. Does not cross directory boundaries. For example, <i>/projects/*.md</i> matches <i>/projects/nanoc.md</i>, but not <i>/projects/cri.adoc</i> nor <i>/projects/nanoc/about.md</i>.</dd>
|
43
|
-
<dt><code>**/</code></dt><dd>Matches zero or more levels of nested directories. For example, <i>/projects/**/*.md</i> matches both <i>/projects/nanoc.md</i> and <i>/projects/nanoc/history.md</i>.</dd>
|
44
|
-
<dt><code>?</code></dt><dd>Matches a single character.</dd>
|
45
|
-
<dt><code>[abc]</code></dt><dd>Matches any single character in the set. For example, <i>/people/[kt]im.md</i> matches only <i>/people/kim.md</i> and <i>/people/tim.md</i>.</dd>
|
46
|
-
<dt><code>{foo,bar}</code></dt><dd>Matches either string in the comma-separated list. More than two strings are possible. For example, <i>/c{at,ub,ount}s.txt</i> matches <i>/cats.txt</i>, <i>/cubs.txt</i> and <i>/counts.txt</i>, but not <i>/cabs.txt</i>.</dd></dl>
|
47
|
-
<p>A glob pattern that matches every item is <i>/**/*</i>. A glob pattern that matches every item/layout with the extension <i>md</i> is <i>/**/*.md</i>.</p>
|
48
|
-
<h3>Regular expression patterns</h3>
|
49
|
-
<p>You can use a regular expression to select items and layouts.</p>
|
50
|
-
<p>For matching identifiers, the <code>%r{…}</code> syntax is (arguably) nicer than the <code>/…/</code> syntax. The latter is not a good fit for identifiers (or filenames), because all slashes need to be escaped. The <code>\A</code> and <code>\z</code> anchors are also useful to make sure the entire identifier is matched.</p>
|
51
|
-
<p>An example of a regular expression pattern is <code>%r{\A/projects/(cri|nanoc)\.md\z}</code>, which matches both <i>/projects/nanoc.md</i> and <i>/projects/cri.md</i>.</p>
|
52
|
-
<h3>Legacy patterns</h3>
|
53
|
-
<p>Legacy patterns are strings that contain wildcard characters. The wildcard characters behave differently than the glob wildcard characters.</p>
|
54
|
-
<p>To enable legacy patterns, set <code>string_pattern_type</code> to <code>"legacy"</code> in the configuration. For example:</p>
|
55
|
-
<pre>string_pattern_type: "legacy"
|
56
|
-
</pre>
|
57
|
-
<p>For legacy patterns, Nanoc supports the following wildcards:</p>
|
58
|
-
<dl><dt><code>*</code></dt><dd>Matches zero or more characters, including a slash. For example, <i>/projects/*/</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd>
|
59
|
-
<dt><code>+</code></dt><dd>Matches one or more characters, including a slash. For example, <i>/projects/+</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd></dl>
|