d-mark 0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -7
- data/Gemfile.lock +16 -12
- data/Guardfile +3 -0
- data/NEWS.md +11 -3
- data/README.adoc +218 -0
- data/Rakefile +13 -2
- data/d-mark.gemspec +5 -4
- data/lib/d-mark.rb +2 -0
- data/lib/d-mark/cli.rb +28 -0
- data/lib/d-mark/parser.rb +460 -0
- data/lib/{dmark → d-mark}/translator.rb +5 -3
- data/lib/d-mark/version.rb +3 -0
- data/samples/identifiers-and-patterns.dmark +418 -1
- data/samples/trivial.dmark +1 -0
- data/samples/trivial.rb +20 -0
- data/spec/d-mark/parser_spec.rb +271 -0
- data/spec/spec_helper.rb +2 -0
- metadata +30 -18
- data/README.md +0 -70
- data/lib/dmark.rb +0 -9
- data/lib/dmark/lexer.rb +0 -235
- data/lib/dmark/nodes.rb +0 -76
- data/lib/dmark/parser.rb +0 -28
- data/lib/dmark/tokens.rb +0 -49
- data/lib/dmark/version.rb +0 -3
- data/samples/identifiers-and-patterns.html +0 -59
- data/scripts/translate-to-html.rb +0 -46
- data/tasks/doc.rake +0 -13
- data/tasks/rubocop.rake +0 -6
- data/tasks/test.rake +0 -6
data/lib/dmark.rb
DELETED
data/lib/dmark/lexer.rb
DELETED
@@ -1,235 +0,0 @@
|
|
1
|
-
module DMark
|
2
|
-
class Lexer
|
3
|
-
INDENTATION = 2
|
4
|
-
|
5
|
-
def initialize(string)
|
6
|
-
@string = string
|
7
|
-
|
8
|
-
@element_stack = []
|
9
|
-
@tokens = []
|
10
|
-
@pending_blanks = 0
|
11
|
-
end
|
12
|
-
|
13
|
-
def run
|
14
|
-
@string.lines.each_with_index do |line, line_nr|
|
15
|
-
case line
|
16
|
-
when /^\s+$/
|
17
|
-
# blank line
|
18
|
-
@pending_blanks += 1
|
19
|
-
when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\.\s*$/
|
20
|
-
# empty element
|
21
|
-
indentation = Regexp.last_match[1]
|
22
|
-
element = Regexp.last_match[2]
|
23
|
-
attributes = parse_attributes(Regexp.last_match[4])
|
24
|
-
|
25
|
-
unwind_stack_until(indentation.size)
|
26
|
-
|
27
|
-
@element_stack << element
|
28
|
-
@tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
|
29
|
-
when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\. (.*)$/
|
30
|
-
# element with inline content
|
31
|
-
indentation = Regexp.last_match[1]
|
32
|
-
element = Regexp.last_match[2]
|
33
|
-
attributes = parse_attributes(Regexp.last_match[4])
|
34
|
-
data = Regexp.last_match[5]
|
35
|
-
|
36
|
-
unwind_stack_until(indentation.size)
|
37
|
-
|
38
|
-
@tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
|
39
|
-
@tokens.concat(lex_inline(data, line_nr + 1))
|
40
|
-
@tokens << DMark::Tokens::TagEndToken.new(name: element)
|
41
|
-
when /^(\s*)(.*)$/
|
42
|
-
# other line (e.g. data)
|
43
|
-
indentation = Regexp.last_match[1]
|
44
|
-
data = Regexp.last_match[2]
|
45
|
-
|
46
|
-
unwind_stack_until(indentation.size)
|
47
|
-
|
48
|
-
if @element_stack.empty?
|
49
|
-
# FIXME: unify format of messages (uppercase, lowercase, …)
|
50
|
-
raise LexerError.new("Can’t insert raw data at root level", line, line_nr, 1)
|
51
|
-
end
|
52
|
-
|
53
|
-
extra_indentation = [indentation.size - INDENTATION * @element_stack.size, 0].max
|
54
|
-
|
55
|
-
@tokens.concat(lex_inline(' ' * extra_indentation + data + "\n", line_nr + 1))
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
unwind_stack_until(0)
|
60
|
-
|
61
|
-
@tokens
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
|
66
|
-
def parse_attributes(data)
|
67
|
-
# FIXME: write a proper parser
|
68
|
-
|
69
|
-
(data || '').split(',').map { |part| part.split('=') }.each_with_object({}) do |pair, res|
|
70
|
-
res[pair.first] = pair.last || pair.first
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
def unwind_stack_until(num)
|
75
|
-
while @element_stack.size * INDENTATION > num
|
76
|
-
elem = @element_stack.pop
|
77
|
-
|
78
|
-
@tokens << DMark::Tokens::TagEndToken.new(name: elem)
|
79
|
-
end
|
80
|
-
|
81
|
-
append_text(@tokens, "\n" * @pending_blanks)
|
82
|
-
@pending_blanks = 0
|
83
|
-
end
|
84
|
-
|
85
|
-
def append_text(out, text)
|
86
|
-
if out.empty? || !out.last.is_a?(DMark::Tokens::TextToken)
|
87
|
-
out << DMark::Tokens::TextToken.new(text: text)
|
88
|
-
else
|
89
|
-
out.last.text << text
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
class LexerError < StandardError
|
94
|
-
def initialize(message, line, line_nr, col_nr)
|
95
|
-
@message = message
|
96
|
-
@line = line
|
97
|
-
@line_nr = line_nr
|
98
|
-
@col_nr = col_nr
|
99
|
-
end
|
100
|
-
|
101
|
-
class Coloriser
|
102
|
-
def red
|
103
|
-
"\e[31m".freeze
|
104
|
-
end
|
105
|
-
|
106
|
-
def bold
|
107
|
-
"\e[1m".freeze
|
108
|
-
end
|
109
|
-
|
110
|
-
def reset
|
111
|
-
"\e[0m".freeze
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
class NullColoriser
|
116
|
-
def red
|
117
|
-
''.freeze
|
118
|
-
end
|
119
|
-
|
120
|
-
def bold
|
121
|
-
''.freeze
|
122
|
-
end
|
123
|
-
|
124
|
-
def reset
|
125
|
-
''.freeze
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def message
|
130
|
-
formatted_message(NullColoriser.new)
|
131
|
-
end
|
132
|
-
|
133
|
-
def message_for_tty
|
134
|
-
formatted_message(Coloriser.new)
|
135
|
-
end
|
136
|
-
|
137
|
-
def formatted_message(coloriser)
|
138
|
-
line_excerpt_start = [@col_nr - 38, 0].max
|
139
|
-
line_excerpt_end = @col_nr + 38
|
140
|
-
line_excerpt = @line[line_excerpt_start..line_excerpt_end]
|
141
|
-
|
142
|
-
if line_excerpt_start > 0
|
143
|
-
line_excerpt[0] = '…'
|
144
|
-
end
|
145
|
-
|
146
|
-
if line_excerpt_end < @line.size
|
147
|
-
line_excerpt[-1] = '…'
|
148
|
-
end
|
149
|
-
|
150
|
-
[
|
151
|
-
"#{coloriser.red}#{coloriser.bold}ERROR#{coloriser.reset} (line #{@line_nr}, col #{@col_nr}): #{coloriser.red}#{@message}#{coloriser.reset}",
|
152
|
-
'',
|
153
|
-
line_excerpt,
|
154
|
-
coloriser.red + ' ' * (@col_nr - 1 - line_excerpt_start) + '^' + coloriser.reset,
|
155
|
-
'',
|
156
|
-
].join("\n")
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
def lex_inline(string, line_nr)
|
161
|
-
stack = []
|
162
|
-
state = :root
|
163
|
-
tokens = []
|
164
|
-
name = ''
|
165
|
-
attributes = ''
|
166
|
-
col_nr = 0
|
167
|
-
|
168
|
-
string.chars.each_with_index do |char|
|
169
|
-
col_nr += 1
|
170
|
-
|
171
|
-
case state
|
172
|
-
when :root
|
173
|
-
case char
|
174
|
-
when '%'
|
175
|
-
state = :after_pct
|
176
|
-
when '}'
|
177
|
-
if stack.empty?
|
178
|
-
message = 'Unexpected `}`. Try escaping it as `%}`.'
|
179
|
-
raise LexerError.new(message, string, line_nr, col_nr)
|
180
|
-
else
|
181
|
-
data = stack.pop
|
182
|
-
case data.first
|
183
|
-
when :raw
|
184
|
-
append_text(tokens, data.last)
|
185
|
-
when :elem
|
186
|
-
tokens << DMark::Tokens::TagEndToken.new(name: data.last)
|
187
|
-
else
|
188
|
-
raise "Unexpected entry on stack: #{data.inspect}"
|
189
|
-
end
|
190
|
-
end
|
191
|
-
else
|
192
|
-
append_text(tokens, char)
|
193
|
-
end
|
194
|
-
when :after_pct
|
195
|
-
# FIXME: require at least one character after %
|
196
|
-
|
197
|
-
case char
|
198
|
-
when 'a'..'z', '0'..'9', '-'
|
199
|
-
name << char
|
200
|
-
when '%' # escaped
|
201
|
-
state = :root
|
202
|
-
col_nr -= 1
|
203
|
-
append_text(tokens, '%')
|
204
|
-
when '}' # escaped
|
205
|
-
state = :root
|
206
|
-
col_nr -= 1
|
207
|
-
append_text(tokens, '}')
|
208
|
-
when '['
|
209
|
-
state = :after_lbracket
|
210
|
-
when '{'
|
211
|
-
state = :root
|
212
|
-
stack << [:elem, name]
|
213
|
-
tokens << DMark::Tokens::TagBeginToken.new(name: name, attributes: parse_attributes(attributes))
|
214
|
-
name = ''
|
215
|
-
attributes = ''
|
216
|
-
else
|
217
|
-
raise LexerError.new("unexpected `#{char}` after `%`", string, line_nr, col_nr)
|
218
|
-
end
|
219
|
-
when :after_lbracket
|
220
|
-
case char
|
221
|
-
when ']'
|
222
|
-
# FIXME: might make sense to have after_rbracket instead (to prevent %foo[a][b]{…})
|
223
|
-
state = :after_pct
|
224
|
-
else
|
225
|
-
attributes << char
|
226
|
-
end
|
227
|
-
else
|
228
|
-
raise "Unexpected state: #{state.inspect}"
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
tokens
|
233
|
-
end
|
234
|
-
end
|
235
|
-
end
|
data/lib/dmark/nodes.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
module DMark
|
2
|
-
module Nodes
|
3
|
-
class Node
|
4
|
-
attr_reader :children
|
5
|
-
|
6
|
-
def initialize
|
7
|
-
@children = []
|
8
|
-
end
|
9
|
-
|
10
|
-
def inspect(_indent = 0)
|
11
|
-
'Node()'
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
class RootNode < Node
|
16
|
-
def inspect(indent = 0)
|
17
|
-
io = ''
|
18
|
-
io << ' ' * indent
|
19
|
-
io << 'Root('
|
20
|
-
io << "\n" if children.any?
|
21
|
-
children.each { |c| io << c.inspect(indent + 1) }
|
22
|
-
io << ' ' * indent if children.any?
|
23
|
-
io << ')'
|
24
|
-
io << "\n"
|
25
|
-
io
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
class TextNode < Node
|
30
|
-
attr_reader :text
|
31
|
-
|
32
|
-
def initialize(text:)
|
33
|
-
super()
|
34
|
-
@text = text
|
35
|
-
end
|
36
|
-
|
37
|
-
def inspect(indent = 0)
|
38
|
-
io = ''
|
39
|
-
io << ' ' * indent
|
40
|
-
io << 'Text('
|
41
|
-
io << @text.inspect
|
42
|
-
io << "\n" if children.any?
|
43
|
-
children.each { |c| io << c.inspect(indent + 1) }
|
44
|
-
io << ' ' * indent if children.any?
|
45
|
-
io << ')'
|
46
|
-
io << "\n"
|
47
|
-
io
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
class ElementNode < Node
|
52
|
-
attr_reader :name
|
53
|
-
attr_reader :attributes
|
54
|
-
|
55
|
-
def initialize(name:, attributes:)
|
56
|
-
super()
|
57
|
-
@name = name
|
58
|
-
@attributes = attributes
|
59
|
-
end
|
60
|
-
|
61
|
-
def inspect(indent = 0)
|
62
|
-
io = ''
|
63
|
-
io << ' ' * indent
|
64
|
-
io << 'Element('
|
65
|
-
io << @name
|
66
|
-
io << ',' << @attributes.inspect unless @attributes.empty?
|
67
|
-
io << "\n" if children.any?
|
68
|
-
children.each { |c| io << c.inspect(indent + 1) }
|
69
|
-
io << ' ' * indent if children.any?
|
70
|
-
io << ')'
|
71
|
-
io << "\n"
|
72
|
-
io
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
data/lib/dmark/parser.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
module DMark
|
2
|
-
class Parser
|
3
|
-
def initialize(tokens)
|
4
|
-
@tokens = tokens
|
5
|
-
|
6
|
-
@root_node = DMark::Nodes::RootNode.new
|
7
|
-
end
|
8
|
-
|
9
|
-
def run
|
10
|
-
node_stack = [@root_node]
|
11
|
-
|
12
|
-
@tokens.each do |token|
|
13
|
-
case token
|
14
|
-
when DMark::Tokens::TextToken
|
15
|
-
node_stack.last.children << DMark::Nodes::TextNode.new(text: token.text)
|
16
|
-
when DMark::Tokens::TagBeginToken
|
17
|
-
new_node = DMark::Nodes::ElementNode.new(name: token.name, attributes: token.attributes)
|
18
|
-
node_stack.last.children << new_node
|
19
|
-
node_stack.push(new_node)
|
20
|
-
when DMark::Tokens::TagEndToken
|
21
|
-
node_stack.pop
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
@root_node
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
data/lib/dmark/tokens.rb
DELETED
@@ -1,49 +0,0 @@
|
|
1
|
-
module DMark
|
2
|
-
module Tokens
|
3
|
-
class Token
|
4
|
-
def to_s
|
5
|
-
raise NotImplementedError
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class TextToken < Token
|
10
|
-
attr_reader :text
|
11
|
-
|
12
|
-
def initialize(text:)
|
13
|
-
@text = text
|
14
|
-
end
|
15
|
-
|
16
|
-
def to_s
|
17
|
-
"Text(#{@text.inspect})"
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
class AbstractTagToken < Token
|
22
|
-
attr_reader :name
|
23
|
-
|
24
|
-
def initialize(name:)
|
25
|
-
@name = name
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
class TagBeginToken < AbstractTagToken
|
30
|
-
attr_reader :attributes
|
31
|
-
|
32
|
-
def initialize(name:, attributes:)
|
33
|
-
super(name: name)
|
34
|
-
|
35
|
-
@attributes = attributes
|
36
|
-
end
|
37
|
-
|
38
|
-
def to_s
|
39
|
-
"TagBegin(#{name.inspect}, #{attributes.inspect})"
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
class TagEndToken < AbstractTagToken
|
44
|
-
def to_s
|
45
|
-
"TagEnd(#{name.inspect})"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
data/lib/dmark/version.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
<p>In Nanoc, every item (page or asset) and every layout has a unique <i>identifier</i>: a string derived from the file’s path. A <i>pattern</i> is an expression that is used to select items or layouts based on their identifier.</p>
|
2
|
-
<h2>Identifiers</h2>
|
3
|
-
<p>Identifiers come in two types: the <i>full</i> type, new in Nanoc 4, and the <i>legacy</i> type, used in Nanoc 3.</p>
|
4
|
-
<dl><dt>full</dt><dd>An identifier with the full type is the filename, with the path to the content directory removed. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the full identifier <i>/about.md</i>.</dd>
|
5
|
-
<dt>legacy</dt><dd>An identifier with the legacy type is the filename, with the path to the content directory removed, the extension removed, and a slash appended. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the legacy identifier <i>/about/</i>. This corresponds closely with paths in clean URLs.</dd></dl>
|
6
|
-
<p>The following methods are useful for full identifiers:</p>
|
7
|
-
<dl><dt><code>identifier.without_ext</code> → <i>String</i></dt><dd>identifier with the last extension removed</dd>
|
8
|
-
<dt><code>identifier.without_exts</code> → <i>String</i></dt><dd>identifier with all extensions removed</dd>
|
9
|
-
<dt><code donkey="true">identifier.ext</code> → <i>String</i></dt><dd>the last extension of this identifier</dd>
|
10
|
-
<dt><code>identifier.exts</code> → <i>String</i></dt><dd>all extensions of this identifier</dd>
|
11
|
-
<dt><code>identifier + string</code> → <i>String</i></dt><dd>identifier with the given string appended</dd></dl>
|
12
|
-
<p>Here are some < examples:</p>
|
13
|
-
<pre>identifier = Nanoc::Identifier.new('/about.md')
|
14
|
-
|
15
|
-
identifier.without_ext
|
16
|
-
# => "/about"
|
17
|
-
|
18
|
-
identifier.ext
|
19
|
-
# => "md"
|
20
|
-
</pre>
|
21
|
-
<p>The following method is useful for legacy identifiers:</p>
|
22
|
-
<dl><dt><code>identifier.chop</code> → <i>String</i></dt><dd>identifier with the last character removed</dd></dl>
|
23
|
-
<p>Here are some examples:</p>
|
24
|
-
<pre>identifier = Nanoc::Identifier.new('/about/', type: :legacy)
|
25
|
-
|
26
|
-
identifier.chop
|
27
|
-
# => "/about"
|
28
|
-
|
29
|
-
identifier.chop + '.html'
|
30
|
-
# => "/about.html"
|
31
|
-
|
32
|
-
identifier + 'index.html'
|
33
|
-
# => "/about/index.html"
|
34
|
-
</pre>
|
35
|
-
<h2>Patterns</h2>
|
36
|
-
<p>Patterns are used to find items and layouts based on their identifier. They come in three varieties:</p>
|
37
|
-
<ul><li>glob patterns</li><li>regular expression patterns</li><li>legacy patterns</li></ul>
|
38
|
-
<h3>Glob patterns</h3>
|
39
|
-
<p>Glob patterns are strings that contain wildcard characters. Wildcard characters are characters that can be substituted for other characters in a identifier. An example of a glob pattern is <i>/projects/*.md</i>, which matches all files with a <i>md</i> extension in the <i>/projects</i> directory.</p>
|
40
|
-
<p>Globs are commonplace in Unix-like environments. For example, the Unix command for listing all files with the <i>md</i> extension in the current directory is <code>ls *.md</code>. In this example, the argument to the <code>ls</code> command is a wildcard.</p>
|
41
|
-
<p>Nanoc supports the following wildcards in glob patterns:</p>
|
42
|
-
<dl><dt><code>*</code></dt><dd>Matches any file or directory name. Does not cross directory boundaries. For example, <i>/projects/*.md</i> matches <i>/projects/nanoc.md</i>, but not <i>/projects/cri.adoc</i> nor <i>/projects/nanoc/about.md</i>.</dd>
|
43
|
-
<dt><code>**/</code></dt><dd>Matches zero or more levels of nested directories. For example, <i>/projects/**/*.md</i> matches both <i>/projects/nanoc.md</i> and <i>/projects/nanoc/history.md</i>.</dd>
|
44
|
-
<dt><code>?</code></dt><dd>Matches a single character.</dd>
|
45
|
-
<dt><code>[abc]</code></dt><dd>Matches any single character in the set. For example, <i>/people/[kt]im.md</i> matches only <i>/people/kim.md</i> and <i>/people/tim.md</i>.</dd>
|
46
|
-
<dt><code>{foo,bar}</code></dt><dd>Matches either string in the comma-separated list. More than two strings are possible. For example, <i>/c{at,ub,ount}s.txt</i> matches <i>/cats.txt</i>, <i>/cubs.txt</i> and <i>/counts.txt</i>, but not <i>/cabs.txt</i>.</dd></dl>
|
47
|
-
<p>A glob pattern that matches every item is <i>/**/*</i>. A glob pattern that matches every item/layout with the extension <i>md</i> is <i>/**/*.md</i>.</p>
|
48
|
-
<h3>Regular expression patterns</h3>
|
49
|
-
<p>You can use a regular expression to select items and layouts.</p>
|
50
|
-
<p>For matching identifiers, the <code>%r{…}</code> syntax is (arguably) nicer than the <code>/…/</code> syntax. The latter is not a good fit for identifiers (or filenames), because all slashes need to be escaped. The <code>\A</code> and <code>\z</code> anchors are also useful to make sure the entire identifier is matched.</p>
|
51
|
-
<p>An example of a regular expression pattern is <code>%r{\A/projects/(cri|nanoc)\.md\z}</code>, which matches both <i>/projects/nanoc.md</i> and <i>/projects/cri.md</i>.</p>
|
52
|
-
<h3>Legacy patterns</h3>
|
53
|
-
<p>Legacy patterns are strings that contain wildcard characters. The wildcard characters behave differently than the glob wildcard characters.</p>
|
54
|
-
<p>To enable legacy patterns, set <code>string_pattern_type</code> to <code>"legacy"</code> in the configuration. For example:</p>
|
55
|
-
<pre>string_pattern_type: "legacy"
|
56
|
-
</pre>
|
57
|
-
<p>For legacy patterns, Nanoc supports the following wildcards:</p>
|
58
|
-
<dl><dt><code>*</code></dt><dd>Matches zero or more characters, including a slash. For example, <i>/projects/*/</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd>
|
59
|
-
<dt><code>+</code></dt><dd>Matches one or more characters, including a slash. For example, <i>/projects/+</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd></dl>
|