d-mark 0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +0,0 @@
1
- module DMark
2
- end
3
-
4
- require_relative 'dmark/lexer'
5
- require_relative 'dmark/nodes'
6
- require_relative 'dmark/parser'
7
- require_relative 'dmark/tokens'
8
- require_relative 'dmark/translator'
9
- require_relative 'dmark/version'
@@ -1,235 +0,0 @@
1
module DMark
  # Turns a D*Mark source string into a flat array of tokens
  # (DMark::Tokens::TextToken, TagBeginToken and TagEndToken).
  #
  # The input is processed line by line. Block-level elements are recognised
  # by their indentation (INDENTATION spaces per nesting level); the text of
  # each line is then scanned for inline elements of the form
  # `%name[attributes]{content}` and for the escapes `%%` and `%}`.
  class Lexer
    # Number of spaces that make up one nesting level.
    INDENTATION = 2

    # Raised when the input cannot be tokenised. Carries the offending text and
    # a 1-based line/column so that the message can show an excerpt with a
    # caret underneath the problem.
    #
    # NOTE: for errors raised while lexing inline content, the column is
    # relative to the inline content handed to the lexer, not necessarily to
    # the physical source line.
    class LexerError < StandardError
      # Number of characters of context kept on either side of the error column.
      EXCERPT_RADIUS = 38

      # Provides ANSI escape sequences for terminal output.
      class Coloriser
        def red
          "\e[31m".freeze
        end

        def bold
          "\e[1m".freeze
        end

        def reset
          "\e[0m".freeze
        end
      end

      # Drop-in replacement for Coloriser that emits no escape sequences.
      class NullColoriser
        def red
          ''.freeze
        end

        def bold
          ''.freeze
        end

        def reset
          ''.freeze
        end
      end

      # @param message [String] short description of the problem
      # @param line [String] the text in which the problem was found
      # @param line_nr [Integer] 1-based line number
      # @param col_nr [Integer] 1-based column number within +line+
      def initialize(message, line, line_nr, col_nr)
        super(message)

        @message = message
        @line = line
        @line_nr = line_nr
        @col_nr = col_nr
      end

      # @return [String] the formatted error, without colours
      def message
        formatted_message(NullColoriser.new)
      end

      # @return [String] the formatted error, with ANSI colours
      def message_for_tty
        formatted_message(Coloriser.new)
      end

      # Builds the multi-line error text: a header, an excerpt of the offending
      # text and a caret pointing at the error column.
      #
      # @param coloriser [#red, #bold, #reset] source of colour sequences
      # @return [String]
      def formatted_message(coloriser)
        # A trailing newline would put an empty line between excerpt and caret.
        line = @line.chomp

        excerpt_start = [@col_nr - EXCERPT_RADIUS, 0].max
        excerpt_end = @col_nr + EXCERPT_RADIUS
        excerpt = (line[excerpt_start..excerpt_end] || '').dup

        unless excerpt.empty?
          excerpt[0] = '…' if excerpt_start > 0
          # The range is inclusive, so text is only cut off on the right when
          # the last index of the line lies beyond excerpt_end.
          excerpt[-1] = '…' if excerpt_end < line.size - 1
        end

        caret_offset = [@col_nr - 1 - excerpt_start, 0].max

        [
          "#{coloriser.red}#{coloriser.bold}ERROR#{coloriser.reset} (line #{@line_nr}, col #{@col_nr}): #{coloriser.red}#{@message}#{coloriser.reset}",
          '',
          excerpt,
          coloriser.red + ' ' * caret_offset + '^' + coloriser.reset,
          '',
        ].join("\n")
      end
    end

    # @param string [String] the complete source text to tokenise
    def initialize(string)
      @string = string

      @element_stack = []
      @tokens = []
      @pending_blanks = 0
    end

    # Tokenises the source text given to the constructor.
    #
    # @return [Array] the tokens, in document order
    # @raise [LexerError] when raw data appears at root level or when inline
    #   markup is malformed
    def run
      @string.lines.each_with_index do |line, index|
        line_nr = index + 1 # error positions are reported 1-based

        case line
        when /^\s+$/
          # blank line; emitted as newlines by the next unwind_stack_until call
          @pending_blanks += 1
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\.\s*$/
          # empty element: stays open for the more deeply indented lines below
          indentation = Regexp.last_match(1)
          element = Regexp.last_match(2)
          attributes = parse_attributes(Regexp.last_match(4))

          unwind_stack_until(indentation.size)

          @element_stack << element
          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\. (.*)$/
          # element with inline content: opened and closed on this line
          indentation = Regexp.last_match(1)
          element = Regexp.last_match(2)
          attributes = parse_attributes(Regexp.last_match(4))
          data = Regexp.last_match(5)

          unwind_stack_until(indentation.size)

          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
          @tokens.concat(lex_inline(data, line_nr))
          @tokens << DMark::Tokens::TagEndToken.new(name: element)
        when /^(\s*)(.*)$/
          # other line (e.g. data)
          indentation = Regexp.last_match(1)
          data = Regexp.last_match(2)

          unwind_stack_until(indentation.size)

          if @element_stack.empty?
            # FIXME: unify format of messages (uppercase, lowercase, …)
            raise LexerError.new("Can’t insert raw data at root level", line, line_nr, 1)
          end

          # Indentation beyond the current nesting level belongs to the data.
          extra_indentation = [indentation.size - INDENTATION * @element_stack.size, 0].max

          @tokens.concat(lex_inline(' ' * extra_indentation + data + "\n", line_nr))
        end
      end

      unwind_stack_until(0)

      @tokens
    end

    private

    # Parses a comma-separated attribute list such as `a=b,flag`.
    #
    # Only the first `=` of a part separates name and value, so values may
    # contain `=`. A part without a value (`flag` or `flag=`) maps the name to
    # itself. Parts without a name (e.g. the empty part in `a,,b`) are ignored.
    #
    # @param data [String, nil] the text between the square brackets
    # @return [Hash{String => String}]
    def parse_attributes(data)
      # FIXME: write a proper parser

      (data || '').split(',').each_with_object({}) do |part, res|
        key, value = part.split('=', 2)
        next if key.nil? || key.empty?

        res[key] = value.nil? || value.empty? ? key : value
      end
    end

    # Closes open block elements until the nesting depth fits an indentation
    # of +num+ spaces, then emits the blank lines seen since the last call.
    #
    # @param num [Integer] indentation (in spaces) of the current line
    def unwind_stack_until(num)
      while @element_stack.size * INDENTATION > num
        elem = @element_stack.pop

        @tokens << DMark::Tokens::TagEndToken.new(name: elem)
      end

      append_text(@tokens, "\n" * @pending_blanks)
      @pending_blanks = 0
    end

    # Appends +text+ to +out+, merging it into a trailing text token if there
    # is one.
    #
    # @param out [Array] token list to append to
    # @param text [String] text to add
    def append_text(out, text)
      last = out.last

      if last.is_a?(DMark::Tokens::TextToken)
        last.text << text
      else
        # Later calls append to this token in place, so it needs its own
        # mutable string rather than the caller's object.
        out << DMark::Tokens::TextToken.new(text: text.dup)
      end
    end

    # Tokenises the inline content of a single line.
    #
    # @param string [String] the inline content
    # @param line_nr [Integer] 1-based line number, used in error messages
    # @return [Array] the tokens for this piece of content
    # @raise [LexerError] on an unexpected `}`, an invalid character after
    #   `%`, or markup that is still incomplete at the end of +string+
    def lex_inline(string, line_nr)
      open_elements = []
      state = :root
      tokens = []
      name = String.new
      attributes = String.new
      col_nr = 0

      string.each_char do |char|
        col_nr += 1

        case state
        when :root
          case char
          when '%'
            state = :after_pct
          when '}'
            if open_elements.empty?
              message = 'Unexpected `}`. Try escaping it as `%}`.'
              raise LexerError.new(message, string, line_nr, col_nr)
            end

            tokens << DMark::Tokens::TagEndToken.new(name: open_elements.pop)
          else
            append_text(tokens, char)
          end
        when :after_pct
          # FIXME: require at least one character after %

          case char
          when 'a'..'z', '0'..'9', '-'
            name << char
          when '%', '}' # escaped
            # An escape is only valid directly after `%`. Accepting it after a
            # partial element name would leak that name into the next element.
            unless name.empty? && attributes.empty?
              raise LexerError.new("unexpected `#{char}` after `%`", string, line_nr, col_nr)
            end

            state = :root
            append_text(tokens, char)
          when '['
            state = :after_lbracket
          when '{'
            state = :root
            open_elements << name
            tokens << DMark::Tokens::TagBeginToken.new(name: name, attributes: parse_attributes(attributes))
            name = String.new
            attributes = String.new
          else
            raise LexerError.new("unexpected `#{char}` after `%`", string, line_nr, col_nr)
          end
        when :after_lbracket
          case char
          when ']'
            # FIXME: might make sense to have after_rbracket instead (to prevent %foo[a][b]{…})
            state = :after_pct
          else
            attributes << char
          end
        else
          raise "Unexpected state: #{state.inspect}"
        end
      end

      end_col_nr = [col_nr, 1].max

      # Markup that is still open here would otherwise be dropped silently or
      # leave the token stream unbalanced.
      unless state == :root
        raise LexerError.new('Unexpected end of inline content after `%`', string, line_nr, end_col_nr)
      end

      unless open_elements.empty?
        message = "Unterminated inline element `%#{open_elements.last}{`. Add a closing `}`."
        raise LexerError.new(message, string, line_nr, end_col_nr)
      end

      tokens
    end
  end
end
@@ -1,76 +0,0 @@
1
module DMark
  # Node classes that make up the document tree.
  module Nodes
    # Base class for all tree nodes. Every node owns an ordered list of
    # children.
    class Node
      # @return [Array<Node>] child nodes, in document order
      attr_reader :children

      def initialize
        @children = []
      end

      # @param _indent [Integer] ignored by the base class
      # @return [String]
      def inspect(_indent = 0)
        'Node()'
      end

      private

      # Renders this node and its children as an indented, multi-line string.
      #
      # A node without children is rendered on one line as `<opening>)`. With
      # children, each child follows on its own line(s), indented one extra
      # space, and the closing parenthesis is placed on a separate line.
      #
      # @param opening [String] label, `(` and any payload, e.g. `Root(`
      # @param indent [Integer] number of leading spaces
      # @return [String] the rendering, terminated by a newline
      def render_tree(opening, indent)
        padding = ' ' * indent
        return "#{padding}#{opening})\n" if children.empty?

        nested = children.map { |child| child.inspect(indent + 1) }.join
        "#{padding}#{opening}\n#{nested}#{padding})\n"
      end
    end

    # The top of the document tree.
    class RootNode < Node
      # @param indent [Integer] number of leading spaces
      # @return [String]
      def inspect(indent = 0)
        render_tree('Root(', indent)
      end
    end

    # A run of text.
    class TextNode < Node
      # @return [String]
      attr_reader :text

      # @param text [String]
      def initialize(text:)
        super()
        @text = text
      end

      # @param indent [Integer] number of leading spaces
      # @return [String]
      def inspect(indent = 0)
        render_tree("Text(#{@text.inspect}", indent)
      end
    end

    # A named element with attributes.
    class ElementNode < Node
      # @return [String]
      attr_reader :name
      # @return [Hash]
      attr_reader :attributes

      # @param name [String]
      # @param attributes [Hash]
      def initialize(name:, attributes:)
        super()
        @name = name
        @attributes = attributes
      end

      # @param indent [Integer] number of leading spaces
      # @return [String]
      def inspect(indent = 0)
        # Attributes are only shown when there are any.
        payload = @attributes.empty? ? '' : ",#{@attributes.inspect}"
        render_tree("Element(#{@name}#{payload}", indent)
      end
    end
  end
end
@@ -1,28 +0,0 @@
1
module DMark
  # Builds a node tree from the flat token list produced by the lexer.
  class Parser
    # Raised when the token stream closes more elements than it opened.
    class ParserError < StandardError; end

    # @param tokens [Array] text, tag-begin and tag-end tokens in document order
    def initialize(tokens)
      @tokens = tokens

      @root_node = DMark::Nodes::RootNode.new
    end

    # Consumes the tokens and attaches the resulting nodes to the root node.
    #
    # Text tokens become text nodes of the innermost open element. A tag-begin
    # token opens a new element node; a tag-end token closes the innermost
    # open element. Tokens of any other class are ignored.
    #
    # @return [DMark::Nodes::RootNode] the root of the tree
    # @raise [ParserError] when a tag-end token arrives while no element is open
    def run
      open_nodes = [@root_node]

      @tokens.each do |token|
        case token
        when DMark::Tokens::TextToken
          open_nodes.last.children << DMark::Nodes::TextNode.new(text: token.text)
        when DMark::Tokens::TagBeginToken
          element = DMark::Nodes::ElementNode.new(name: token.name, attributes: token.attributes)
          open_nodes.last.children << element
          open_nodes.push(element)
        when DMark::Tokens::TagEndToken
          # Popping the root would make every later token fail on nil.
          if open_nodes.size == 1
            raise ParserError, "Unexpected end tag #{token.name.inspect}: no element is open"
          end

          open_nodes.pop
        end
      end

      @root_node
    end
  end
end
@@ -1,49 +0,0 @@
1
module DMark
  # Token classes produced by the lexer and consumed by the parser.
  module Tokens
    # Abstract base class for all tokens.
    class Token
      # @return [String] a short, human-readable representation
      # @raise [NotImplementedError] unless overridden by the subclass
      def to_s
        raise NotImplementedError, "#{self.class.name} must implement #to_s"
      end
    end

    # A run of text.
    class TextToken < Token
      # @return [String] the text; the lexer appends to this string in place
      attr_reader :text

      # @param text [String]
      def initialize(text:)
        # Keep a private, mutable copy: in-place appends must neither fail on
        # a frozen argument nor modify the caller's string.
        @text = text.dup
      end

      def to_s
        "Text(#{@text.inspect})"
      end
    end

    # Common base for the tokens that open or close an element.
    class AbstractTagToken < Token
      # @return [String] the element name
      attr_reader :name

      # @param name [String]
      def initialize(name:)
        @name = name
      end
    end

    # Marks the start of an element.
    class TagBeginToken < AbstractTagToken
      # @return [Hash] the element's attributes
      attr_reader :attributes

      # @param name [String]
      # @param attributes [Hash]
      def initialize(name:, attributes:)
        super(name: name)

        @attributes = attributes
      end

      def to_s
        "TagBegin(#{name.inspect}, #{attributes.inspect})"
      end
    end

    # Marks the end of an element.
    class TagEndToken < AbstractTagToken
      def to_s
        "TagEnd(#{name.inspect})"
      end
    end
  end
end
@@ -1,3 +0,0 @@
1
module DMark
  # Version of the library, as a frozen string.
  VERSION = '0.1'.freeze
end
@@ -1,59 +0,0 @@
1
- <p>In Nanoc, every item (page or asset) and every layout has a unique <i>identifier</i>: a string derived from the file’s path. A <i>pattern</i> is an expression that is used to select items or layouts based on their identifier.</p>
2
- <h2>Identifiers</h2>
3
- <p>Identifiers come in two types: the <i>full</i> type, new in Nanoc 4, and the <i>legacy</i> type, used in Nanoc 3.</p>
4
- <dl><dt>full</dt><dd>An identifier with the full type is the filename, with the path to the content directory removed. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the full identifier <i>/about.md</i>.</dd>
5
- <dt>legacy</dt><dd>An identifier with the legacy type is the filename, with the path to the content directory removed, the extension removed, and a slash appended. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the legacy identifier <i>/about/</i>. This corresponds closely with paths in clean URLs.</dd></dl>
6
- <p>The following methods are useful for full identifiers:</p>
7
- <dl><dt><code>identifier.without_ext</code> → <i>String</i></dt><dd>identifier with the last extension removed</dd>
8
- <dt><code>identifier.without_exts</code> → <i>String</i></dt><dd>identifier with all extensions removed</dd>
9
- <dt><code donkey="true">identifier.ext</code> → <i>String</i></dt><dd>the last extension of this identifier</dd>
10
- <dt><code>identifier.exts</code> → <i>String</i></dt><dd>all extensions of this identifier</dd>
11
- <dt><code>identifier + string</code> → <i>String</i></dt><dd>identifier with the given string appended</dd></dl>
12
- <p>Here are some &lt; examples:</p>
13
- <pre>identifier = Nanoc::Identifier.new('/about.md')
14
-
15
- identifier.without_ext
16
- # => "/about"
17
-
18
- identifier.ext
19
- # => "md"
20
- </pre>
21
- <p>The following method is useful for legacy identifiers:</p>
22
- <dl><dt><code>identifier.chop</code> → <i>String</i></dt><dd>identifier with the last character removed</dd></dl>
23
- <p>Here are some examples:</p>
24
- <pre>identifier = Nanoc::Identifier.new('/about/', type: :legacy)
25
-
26
- identifier.chop
27
- # => "/about"
28
-
29
- identifier.chop + '.html'
30
- # => "/about.html"
31
-
32
- identifier + 'index.html'
33
- # => "/about/index.html"
34
- </pre>
35
- <h2>Patterns</h2>
36
- <p>Patterns are used to find items and layouts based on their identifier. They come in three varieties:</p>
37
- <ul><li>glob patterns</li><li>regular expression patterns</li><li>legacy patterns</li></ul>
38
- <h3>Glob patterns</h3>
39
- <p>Glob patterns are strings that contain wildcard characters. Wildcard characters are characters that can be substituted for other characters in an identifier. An example of a glob pattern is <i>/projects/*.md</i>, which matches all files with a <i>md</i> extension in the <i>/projects</i> directory.</p>
40
- <p>Globs are commonplace in Unix-like environments. For example, the Unix command for listing all files with the <i>md</i> extension in the current directory is <code>ls *.md</code>. In this example, the argument to the <code>ls</code> command is a wildcard.</p>
41
- <p>Nanoc supports the following wildcards in glob patterns:</p>
42
- <dl><dt><code>*</code></dt><dd>Matches any file or directory name. Does not cross directory boundaries. For example, <i>/projects/*.md</i> matches <i>/projects/nanoc.md</i>, but not <i>/projects/cri.adoc</i> nor <i>/projects/nanoc/about.md</i>.</dd>
43
- <dt><code>**/</code></dt><dd>Matches zero or more levels of nested directories. For example, <i>/projects/**/*.md</i> matches both <i>/projects/nanoc.md</i> and <i>/projects/nanoc/history.md</i>.</dd>
44
- <dt><code>?</code></dt><dd>Matches a single character.</dd>
45
- <dt><code>[abc]</code></dt><dd>Matches any single character in the set. For example, <i>/people/[kt]im.md</i> matches only <i>/people/kim.md</i> and <i>/people/tim.md</i>.</dd>
46
- <dt><code>{foo,bar}</code></dt><dd>Matches either string in the comma-separated list. More than two strings are possible. For example, <i>/c{at,ub,ount}s.txt</i> matches <i>/cats.txt</i>, <i>/cubs.txt</i> and <i>/counts.txt</i>, but not <i>/cabs.txt</i>.</dd></dl>
47
- <p>A glob pattern that matches every item is <i>/**/*</i>. A glob pattern that matches every item/layout with the extension <i>md</i> is <i>/**/*.md</i>.</p>
48
- <h3>Regular expression patterns</h3>
49
- <p>You can use a regular expression to select items and layouts.</p>
50
- <p>For matching identifiers, the <code>%r{…}</code> syntax is (arguably) nicer than the <code>/…/</code> syntax. The latter is not a good fit for identifiers (or filenames), because all slashes need to be escaped. The <code>\A</code> and <code>\z</code> anchors are also useful to make sure the entire identifier is matched.</p>
51
- <p>An example of a regular expression pattern is <code>%r{\A/projects/(cri|nanoc)\.md\z}</code>, which matches both <i>/projects/nanoc.md</i> and <i>/projects/cri.md</i>.</p>
52
- <h3>Legacy patterns</h3>
53
- <p>Legacy patterns are strings that contain wildcard characters. The wildcard characters behave differently than the glob wildcard characters.</p>
54
- <p>To enable legacy patterns, set <code>string_pattern_type</code> to <code>"legacy"</code> in the configuration. For example:</p>
55
- <pre>string_pattern_type: "legacy"
56
- </pre>
57
- <p>For legacy patterns, Nanoc supports the following wildcards:</p>
58
- <dl><dt><code>*</code></dt><dd>Matches zero or more characters, including a slash. For example, <i>/projects/*/</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd>
59
- <dt><code>+</code></dt><dd>Matches one or more characters, including a slash. For example, <i>/projects/+</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd></dl>