d-mark 0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +0,0 @@
1
- module DMark
2
- end
3
-
4
- require_relative 'dmark/lexer'
5
- require_relative 'dmark/nodes'
6
- require_relative 'dmark/parser'
7
- require_relative 'dmark/tokens'
8
- require_relative 'dmark/translator'
9
- require_relative 'dmark/version'
@@ -1,235 +0,0 @@
module DMark
  # Turns D*Mark source text into a flat list of tokens
  # (DMark::Tokens::TagBeginToken, TagEndToken and TextToken).
  #
  # Block-level elements are written as `name[attributes].` at the start of a
  # line and nest through indentation (INDENTATION spaces per level). Inline
  # elements are written as `%name[attributes]{content}`; `%%` and `%}` are
  # escapes for a literal `%` and `}`.
  class Lexer
    # Number of leading whitespace characters that make up one nesting level.
    INDENTATION = 2

    # Raised when the input cannot be tokenised. Besides the plain message it
    # remembers the offending line and position so that an excerpt with a
    # caret marker can be rendered.
    class LexerError < StandardError
      # Supplies ANSI escape sequences for terminal output.
      class Coloriser
        def red
          "\e[31m".freeze
        end

        def bold
          "\e[1m".freeze
        end

        def reset
          "\e[0m".freeze
        end
      end

      # Drop-in replacement for Coloriser that emits no escape sequences.
      class NullColoriser
        def red
          ''.freeze
        end

        def bold
          ''.freeze
        end

        def reset
          ''.freeze
        end
      end

      # @param message [String] short description of the problem
      # @param line [String] the text in which the problem was found
      # @param line_nr [Integer] line number shown to the user
      # @param col_nr [Integer] 1-based column within +line+
      def initialize(message, line, line_nr, col_nr)
        # Hand the raw message to StandardError so #to_s is meaningful too.
        super(message)

        @message = message
        # A trailing newline would push the caret away from the excerpt.
        @line = line.chomp
        @line_nr = line_nr
        @col_nr = col_nr
      end

      # @return [String] the formatted error without colour codes
      def message
        formatted_message(NullColoriser.new)
      end

      # @return [String] the formatted error with ANSI colour codes
      def message_for_tty
        formatted_message(Coloriser.new)
      end

      # Renders a header line, an excerpt of the offending line (at most 38
      # characters on either side of the error column) and a caret marker.
      #
      # @param coloriser [#red, #bold, #reset] source of colour sequences
      # @return [String]
      def formatted_message(coloriser)
        line_excerpt_start = [@col_nr - 38, 0].max
        line_excerpt_end = @col_nr + 38
        line_excerpt = @line[line_excerpt_start..line_excerpt_end]

        if line_excerpt_start > 0
          line_excerpt[0] = '…'
        end

        # The range above is inclusive, so text is only cut off when the end
        # index lies before the last character of the line.
        if line_excerpt_end < @line.size - 1
          line_excerpt[-1] = '…'
        end

        [
          "#{coloriser.red}#{coloriser.bold}ERROR#{coloriser.reset} (line #{@line_nr}, col #{@col_nr}): #{coloriser.red}#{@message}#{coloriser.reset}",
          '',
          line_excerpt,
          coloriser.red + ' ' * (@col_nr - 1 - line_excerpt_start) + '^' + coloriser.reset,
          '',
        ].join("\n")
      end
    end

    # @param string [String] the complete source text to tokenise
    def initialize(string)
      @string = string

      @element_stack = []
      @tokens = []
      @pending_blanks = 0
    end

    # Tokenises the source text line by line.
    #
    # @return [Array] the accumulated tokens
    # @raise [LexerError] when the input is malformed
    def run
      @string.lines.each_with_index do |line, line_nr|
        case line
        when /^\s+$/
          # blank line: remembered and emitted as text on the next unwind
          @pending_blanks += 1
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\.\s*$/
          # empty element: opens a block whose content follows on deeper lines
          match = Regexp.last_match
          element = match[2]
          attributes = parse_attributes(match[4])

          unwind_stack_until(match[1].size)

          @element_stack << element
          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
        when /^(\s*)([a-z0-9-]+)(\[(.*?)\])?\. (.*)$/
          # element with inline content: opened and closed on the same line
          match = Regexp.last_match
          element = match[2]
          attributes = parse_attributes(match[4])
          data = match[5]

          unwind_stack_until(match[1].size)

          @tokens << DMark::Tokens::TagBeginToken.new(name: element, attributes: attributes)
          @tokens.concat(lex_inline(data, line_nr + 1))
          @tokens << DMark::Tokens::TagEndToken.new(name: element)
        when /^(\s*)(.*)$/
          # other line (e.g. data)
          match = Regexp.last_match
          indentation = match[1]
          data = match[2]

          unwind_stack_until(indentation.size)

          if @element_stack.empty?
            # FIXME: unify format of messages (uppercase, lowercase, …)
            # line_nr is 0-based here; report it 1-based like inline errors.
            raise LexerError.new("Can’t insert raw data at root level", line, line_nr + 1, 1)
          end

          # Indentation beyond the current nesting depth belongs to the data.
          extra_indentation = [indentation.size - INDENTATION * @element_stack.size, 0].max

          @tokens.concat(lex_inline(' ' * extra_indentation + data + "\n", line_nr + 1))
        end
      end

      unwind_stack_until(0)

      @tokens
    end

    private

    # Parses a comma-separated `key=value` list into a Hash.
    #
    # A key without a value (or with an empty one) maps to itself. Empty parts
    # (as in `a,,b`) are skipped, and only the first `=` separates key from
    # value, so values may themselves contain `=`.
    #
    # @param data [String, nil] text found between the square brackets
    # @return [Hash{String => String}]
    def parse_attributes(data)
      # FIXME: write a proper parser

      (data || '').split(',').each_with_object({}) do |part, res|
        next if part.empty?

        key, value = part.split('=', 2)
        res[key] = (value.nil? || value.empty?) ? key : value
      end
    end

    # Closes open block elements until the nesting depth fits an indentation
    # of +num+ characters, then emits the blank lines collected so far as text.
    def unwind_stack_until(num)
      while @element_stack.size * INDENTATION > num
        elem = @element_stack.pop

        @tokens << DMark::Tokens::TagEndToken.new(name: elem)
      end

      append_text(@tokens, "\n" * @pending_blanks)
      @pending_blanks = 0
    end

    # Appends +text+ to +out+, merging it into a trailing text token if there
    # is one so that adjacent text ends up in a single token.
    def append_text(out, text)
      if out.empty? || !out.last.is_a?(DMark::Tokens::TextToken)
        # dup: later appends mutate the token's string in place
        out << DMark::Tokens::TextToken.new(text: text.dup)
      else
        out.last.text << text
      end
    end

    # Tokenises inline content with a small state machine.
    #
    # States: :root (plain text), :after_pct (reading an element name after
    # `%`) and :after_lbracket (reading attributes up to `]`).
    #
    # @param string [String] the inline content
    # @param line_nr [Integer] 1-based line number used in error messages
    # @return [Array] tokens for this piece of inline content
    # @raise [LexerError] on a stray `}` or an unexpected character after `%`
    def lex_inline(string, line_nr)
      stack = []
      state = :root
      tokens = []
      name = ''.dup
      attributes = ''.dup
      col_nr = 0

      string.each_char do |char|
        # col_nr always tracks the position in +string+, because LexerError
        # uses it to index into that same string.
        col_nr += 1

        case state
        when :root
          case char
          when '%'
            state = :after_pct
          when '}'
            if stack.empty?
              message = 'Unexpected `}`. Try escaping it as `%}`.'
              raise LexerError.new(message, string, line_nr, col_nr)
            end

            data = stack.pop
            case data.first
            when :raw
              append_text(tokens, data.last)
            when :elem
              tokens << DMark::Tokens::TagEndToken.new(name: data.last)
            else
              raise "Unexpected entry on stack: #{data.inspect}"
            end
          else
            append_text(tokens, char)
          end
        when :after_pct
          # FIXME: require at least one character after %

          case char
          when 'a'..'z', '0'..'9', '-'
            name << char
          when '%', '}' # escaped
            state = :root
            # Drop anything collected so far so that it cannot leak into the
            # next inline element.
            name = ''.dup
            attributes = ''.dup
            append_text(tokens, char)
          when '['
            state = :after_lbracket
          when '{'
            state = :root
            stack << [:elem, name]
            tokens << DMark::Tokens::TagBeginToken.new(name: name, attributes: parse_attributes(attributes))
            name = ''.dup
            attributes = ''.dup
          else
            raise LexerError.new("unexpected `#{char}` after `%`", string, line_nr, col_nr)
          end
        when :after_lbracket
          case char
          when ']'
            # FIXME: might make sense to have after_rbracket instead (to prevent %foo[a][b]{…})
            state = :after_pct
          else
            attributes << char
          end
        else
          raise "Unexpected state: #{state.inspect}"
        end
      end

      tokens
    end
  end
end
@@ -1,76 +0,0 @@
module DMark
  # Node classes that make up a parsed document tree.
  module Nodes
    # Base class of all nodes; owns the list of child nodes.
    class Node
      # @return [Array<Node>] child nodes, in document order
      attr_reader :children

      def initialize
        @children = []
      end

      # @param _indent [Integer] ignored by the base class
      # @return [String]
      def inspect(_indent = 0)
        'Node()'
      end

      private

      # Shared layout for the subclasses' #inspect: +head+ (everything up to
      # and including the opening parenthesis and inline details), followed by
      # the children one level deeper, a closing parenthesis and a newline.
      #
      # @param head [String] e.g. 'Root(' or 'Text("abc"'
      # @param indent [Integer] number of leading spaces for this node
      # @return [String]
      def inspect_tree(head, indent)
        padding = ' ' * indent

        # Explicitly mutable buffer; everything below is appended in place.
        io = ''.dup
        io << padding << head

        if children.any?
          io << "\n"
          children.each { |c| io << c.inspect(indent + 1) }
          io << padding
        end

        io << ')' << "\n"
      end
    end

    # The top of a document tree.
    class RootNode < Node
      # @param indent [Integer] number of leading spaces
      # @return [String] multi-line dump of the whole tree
      def inspect(indent = 0)
        inspect_tree('Root(', indent)
      end
    end

    # A run of literal text.
    class TextNode < Node
      # @return [String]
      attr_reader :text

      # @param text [String]
      def initialize(text:)
        super()
        @text = text
      end

      # @param indent [Integer] number of leading spaces
      # @return [String]
      def inspect(indent = 0)
        inspect_tree('Text(' + @text.inspect, indent)
      end
    end

    # A named element with attributes and child nodes.
    class ElementNode < Node
      # @return [String]
      attr_reader :name
      # @return [Hash]
      attr_reader :attributes

      # @param name [String]
      # @param attributes [Hash]
      def initialize(name:, attributes:)
        super()
        @name = name
        @attributes = attributes
      end

      # @param indent [Integer] number of leading spaces
      # @return [String]
      def inspect(indent = 0)
        head = 'Element(' + @name
        # Attributes are only shown when there are any.
        head += ',' + @attributes.inspect unless @attributes.empty?
        inspect_tree(head, indent)
      end
    end
  end
end
@@ -1,28 +0,0 @@
module DMark
  # Builds a tree of DMark::Nodes from a flat list of DMark::Tokens.
  class Parser
    # @param tokens [Array] tokens to turn into a tree
    def initialize(tokens)
      @tokens = tokens

      @root_node = DMark::Nodes::RootNode.new
    end

    # Walks the tokens once, keeping a stack of the currently open nodes.
    # Text becomes a child of the innermost open node, a tag-begin token opens
    # a new element and a tag-end token closes the innermost one. Tokens of
    # any other kind are ignored.
    #
    # @return [DMark::Nodes::RootNode] the root of the resulting tree
    def run
      node_stack = [@root_node]

      @tokens.each do |token|
        case token
        when DMark::Tokens::TextToken
          node_stack.last.children << DMark::Nodes::TextNode.new(text: token.text)
        when DMark::Tokens::TagBeginToken
          new_node = DMark::Nodes::ElementNode.new(name: token.name, attributes: token.attributes)
          node_stack.last.children << new_node
          node_stack.push(new_node)
        when DMark::Tokens::TagEndToken
          # Never pop the root: a surplus end token would otherwise leave the
          # stack empty and make the next token fail on nil.
          node_stack.pop if node_stack.size > 1
        end
      end

      @root_node
    end
  end
end
@@ -1,49 +0,0 @@
module DMark
  # Token classes: the flat representation of a document.
  module Tokens
    # Base class of all tokens. Subclasses provide #to_s.
    class Token
      # @raise [NotImplementedError] always; subclasses override this
      def to_s
        raise NotImplementedError
      end
    end

    # A run of literal text.
    class TextToken < Token
      # @return [String]
      attr_reader :text

      # @param text [String]
      def initialize(text:)
        @text = text
      end

      # @return [String] e.g. Text("abc")
      def to_s
        "Text(#{@text.inspect})"
      end
    end

    # Common base of the tokens that open or close a named element.
    class AbstractTagToken < Token
      # @return [String] the element name
      attr_reader :name

      # @param name [String]
      def initialize(name:)
        @name = name
      end
    end

    # Marks the start of an element.
    class TagBeginToken < AbstractTagToken
      # @return [Hash]
      attr_reader :attributes

      # @param name [String]
      # @param attributes [Hash] may be omitted for an element without
      #   attributes; a fresh empty Hash is used in that case
      def initialize(name:, attributes: {})
        super(name: name)

        @attributes = attributes
      end

      # @return [String] e.g. TagBegin("p", {})
      def to_s
        "TagBegin(#{name.inspect}, #{attributes.inspect})"
      end
    end

    # Marks the end of an element.
    class TagEndToken < AbstractTagToken
      # @return [String] e.g. TagEnd("p")
      def to_s
        "TagEnd(#{name.inspect})"
      end
    end
  end
end
@@ -1,3 +0,0 @@
module DMark
  # Version string of this library.
  VERSION = '0.1'.freeze
end
@@ -1,59 +0,0 @@
1
- <p>In Nanoc, every item (page or asset) and every layout has a unique <i>identifier</i>: a string derived from the file’s path. A <i>pattern</i> is an expression that is used to select items or layouts based on their identifier.</p>
2
- <h2>Identifiers</h2>
3
- <p>Identifiers come in two types: the <i>full</i> type, new in Nanoc 4, and the <i>legacy</i> type, used in Nanoc 3.</p>
4
- <dl><dt>full</dt><dd>An identifier with the full type is the filename, with the path to the content directory removed. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the full identifier <i>/about.md</i>.</dd>
5
- <dt>legacy</dt><dd>An identifier with the legacy type is the filename, with the path to the content directory removed, the extension removed, and a slash appended. For example, the file <i>/Users/denis/stoneship/content/about.md</i> will have the legacy identifier <i>/about/</i>. This corresponds closely with paths in clean URLs.</dd></dl>
6
- <p>The following methods are useful for full identifiers:</p>
7
- <dl><dt><code>identifier.without_ext</code> → <i>String</i></dt><dd>identifier with the last extension removed</dd>
8
- <dt><code>identifier.without_exts</code> → <i>String</i></dt><dd>identifier with all extensions removed</dd>
9
- <dt><code donkey="true">identifier.ext</code> → <i>String</i></dt><dd>the last extension of this identifier</dd>
10
- <dt><code>identifier.exts</code> → <i>String</i></dt><dd>all extensions of this identifier</dd>
11
- <dt><code>identifier + string</code> → <i>String</i></dt><dd>identifier with the given string appended</dd></dl>
12
- <p>Here are some &lt; examples:</p>
13
- <pre>identifier = Nanoc::Identifier.new('/about.md')
14
-
15
- identifier.without_ext
16
- # => "/about"
17
-
18
- identifier.ext
19
- # => "md"
20
- </pre>
21
- <p>The following method is useful for legacy identifiers:</p>
22
- <dl><dt><code>identifier.chop</code> → <i>String</i></dt><dd>identifier with the last character removed</dd></dl>
23
- <p>Here are some examples:</p>
24
- <pre>identifier = Nanoc::Identifier.new('/about/', type: :legacy)
25
-
26
- identifier.chop
27
- # => "/about"
28
-
29
- identifier.chop + '.html'
30
- # => "/about.html"
31
-
32
- identifier + 'index.html'
33
- # => "/about/index.html"
34
- </pre>
35
- <h2>Patterns</h2>
36
- <p>Patterns are used to find items and layouts based on their identifier. They come in three varieties:</p>
37
- <ul><li>glob patterns</li><li>regular expression patterns</li><li>legacy patterns</li></ul>
38
- <h3>Glob patterns</h3>
39
- <p>Glob patterns are strings that contain wildcard characters. Wildcard characters are characters that can be substituted for other characters in an identifier. An example of a glob pattern is <i>/projects/*.md</i>, which matches all files with a <i>md</i> extension in the <i>/projects</i> directory.</p>
40
- <p>Globs are commonplace in Unix-like environments. For example, the Unix command for listing all files with the <i>md</i> extension in the current directory is <code>ls *.md</code>. In this example, the argument to the <code>ls</code> command is a wildcard.</p>
41
- <p>Nanoc supports the following wildcards in glob patterns:</p>
42
- <dl><dt><code>*</code></dt><dd>Matches any file or directory name. Does not cross directory boundaries. For example, <i>/projects/*.md</i> matches <i>/projects/nanoc.md</i>, but not <i>/projects/cri.adoc</i> nor <i>/projects/nanoc/about.md</i>.</dd>
43
- <dt><code>**/</code></dt><dd>Matches zero or more levels of nested directories. For example, <i>/projects/**/*.md</i> matches both <i>/projects/nanoc.md</i> and <i>/projects/nanoc/history.md</i>.</dd>
44
- <dt><code>?</code></dt><dd>Matches a single character.</dd>
45
- <dt><code>[abc]</code></dt><dd>Matches any single character in the set. For example, <i>/people/[kt]im.md</i> matches only <i>/people/kim.md</i> and <i>/people/tim.md</i>.</dd>
46
- <dt><code>{foo,bar}</code></dt><dd>Matches either string in the comma-separated list. More than two strings are possible. For example, <i>/c{at,ub,ount}s.txt</i> matches <i>/cats.txt</i>, <i>/cubs.txt</i> and <i>/counts.txt</i>, but not <i>/cabs.txt</i>.</dd></dl>
47
- <p>A glob pattern that matches every item is <i>/**/*</i>. A glob pattern that matches every item/layout with the extension <i>md</i> is <i>/**/*.md</i>.</p>
48
- <h3>Regular expression patterns</h3>
49
- <p>You can use a regular expression to select items and layouts.</p>
50
- <p>For matching identifiers, the <code>%r{…}</code> syntax is (arguably) nicer than the <code>/…/</code> syntax. The latter is not a good fit for identifiers (or filenames), because all slashes need to be escaped. The <code>\A</code> and <code>\z</code> anchors are also useful to make sure the entire identifier is matched.</p>
51
- <p>An example of a regular expression pattern is <code>%r{\A/projects/(cri|nanoc)\.md\z}</code>, which matches both <i>/projects/nanoc.md</i> and <i>/projects/cri.md</i>.</p>
52
- <h3>Legacy patterns</h3>
53
- <p>Legacy patterns are strings that contain wildcard characters. The wildcard characters behave differently than the glob wildcard characters.</p>
54
- <p>To enable legacy patterns, set <code>string_pattern_type</code> to <code>"legacy"</code> in the configuration. For example:</p>
55
- <pre>string_pattern_type: "legacy"
56
- </pre>
57
- <p>For legacy patterns, Nanoc supports the following wildcards:</p>
58
- <dl><dt><code>*</code></dt><dd>Matches zero or more characters, including a slash. For example, <i>/projects/*/</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd>
59
- <dt><code>+</code></dt><dd>Matches one or more characters, including a slash. For example, <i>/projects/+</i> matches <i>/projects/nanoc/</i> and <i>/projects/nanoc/about/</i>, but not <i>/projects/</i>.</dd></dl>