syntax_tree-xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +6 -0
- data/.github/workflows/main.yml +34 -0
- data/.gitignore +10 -0
- data/CHANGELOG.md +16 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +36 -0
- data/LICENSE +21 -0
- data/README.md +76 -0
- data/Rakefile +16 -0
- data/lib/syntax_tree/xml/format.rb +238 -0
- data/lib/syntax_tree/xml/nodes.rb +413 -0
- data/lib/syntax_tree/xml/parser.rb +384 -0
- data/lib/syntax_tree/xml/pretty_print.rb +88 -0
- data/lib/syntax_tree/xml/version.rb +7 -0
- data/lib/syntax_tree/xml/visitor.rb +61 -0
- data/lib/syntax_tree/xml.rb +29 -0
- data/syntax_tree-xml.gemspec +33 -0
- metadata +145 -0
@@ -0,0 +1,413 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxTree
|
4
|
+
module XML
|
5
|
+
# A Location represents a position for a node in the source file.
|
6
|
+
class Location
  attr_reader :start_char, :end_char, :start_line, :end_line

  # Stores the start/end character offsets and start/end line numbers that
  # were passed in; all four keywords are required.
  def initialize(start_char:, end_char:, start_line:, end_line:)
    @start_char, @end_char = start_char, end_char
    @start_line, @end_line = start_line, end_line
  end

  # Hash-pattern-matching hook. The requested keys are ignored and all four
  # fields are always returned.
  def deconstruct_keys(_keys)
    {
      start_char: start_char, end_char: end_char,
      start_line: start_line, end_line: end_line
    }
  end

  # Builds a new Location that begins where this one begins and finishes
  # where +other+ finishes.
  def to(other)
    span = {
      start_char: start_char,
      start_line: start_line,
      end_char: other.end_char,
      end_line: other.end_line
    }

    Location.new(**span)
  end

  # Orders locations by their starting character offset only.
  def <=>(other)
    start_char <=> other.start_char
  end
end
|
38
|
+
|
39
|
+
# A parent node that contains a bit of shared functionality.
|
40
|
+
class Node
  # Wraps +q+ in a new Format visitor and has that visitor visit this node.
  # Returns whatever the visitor's #visit returns.
  def format(q)
    formatter = Format.new(q)
    formatter.visit(self)
  end

  # Wraps +q+ in a new PrettyPrint visitor and has that visitor visit this
  # node. Returns whatever the visitor's #visit returns.
  def pretty_print(q)
    printer = PrettyPrint.new(q)
    printer.visit(self)
  end
end
|
49
|
+
|
50
|
+
# A Token is any kind of lexical token from the source. It has a type, a
|
51
|
+
# value which is a subset of the source, and an index where it starts in
|
52
|
+
# the source.
|
53
|
+
class Token < Node
  attr_reader :type, :value, :location

  # Stores the token's type, its value, and its location exactly as given.
  def initialize(type:, value:, location:)
    @type, @value, @location = type, value, location
  end

  # Double-dispatch entry point: hands this token to visitor.visit_token.
  def accept(visitor)
    visitor.visit_token(self)
  end

  # A token has no children, so this is always a fresh empty array.
  def child_nodes
    []
  end

  # Array pattern matching sees the same (empty) list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      type: type,
      value: value,
      location: location
    }
  end
end
|
76
|
+
|
77
|
+
# The Document node is the top of the syntax tree. It contains an optional
|
78
|
+
# prolog, an optional doctype declaration, any number of optional
|
79
|
+
# miscellaneous elements like comments, whitespace, or processing
|
80
|
+
# instructions, and a root element.
|
81
|
+
class Document < Node
  attr_reader :prolog, :miscs, :doctype, :element, :location

  # Stores each part of the document exactly as given.
  def initialize(prolog:, miscs:, doctype:, element:, location:)
    @prolog, @miscs, @doctype = prolog, miscs, doctype
    @element, @location = element, location
  end

  # Double-dispatch entry point: hands this node to visitor.visit_document.
  def accept(visitor)
    visitor.visit_document(self)
  end

  # Lists the prolog, every misc, the doctype, and the element in that
  # order, dropping any that are nil.
  def child_nodes
    ordered = [prolog, *miscs, doctype, element]
    ordered.compact
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      prolog: prolog, miscs: miscs, doctype: doctype,
      element: element, location: location
    }
  end
end
|
112
|
+
|
113
|
+
# The prolog to the document includes an XML declaration which opens the
|
114
|
+
# tag, any number of attributes, and a closing of the tag.
|
115
|
+
class Prolog < Node
  attr_reader :opening, :attributes, :closing, :location

  # Stores the opening, the attributes, the closing, and the location
  # exactly as given.
  def initialize(opening:, attributes:, closing:, location:)
    @opening, @attributes = opening, attributes
    @closing, @location = closing, location
  end

  # Double-dispatch entry point: hands this node to visitor.visit_prolog.
  def accept(visitor)
    visitor.visit_prolog(self)
  end

  # Lists the opening, each attribute, and then the closing. Nothing is
  # filtered out of this list.
  def child_nodes
    [opening, *attributes, closing]
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    { opening: opening, attributes: attributes, closing: closing, location: location }
  end
end
|
144
|
+
|
145
|
+
# A document type declaration is a special kind of tag that specifies the
|
146
|
+
# type of the document. It contains an opening declaration, the name of
|
147
|
+
# the document type, an optional external identifier, and a closing of the
|
148
|
+
# tag.
|
149
|
+
class DocType < Node
  attr_reader :opening, :name, :external_id, :closing, :location

  # Stores each part of the declaration exactly as given.
  def initialize(opening:, name:, external_id:, closing:, location:)
    @opening, @name = opening, name
    @external_id = external_id
    @closing, @location = closing, location
  end

  # Double-dispatch entry point: hands this node to visitor.visit_doctype.
  def accept(visitor)
    visitor.visit_doctype(self)
  end

  # Lists the opening, name, external id, and closing in that order,
  # dropping any that are nil.
  def child_nodes
    parts = [opening, name, external_id, closing]
    parts.compact
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      opening: opening, name: name, external_id: external_id,
      closing: closing, location: location
    }
  end
end
|
180
|
+
|
181
|
+
# An external ID is a child of a document type declaration. It represents
|
182
|
+
# the location where the external identifier is located. It contains a
|
183
|
+
# type (either system or public), an optional public id literal, and the
|
184
|
+
# system literal.
|
185
|
+
class ExternalID < Node
  attr_reader :type, :public_id, :system_id, :location

  # Stores the type, the public id, the system id, and the location exactly
  # as given. The location must be assigned here: the +location+ reader and
  # the :location entry of #deconstruct_keys both read @location.
  def initialize(type:, public_id:, system_id:, location:)
    @type = type
    @public_id = public_id
    @system_id = system_id
    @location = location
  end

  # Double-dispatch entry point: hands this node to
  # visitor.visit_external_id.
  def accept(visitor)
    visitor.visit_external_id(self)
  end

  # Lists the type, public id, and system id in that order, dropping any
  # that are nil.
  def child_nodes
    [type, public_id, system_id].compact
  end

  # Array pattern matching sees the same list as child_nodes.
  alias deconstruct child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(keys)
    {
      type: type,
      public_id: public_id,
      system_id: system_id,
      location: location
    }
  end
end
|
213
|
+
|
214
|
+
# An element is a child of the document. It contains an opening tag, any
|
215
|
+
# optional content within the tag, and a closing tag. It can also
|
216
|
+
# potentially contain an opening tag that self-closes, in which case the
|
217
|
+
# content and closing tag will be nil.
|
218
|
+
class Element < Node
  # The tag that starts an element: the opening character (<), the element
  # name, zero or more attributes, and a closing token (> or />).
  class OpeningTag < Node
    attr_reader :opening, :name, :attributes, :closing, :location

    # Stores each part of the tag exactly as given.
    def initialize(opening:, name:, attributes:, closing:, location:)
      @opening, @name = opening, name
      @attributes = attributes
      @closing, @location = closing, location
    end

    # Double-dispatch entry point: hands this node to
    # visitor.visit_opening_tag.
    def accept(visitor)
      visitor.visit_opening_tag(self)
    end

    # Lists the opening, the name, each attribute, and then the closing.
    # Nothing is filtered out of this list.
    def child_nodes
      [opening, name, *attributes, closing]
    end

    # Array pattern matching sees the same list as child_nodes.
    alias_method :deconstruct, :child_nodes

    # Hash-pattern-matching hook; the requested keys are ignored and every
    # field is returned.
    def deconstruct_keys(_keys)
      {
        opening: opening, name: name, attributes: attributes,
        closing: closing, location: location
      }
    end
  end

  # The tag that ends an element: the opening character (<), the element
  # name, and the closing character (>).
  class ClosingTag < Node
    attr_reader :opening, :name, :closing, :location

    # Stores each part of the tag exactly as given.
    def initialize(opening:, name:, closing:, location:)
      @opening, @name = opening, name
      @closing, @location = closing, location
    end

    # Double-dispatch entry point: hands this node to
    # visitor.visit_closing_tag.
    def accept(visitor)
      visitor.visit_closing_tag(self)
    end

    # Lists the opening, the name, and the closing, unfiltered.
    def child_nodes
      [opening, name, closing]
    end

    # Array pattern matching sees the same list as child_nodes.
    alias_method :deconstruct, :child_nodes

    # Hash-pattern-matching hook; the requested keys are ignored and every
    # field is returned.
    def deconstruct_keys(_keys)
      {
        opening: opening,
        name: name,
        closing: closing,
        location: location
      }
    end
  end

  attr_reader :opening_tag, :content, :closing_tag, :location

  # Stores the opening tag, the content, the closing tag, and the location
  # exactly as given.
  def initialize(opening_tag:, content:, closing_tag:, location:)
    @opening_tag, @content = opening_tag, content
    @closing_tag, @location = closing_tag, location
  end

  # Double-dispatch entry point: hands this node to visitor.visit_element.
  def accept(visitor)
    visitor.visit_element(self)
  end

  # Lists the opening tag, each content node, and the closing tag in that
  # order, dropping nil entries. A nil content splats to nothing.
  def child_nodes
    pieces = [opening_tag, *content, closing_tag]
    pieces.compact
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    { opening_tag: opening_tag, content: content, closing_tag: closing_tag, location: location }
  end
end
|
309
|
+
|
310
|
+
# A Reference is either a character or entity reference. It contains a
|
311
|
+
# single value that is the token it contains.
|
312
|
+
class Reference < Node
  attr_reader :value, :location

  # Stores the wrapped value and the location exactly as given.
  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Double-dispatch entry point: hands this node to
  # visitor.visit_reference.
  def accept(visitor)
    visitor.visit_reference(self)
  end

  # The only child is the wrapped value.
  def child_nodes
    [value]
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      value: value,
      location: location
    }
  end
end
|
334
|
+
|
335
|
+
# An Attribute is a key-value pair within a tag. It contains the key, the
|
336
|
+
# equals sign, and the value.
|
337
|
+
class Attribute < Node
  attr_reader :key, :equals, :value, :location

  # Stores the key, the equals sign, the value, and the location exactly as
  # given.
  def initialize(key:, equals:, value:, location:)
    @key, @equals = key, equals
    @value, @location = value, location
  end

  # Double-dispatch entry point: hands this node to
  # visitor.visit_attribute.
  def accept(visitor)
    visitor.visit_attribute(self)
  end

  # Lists the key, the equals sign, and the value, unfiltered.
  def child_nodes
    [key, equals, value]
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      key: key,
      equals: equals,
      value: value,
      location: location
    }
  end
end
|
361
|
+
|
362
|
+
# A CharData contains either plain text or whitespace within an element.
|
363
|
+
# It wraps a single token value.
|
364
|
+
class CharData < Node
  attr_reader :value, :location

  # Stores the wrapped value and the location exactly as given.
  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Double-dispatch entry point: hands this node to
  # visitor.visit_char_data.
  def accept(visitor)
    visitor.visit_char_data(self)
  end

  # The only child is the wrapped value.
  def child_nodes
    [value]
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      value: value,
      location: location
    }
  end
end
|
386
|
+
|
387
|
+
# A Misc is a catch-all for miscellaneous content outside the root tag of
|
388
|
+
# the XML document. It contains a single token which can be either a
|
389
|
+
# comment, a processing instruction, or whitespace.
|
390
|
+
class Misc < Node
  attr_reader :value, :location

  # Stores the wrapped value and the location exactly as given.
  def initialize(value:, location:)
    @value, @location = value, location
  end

  # Double-dispatch entry point: hands this node to visitor.visit_misc.
  def accept(visitor)
    visitor.visit_misc(self)
  end

  # The only child is the wrapped value.
  def child_nodes
    [value]
  end

  # Array pattern matching sees the same list as child_nodes.
  alias_method :deconstruct, :child_nodes

  # Hash-pattern-matching hook; the requested keys are ignored and every
  # field is returned.
  def deconstruct_keys(_keys)
    {
      value: value,
      location: location
    }
  end
end
|
412
|
+
end
|
413
|
+
end
|