zenml 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 607af5731fb778c2c3de36cfcb3cb4a3c67d203b6b41ed020d4e5ac24e74451a
4
- data.tar.gz: 21905106dd5237550faff1df06e4d61632c967aa3bef8b12e012fe72cf0d7f97
3
+ metadata.gz: e1d728cf53633e86d66698254bfd83f184360263c6c45d1c6713cd6879a1cf39
4
+ data.tar.gz: d7b9b1aa8c36692b4b7a81e286564c3a3f0b1d48fa1b71876f25d8b5028c0bb2
5
5
  SHA512:
6
- metadata.gz: 75d4e5de085ed17a5870133bfc56217107c3afa40997b1b1e66511170f02d2d1f1effc93cfd5fed06df275698ad4ab35b9a4e09cb038a1d7c88b5517e3d5b40f
7
- data.tar.gz: ae779f165f87b50fda1bc73a815d3319edf5fce618ed41e72a57991615d46a55fdacdcc7ebd6b447566720d78bea09946ca04281837f4e10d4b11587843e475d
6
+ metadata.gz: 3f11c1f41c8dc6b5734e6200ab8d9204b9abe21156bed9af52e5998cd01f215a24083285073adb41dd94ebc75c86292a5ad632eb2359ff3b9d0e46a6c39b3c20
7
+ data.tar.gz: faa1cae292f85d8fc75aa9724c73205a98ea87f07f05f5aabba91636a5bd9b90b958ae55c3f1b6f9b5861eb3a6b400017c2e0f9a782f4565c77c6ac8921391bc
data/source/zenml.rb CHANGED
@@ -6,10 +6,11 @@ require 'zenml/utility'
6
6
 
7
7
  module Zenithal
8
8
 
9
- VERSION = "1.0.0"
9
+ VERSION = "1.1.0"
10
10
 
11
11
  require 'zenml/error'
12
12
  require 'zenml/reader'
13
+ require 'zenml/parser_utility'
13
14
  require 'zenml/parser'
14
15
  require 'zenml/converter'
15
16
 
@@ -3,9 +3,8 @@
3
3
 
4
4
  class ZenithalParseError < StandardError
5
5
 
6
- def initialize(reader, message = "")
7
- whole_message = "[line #{reader.lineno}] #{message}"
8
- super(whole_message)
6
+ def initialize(message = "")
7
+ super(message)
9
8
  end
10
9
 
11
10
  end
@@ -0,0 +1,550 @@
1
+ # coding: utf-8
2
+
3
+
4
+ require 'pp'
5
+ require 'rexml/document'
6
+ include REXML
7
+
8
+
9
# Recursive-descent parser for the legacy ZenML syntax.
#
# Characters are pulled from a StringReader (read / peek / unread) and turned
# into REXML nodes.  The REXML classes (Document, Element, Text, Comment,
# Instruction, XMLDecl, Parent) are referenced unqualified, so REXML has to be
# included at the top level before any parse method is called.
class ZenithalOldParser

  TAG_START = "\\"
  MACRO_START = "&"
  ESCAPE_START = "`"
  ATTRIBUTE_START = "|"
  ATTRIBUTE_END = "|"
  ATTRIBUTE_EQUAL = "="
  ATTRIBUTE_VALUE_START = "\""
  ATTRIBUTE_VALUE_END = "\""
  ATTRIBUTE_SEPARATOR = ","
  CONTENT_START = "<"
  CONTENT_END = ">"
  BRACE_START = "{"
  BRACE_END = "}"
  BRACKET_START = "["
  BRACKET_END = "]"
  SLASH_START = "/"
  SLASH_END = "/"
  COMMENT_DELIMITER = "#"
  INSTRUCTION_MARK = "?"
  TRIM_MARK = "*"
  VERBAL_MARK = "~"
  MULTIPLE_MARK = "+"
  SYSTEM_INSTRUCTION_NAME = "zml"
  # Names usable as "&name<>" macros without registration, mapped to the text they produce.
  ENTITIES = {"amp" => "&", "lt" => "<", "gt" => ">", "apos" => "'", "quot" => "\"",
              "lcub" => "{", "rcub" => "}", "lbrace" => "{", "rbrace" => "}", "lsqb" => "[", "rsqb" => "]", "lbrack" => "[", "rbrack" => "]",
              "sol" => "/", "bsol" => "\\", "verbar" => "|", "vert" => "|", "grave" => "`", "num" => "#"}
  # Characters that may follow ESCAPE_START to be taken literally.
  ESCAPES = ["&", "<", ">", "'", "\"", "{", "}", "[", "]", "/", "\\", "|", "`", "#"]
  # Code points (single values or ranges) accepted as the first character of a name.
  VALID_START_CHARS = [0x3A, 0x41..0x5A, 0x5F, 0x61..0x7A, 0xC0..0xD6, 0xD8..0xF6, 0xF8..0x2FF, 0x370..0x37D, 0x37F..0x1FFF, 0x200C..0x200D,
                       0x2070..0x218F, 0x2C00..0x2FEF, 0x3001..0xD7FF, 0xF900..0xFDCF, 0xFDF0..0xFFFD, 0x10000..0xEFFFF]
  # Additional code points accepted after the first character of a name.
  VALID_MIDDLE_CHARS = [0x2D, 0x2E, 0x30..0x39, 0xB7, 0x0300..0x036F, 0x203F..0x2040]

  attr_writer :brace_name
  attr_writer :bracket_name
  attr_writer :slash_name

  # source - the ZenML text to parse; it is wrapped in a StringReader.
  def initialize(source)
    @source = StringReader.new(source)
    @version = nil
    @brace_name = nil
    @bracket_name = nil
    @slash_name = nil
    @macros = {}
  end

  # Parses the whole source and returns a Document holding the resulting nodes.
  def parse
    document = Document.new
    parse_nodes.each do |child|
      document.add(child)
    end
    document
  end

  # Parses a run of sibling nodes until end of input or a closing delimiter
  # (which is left unread for the caller).
  #
  # option   - options of the enclosing element, forwarded to parse_text.
  # in_slash - true while inside a slash element, so that SLASH_START is
  #            treated as the closing delimiter instead of opening a nested one.
  #
  # Returns an array of nodes.
  def parse_nodes(option = {}, in_slash = false)
    children = []
    while (char = @source.read)
      # Every branch below re-reads this character itself (or leaves it for
      # the caller), so push it back before dispatching.
      @source.unread
      if char == TAG_START || char == MACRO_START
        children.concat(parse_element)
      elsif @brace_name && char == BRACE_START
        children << parse_brace
      elsif @bracket_name && char == BRACKET_START
        children << parse_bracket
      elsif @slash_name && !in_slash && char == SLASH_START
        children << parse_slash
      elsif char == COMMENT_DELIMITER
        children << parse_comment
      elsif char == CONTENT_END || closing_delimiter?(char)
        break
      else
        children << parse_text(option)
      end
    end
    children
  end

  # Parses the content of a verbal or instruction element: everything up to
  # CONTENT_END (left unread) is text, with only escapes being interpreted.
  #
  # Returns an array of nodes.
  def parse_verbal_nodes(option = {})
    children = []
    while (char = @source.read)
      @source.unread
      break if char == CONTENT_END
      children << parse_verbal_text(option)
    end
    children
  end

  # Parses one element or macro invocation, starting at TAG_START or MACRO_START.
  #
  # Returns an array of nodes.  Raises ZenithalParseError on malformed input.
  def parse_element
    first_char = @source.read
    unless first_char == TAG_START || first_char == MACRO_START
      raise parse_error("Element must start with '#{TAG_START}' or '#{MACRO_START}'")
    end
    name, option = parse_element_name
    option[:macro] = true if first_char == MACRO_START
    attributes = parse_attributes
    children_list = parse_children_list(option)
    create_nodes(name, attributes, children_list, option)
  end

  # Reads an element name followed by optional marks, then skips whitespace.
  # Marks are only accepted after the name; a name character after a mark is
  # an error.
  #
  # Returns [name, option] where option has :instruction, :trim_indents,
  # :verbal and/or :multiple set to true according to the marks found.
  def parse_element_name
    name, marks, option = "", [], {}
    while (char = @source.read)
      if char == ATTRIBUTE_START || char == CONTENT_START || char == CONTENT_END || char =~ /\s/
        @source.unread
        break
      elsif char == INSTRUCTION_MARK || char == TRIM_MARK || char == VERBAL_MARK || char == MULTIPLE_MARK
        marks << char
      elsif name.empty? && marks.empty? && ZenithalOldParser.valid_start_char?(char)
        name << char
      elsif !name.empty? && marks.empty? && ZenithalOldParser.valid_char?(char)
        name << char
      else
        raise parse_error("Invalid character in element name")
      end
    end
    skip_spaces
    option[:instruction] = true if marks.include?(INSTRUCTION_MARK)
    option[:trim_indents] = true if marks.include?(TRIM_MARK)
    option[:verbal] = true if marks.include?(VERBAL_MARK)
    option[:multiple] = true if marks.include?(MULTIPLE_MARK)
    return name, option
  end

  # Parses an optional attribute list delimited by ATTRIBUTE_START and
  # ATTRIBUTE_END.  When no list is present nothing is consumed.
  #
  # Returns a hash of attribute names to values.
  def parse_attributes
    attributes = {}
    unless @source.read == ATTRIBUTE_START
      @source.unread
      return attributes
    end
    skip_spaces
    loop do
      key, value = parse_attribute
      attributes[key] = value
      char = @source.read
      if char == ATTRIBUTE_SEPARATOR
        skip_spaces
      elsif char == ATTRIBUTE_END
        break
      else
        raise parse_error("Attributes must be separated by '#{ATTRIBUTE_SEPARATOR}' and closed by '#{ATTRIBUTE_END}'")
      end
    end
    attributes
  end

  # Parses one attribute.  An attribute written without a value gets its own
  # key as the value.
  #
  # Returns [key, value].
  def parse_attribute
    key = parse_attribute_key
    skip_spaces
    if @source.read == ATTRIBUTE_EQUAL
      skip_spaces
      value = parse_attribute_value
    else
      @source.unread
      value = key
    end
    skip_spaces
    return key, value
  end

  # Reads an attribute name up to ATTRIBUTE_EQUAL, ATTRIBUTE_END or whitespace
  # (left unread).
  def parse_attribute_key
    key = ""
    while (char = @source.read)
      if char == ATTRIBUTE_EQUAL || char == ATTRIBUTE_END || char =~ /\s/
        @source.unread
        break
      elsif key.empty? && ZenithalOldParser.valid_start_char?(char)
        key << char
      elsif !key.empty? && ZenithalOldParser.valid_char?(char)
        key << char
      else
        raise parse_error("Invalid character in attribute name")
      end
    end
    key
  end

  # Reads a quoted attribute value, resolving escapes.  The closing quote is
  # consumed.
  def parse_attribute_value
    unless @source.read == ATTRIBUTE_VALUE_START
      raise parse_error("Attribute value must start with '#{ATTRIBUTE_VALUE_START}'")
    end
    value = ""
    while (char = @source.read)
      break if char == ATTRIBUTE_VALUE_END
      if char == ESCAPE_START && ESCAPES.include?(@source.peek)
        @source.unread
        value << parse_escape_string
      else
        value << char
      end
    end
    value
  end

  # Parses the content part(s) of an element: one or more "<...>" groups
  # separated only by whitespace, or a lone CONTENT_END meaning "no content".
  #
  # Returns an array with one array of nodes per group.
  def parse_children_list(option = {})
    children_list = []
    first_char = @source.read
    if first_char == CONTENT_START
      loop do
        if option[:verbal] || option[:instruction]
          children = parse_verbal_nodes(option)
        else
          children = parse_nodes(option)
        end
        trim_indents(children) if option[:trim_indents]
        children_list << children
        unless @source.read == CONTENT_END
          raise parse_error("Element content is not closed by '#{CONTENT_END}'")
        end
        space_count = skip_spaces
        unless @source.read == CONTENT_START
          # No further group follows: give back the probed character together
          # with the whitespace skipped while looking for it.
          @source.unread(space_count + 1)
          break
        end
      end
    elsif first_char == CONTENT_END
      children_list << []
    else
      raise parse_error("Element name must be followed by '#{CONTENT_START}' or '#{CONTENT_END}'")
    end
    children_list
  end

  # Builds the nodes for a parsed element according to its option flags:
  # macro expansion, processing instruction(s), or ordinary element(s).
  def create_nodes(name, attributes, children_list, option = {})
    return process_macro(name, attributes, children_list) if option[:macro]
    if option[:instruction]
      unless children_list.size <= 1
        raise parse_error("Processing instruction cannot have more than one content group")
      end
      nodes = create_instructions(name, attributes, children_list.first)
      skip_spaces if name == SYSTEM_INSTRUCTION_NAME
      nodes
    else
      unless option[:multiple] || children_list.size <= 1
        raise parse_error("Element without '#{MULTIPLE_MARK}' cannot have more than one content group")
      end
      create_elements(name, attributes, children_list)
    end
  end

  # Creates one Element per entry of children_list; each gets all attributes.
  def create_elements(name, attributes, children_list)
    children_list.map do |children|
      element = Element.new(name)
      attributes.each do |key, value|
        element.add_attribute(key, value)
      end
      children.each do |child|
        element.add(child)
      end
      element
    end
  end

  # Handles an instruction element.
  #
  # The system instruction only updates parser settings and yields no node;
  # "xml" yields an XMLDecl; any other target yields an Instruction whose
  # content is the attributes followed by the first child.
  def create_instructions(target, attributes, children)
    instructions = []
    if target == SYSTEM_INSTRUCTION_NAME
      @version = attributes["version"] if attributes["version"]
      @brace_name = attributes["brace"] if attributes["brace"]
      @bracket_name = attributes["bracket"] if attributes["bracket"]
      @slash_name = attributes["slash"] if attributes["slash"]
    elsif target == "xml"
      instruction = XMLDecl.new
      instruction.version = attributes["version"] || XMLDecl::DEFAULT_VERSION
      instruction.encoding = attributes["encoding"]
      instruction.standalone = attributes["standalone"]
      instructions << instruction
    else
      instruction = Instruction.new(target)
      actual_contents = attributes.map { |key, value| "#{key}=\"#{value}\"" }
      if children.first && !children.first.empty?
        actual_contents << children.first
      end
      instruction.content = actual_contents.join(" ")
      instructions << instruction
    end
    instructions
  end

  # Expands a macro: a registered block wins, then the built-in ENTITIES.
  # Raises ZenithalParseError for an unknown name.
  def process_macro(name, attributes, children_list)
    if @macros.key?(name)
      @macros[name].call(attributes, children_list)
    elsif ENTITIES.key?(name)
      [Text.new(ENTITIES[name], true, nil, false)]
    else
      raise parse_error("No such macro '#{name}'")
    end
  end

  # Registers a macro.  The block is called with (attributes, children_list)
  # and its result is used as the list of nodes replacing the invocation.
  def register_macro(name, &block)
    @macros.store(name, block)
  end

  # Parses "{...}" into an element named after @brace_name.
  def parse_brace
    parse_enclosed(BRACE_START, BRACE_END, @brace_name)
  end

  # Parses "[...]" into an element named after @bracket_name.
  def parse_bracket
    parse_enclosed(BRACKET_START, BRACKET_END, @bracket_name)
  end

  # Parses "/.../" into an element named after @slash_name.  Because both
  # delimiters are the same character, the content is parsed in slash mode.
  def parse_slash
    parse_enclosed(SLASH_START, SLASH_END, @slash_name, true)
  end

  # Parses a comment: either "##" up to (not including) the end of the line,
  # or "#<" up to the matching ">#".  Returns a Comment whose text is the
  # stripped body padded with one space on each side.
  def parse_comment
    unless @source.read == COMMENT_DELIMITER
      raise parse_error("Comment must start with '#{COMMENT_DELIMITER}'")
    end
    char = @source.read
    string = ""
    if char == COMMENT_DELIMITER
      while (char = @source.read)
        if char == "\n"
          @source.unread
          break
        end
        string << char
      end
    elsif char == CONTENT_START
      while (char = @source.read)
        if char == CONTENT_END
          break if @source.read == COMMENT_DELIMITER
          # A lone '>' belongs to the comment; re-examine the character after it.
          string << char
          @source.unread
        else
          string << char
        end
      end
    else
      raise parse_error("'#{COMMENT_DELIMITER}' must be followed by '#{COMMENT_DELIMITER}' or '#{CONTENT_START}'")
    end
    Comment.new(" #{string.strip} ")
  end

  # Reads plain text up to the next character that has a structural meaning
  # (left unread), resolving escapes.  Returns a Text node.
  def parse_text(option = {})
    string = ""
    while (char = @source.read)
      if text_terminator?(char)
        @source.unread
        break
      elsif char == ESCAPE_START && ESCAPES.include?(@source.peek)
        @source.unread
        string << parse_escape_string
      else
        string << char
      end
    end
    Text.new(string, true, nil, false)
  end

  # Reads verbal text: only CONTENT_END (left unread) terminates it and only
  # escapes are interpreted.  Returns a Text node.
  def parse_verbal_text(option = {})
    string = ""
    while (char = @source.read)
      if char == CONTENT_END
        @source.unread
        break
      elsif char == ESCAPE_START && ESCAPES.include?(@source.peek)
        @source.unread
        string << parse_escape_string
      else
        string << char
      end
    end
    Text.new(string, true, nil, false)
  end

  # Consumes ESCAPE_START and returns the character following it.
  def parse_escape_string
    unless @source.read == ESCAPE_START
      raise parse_error("Escape must start with '#{ESCAPE_START}'")
    end
    @source.read
  end

  # Consumes consecutive whitespace and returns how many characters were skipped.
  def skip_spaces
    count = 0
    while @source.read =~ /\s/
      count += 1
    end
    # The loop reads one character too many; give it back.
    @source.unread
    count
  end

  # Strips leading whitespace of a leading Text and trailing whitespace of a
  # trailing Text, in place.
  def trim_spaces(children)
    if children.first.is_a?(Text)
      children.first.value = children.first.value.lstrip
    end
    if children.last.is_a?(Text)
      children.last.value = children.last.value.rstrip
    end
  end

  # Removes the common indentation from every text found in children
  # (including texts nested in child elements), in place.  Indentation is the
  # run of spaces directly after a newline; lines without such a run do not
  # take part in computing the common width.
  def trim_indents(children)
    if children.last.is_a?(Text)
      children.last.value = children.last.value.rstrip
    end
    texts = []
    children.each do |child|
      case child
      when Text
        texts << child
      when Parent
        texts.concat(ZenithalOldParser.get_all_texts(child))
      end
    end
    indent_pattern = /\n(\x20+)/
    indent_widths = texts.flat_map { |text| text.value.scan(indent_pattern).map { |match| match[0].length } }
    # When nothing is indented the substitution below never fires, so any
    # fallback width works.
    indent_length = indent_widths.min || 0
    texts.each do |text|
      text.value = text.value.gsub(indent_pattern) { "\n" + " " * (Regexp.last_match(1).length - indent_length) }
    end
    if children.first.is_a?(Text)
      children.first.value = children.first.value.lstrip
    end
  end

  # True when char may start an element or attribute name.
  def self.valid_start_char?(char)
    code = char.ord
    VALID_START_CHARS.any? { |allowed| allowed === code }
  end

  # True when char may appear after the first character of a name.
  def self.valid_char?(char)
    code = char.ord
    valid_start_char?(char) || VALID_MIDDLE_CHARS.any? { |allowed| allowed === code }
  end

  # Collects every Text below parent, depth first in document order.
  #
  # parent - a node responding to #children (used with REXML Parent nodes).
  def self.get_all_texts(parent)
    texts = []
    parent.children.each do |child|
      case child
      when Text
        texts << child
      when Parent
        texts.concat(get_all_texts(child))
      end
    end
    texts
  end

  private

  # Builds a ZenithalParseError for the current position.  The line number is
  # included when the reader exposes one.
  def parse_error(message)
    location = @source.respond_to?(:lineno) ? "[line #{@source.lineno}] " : ""
    ZenithalParseError.new(location + message)
  end

  # Shared implementation of parse_brace, parse_bracket and parse_slash:
  # expects opening, parses the nodes inside, expects closing and wraps the
  # nodes in an element called element_name.
  def parse_enclosed(opening, closing, element_name, in_slash = false)
    unless @source.read == opening
      raise parse_error("Expected '#{opening}'")
    end
    children = parse_nodes({}, in_slash)
    unless @source.read == closing
      raise parse_error("Expected '#{closing}'")
    end
    element = Element.new(element_name)
    children.each do |child|
      element.add(child)
    end
    element
  end

  # True when char opens a brace, bracket or slash element that is enabled.
  def opening_delimiter?(char)
    (@brace_name && char == BRACE_START) || (@bracket_name && char == BRACKET_START) || (@slash_name && char == SLASH_START)
  end

  # True when char closes a brace, bracket or slash element that is enabled.
  def closing_delimiter?(char)
    (@brace_name && char == BRACE_END) || (@bracket_name && char == BRACKET_END) || (@slash_name && char == SLASH_END)
  end

  # True when char ends a run of plain text.
  def text_terminator?(char)
    char == TAG_START || char == MACRO_START || char == CONTENT_END || char == COMMENT_DELIMITER ||
      opening_delimiter?(char) || closing_delimiter?(char)
  end

end