arboretum 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arboretum/doctree.rb +1566 -0
- data/lib/arboretum/scandent.rb +882 -0
- data/lib/arboretum/xml.rb +169 -0
- metadata +4 -1
@@ -0,0 +1,169 @@
|
|
1
|
+
module Arboretum
|
2
|
+
module XML
|
3
|
+
module IO
|
4
|
+
require 'ox'
|
5
|
+
require_relative 'doctree'
|
6
|
+
|
7
|
+
# Error type for failures encountered while parsing XML.
# Carries a generic default message when the raiser supplies none.
class XMLParseException < StandardError
  # msg - human-readable description of the failure (optional)
  def initialize(msg="An error occurred while parsing XML")
    # Bare `super` forwards this method's own argument (msg) unchanged.
    super
  end
end
|
12
|
+
|
13
|
+
# File-level entry points: parse a markup file into a tree, and dump a tree
# back out to a markup file.
class ArboretumIO
  # Parses the file at +file_path+ with an ArboretumSax handler and returns
  # the tree that handler built.
  #
  # file_path - String path of the file to read
  # style     - handed to ArboretumSax.new (controls how text is stored)
  # type      - :xml, :xhtml or :html, or :auto to derive the type from the
  #             file extension. Any other value prints a warning and the
  #             file is parsed as :xml.
  #
  # Returns the handler's tree. Errors from File.open propagate.
  def self.read(file_path, style: :clean, type: :auto)
    sax_parser = ArboretumSax.new(style)
    type = type_from_path(file_path) if type == :auto

    File.open(file_path, 'r') do |f|
      parse_options = {:skip => :skip_off}
      if type == :html
        # HTML is parsed with Ox's :smart option switched on
        parse_options[:smart] = true
      elsif ![:xml, :xhtml].include?(type)
        puts "Warning: Invalid file type `#{type}` given for read. Using `:xml` instead..."
      end
      Ox.sax_parse(sax_parser, f, parse_options)
    end

    sax_parser.tree
  end

  # Writes the markup produced by tree.dump_markup(style, type) to
  # +file_path+, replacing any existing content.
  #
  # tree      - any object responding to dump_markup(style, type)
  # file_path - String path of the file to write
  # style     - forwarded to dump_markup
  # type      - :xml, :xhtml or :html, or :auto to derive the type from the
  #             file extension. Any other value prints a warning and :xml
  #             is used.
  #
  # Returns the result of the underlying IO#write call.
  def self.write(tree, file_path, style: :pretty, type: :auto)
    type = type_from_path(file_path) if type == :auto
    unless [:xml, :xhtml, :html].include?(type)
      puts "Warning: Invalid file type `#{type}` given for write. Using `:xml` instead..."
      type = :xml
    end

    # Render before opening the file: opening with 'w' truncates it, so a
    # failure inside dump_markup must not wipe out an existing file.
    markup = tree.dump_markup(style, type)
    File.open(file_path, 'w') { |f| f.write(markup) }
  end

  # Derives the markup type from the extension of +file_path+.
  #
  # File.extname looks only at the final path component, so a dot in a
  # directory name is not mistaken for an extension, and a path with no
  # extension gives the empty symbol (reported as an invalid type by the
  # callers) instead of raising NoMethodError on nil.
  def self.type_from_path(file_path)
    File.extname(file_path).sub(/\A\./, '').to_sym
  end
  private_class_method :type_from_path
end
|
42
|
+
|
43
|
+
# SAX handler that builds a document tree from the callbacks Ox issues
# while parsing. The finished tree is available through #tree.
class ArboretumSax < Ox::Sax
  include Arboretum::DocTree::Elements

  attr_accessor :tree

  # style - text handling mode used by #text: :clean collapses whitespace
  #         runs, :preserve keeps text verbatim. With any other value,
  #         text nodes are not added to the tree.
  def initialize(style)
    # Must initialize line to have access to it
    # NOTE(review): relies on Ox filling in @line only when the handler has
    # already defined it; confirm against the Ox::Sax documentation.
    @line = nil # Integer

    # Root document element
    root = DocRootElement.new # DocRootElement

    # Initialize a tree for the parser to build on
    @tree = Tree.new(root) # Tree

    # Stack of elements that have been opened but not yet closed;
    # the last entry is the parent for whatever is parsed next
    @open_elements = [root] # Array of Elements

    # Read style
    @style = style
  end

  # Prints a diagnostic block to standard output describing a callback
  # that could not be handled.
  #
  # method_name - name of the callback reporting the problem
  # args        - the arguments that callback received
  # reason      - short explanation of the failure
  def print_read_failure(method_name, args, reason="None given...")
    puts "Reading of line #{@line} failed..."
    puts "Reason: #{reason}"
    puts "Method: #{method_name}"
    puts "Args: #{args}"
    puts "--------"
  end

  # Appends a new TaggedElement to the innermost open element and makes it
  # the new innermost open element.
  #
  # A name of the form `ns:tag` is split into namespace and tag symbols.
  # Raises XMLParseException when splitting on ':' does not give exactly
  # two parts.
  def start_element(name)
    element_ns = nil
    element_tag = name
    tag_str = name.to_s

    if tag_str.include?(':')
      tag_split = tag_str.split(':')
      raise XMLParseException if tag_split.length != 2
      element_ns, element_tag = tag_split.map(&:to_sym)
    end

    opened_element = TaggedElement.new(element_ns, element_tag)
    @open_elements.last.append_child(opened_element)

    # Leave the element open so following nodes become its children
    @open_elements.push(opened_element)
  end

  # Adds an attribute value to the innermost open element.
  def attr(name, str)
    @open_elements.last.add_attr_value(name, str)
  end

  # Appends a CommentElement to the innermost open element.
  # Comments have no children, so nothing is pushed on the open stack.
  def comment(str)
    @open_elements.last.append_child(CommentElement.new(str))
  end

  # Appends a TextElement to the innermost open element, according to the
  # read style. Text has no children, so nothing is pushed on the stack.
  def text(str)
    content =
      case @style
      when :clean
        # Compress, but preserve, whitespace. Uses the non-destructive gsub
        # so the string handed in by the parser is never modified in place.
        str.gsub(/\s+/, ' ')
      when :preserve
        str
      else
        # Unrecognized styles add no text nodes
        return
      end

    @open_elements.last.append_child(TextElement.new(content))
  end

  # Closes the innermost open element. If its namespaced tag does not match
  # +name+, a read failure is printed; parsing continues either way.
  def end_element(name)
    closed_element = @open_elements.pop

    # Partially ensure the correct element has been closed
    return if closed_element.namespaced_tag.to_s == name.to_s

    # Report the same value that was compared, so the message shows the
    # namespace as well when that is what differs
    reason = "Non-matching element open/close tags: #{closed_element.namespaced_tag} and #{name}"
    print_read_failure(__method__, [name], reason)
  end

  # Unimplemented calls w/debug output

  def instruct(target)
    report_unimplemented(__method__, [target])
  end

  def end_instruct(target)
    report_unimplemented(__method__, [target])
  end

  def doctype(str)
    report_unimplemented(__method__, [str])
  end

  def cdata(str)
    report_unimplemented(__method__, [str])
  end

  # Parser error callback: prints the message and position it was given.
  def error(msg, line, column)
    print_read_failure(__method__, [msg, line, column], "A read error occured")
  end

  def abort(name)
    report_unimplemented(__method__, [name])
  end

  private

  # Prints the standard diagnostic for a callback this handler does not
  # implement.
  def report_unimplemented(method_name, args)
    print_read_failure(method_name, args, "Unimplemented method")
  end
end
|
166
|
+
end # IO
|
167
|
+
|
168
|
+
end # XML
|
169
|
+
end # Arboretum
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arboretum
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomjw64
|
@@ -37,6 +37,9 @@ extensions: []
|
|
37
37
|
extra_rdoc_files: []
|
38
38
|
files:
|
39
39
|
- lib/arboretum.rb
|
40
|
+
- lib/arboretum/doctree.rb
|
41
|
+
- lib/arboretum/scandent.rb
|
42
|
+
- lib/arboretum/xml.rb
|
40
43
|
homepage: https://github.com/tomjw64/arboretum
|
41
44
|
licenses:
|
42
45
|
- MIT
|