arboretum 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arboretum/doctree.rb +1566 -0
- data/lib/arboretum/scandent.rb +882 -0
- data/lib/arboretum/xml.rb +169 -0
- metadata +4 -1
@@ -0,0 +1,169 @@
|
|
1
|
+
module Arboretum
|
2
|
+
module XML
|
3
|
+
module IO
|
4
|
+
require 'ox'
|
5
|
+
require_relative 'doctree'
|
6
|
+
|
7
|
+
# Error type for failures encountered while parsing XML. Carries a generic
# message unless the raiser supplies a more specific one.
class XMLParseException < StandardError
  # msg - text exposed through #message.
  def initialize(msg = "An error occurred while parsing XML")
    # Bare `super` forwards msg unchanged to StandardError#initialize
    super
  end
end
|
12
|
+
|
13
|
+
# File-level entry points: parse a markup file into a tree (`read`) and
# serialize a tree back to a file (`write`).
class ArboretumIO
  # Markup types understood by `read` and `write`; anything else falls back
  # to :xml with a warning.
  SUPPORTED_TYPES = [:xml, :xhtml, :html].freeze

  # Parses the file at file_path with Ox's SAX parser and returns the tree
  # built by the ArboretumSax handler.
  #
  # file_path - path of the file to parse.
  # style     - passed to ArboretumSax.new; controls how text nodes are kept.
  # type      - :xml, :xhtml, :html, or :auto to infer it from the extension.
  #             :html additionally enables Ox's `:smart` option.
  #
  # Raises whatever File.open raises when the file cannot be opened.
  def self.read(file_path, style: :clean, type: :auto)
    type = resolve_type(file_path, type, 'read')

    sax_parser = ArboretumSax.new(style)
    parse_options = { :skip => :skip_off }
    parse_options[:smart] = true if type == :html

    File.open(file_path, 'r') do |f|
      Ox.sax_parse(sax_parser, f, parse_options)
    end
    sax_parser.tree
  end

  # Serializes tree with `tree.dump_markup(style, type)` and writes the result
  # to file_path, returning the number of bytes written.
  #
  # tree      - any object responding to dump_markup(style, type).
  # file_path - destination path; replaced if it already exists.
  # style     - forwarded to dump_markup.
  # type      - :xml, :xhtml, :html, or :auto to infer it from the extension.
  def self.write(tree, file_path, style: :pretty, type: :auto)
    type = resolve_type(file_path, type, 'write')

    # Render before opening the destination: opening with 'w' truncates, so a
    # failure inside dump_markup would otherwise wipe an existing file.
    markup = tree.dump_markup(style, type)
    File.open(file_path, 'w') { |f| f.write(markup) }
  end

  # Returns the markup type to use: the given type, or the one inferred from
  # file_path when type is :auto. Unsupported types produce a warning on
  # stderr and resolve to :xml. `action` is only used in the warning text.
  def self.resolve_type(file_path, type, action)
    type = infer_type(file_path) if type == :auto
    return type if SUPPORTED_TYPES.include?(type)

    warn "Warning: Invalid file type `#{type}` given for #{action}. Using `:xml` instead..."
    :xml
  end
  private_class_method :resolve_type

  # Derives a type symbol from the file extension (lower-cased, without the
  # dot). A path with no extension yields the empty symbol, which the caller
  # treats as unsupported. File.extname only inspects the final path
  # component, so dots in directory names are ignored; a name that merely
  # starts with a dot (e.g. ".xml") counts as having no extension.
  def self.infer_type(file_path)
    File.extname(file_path).sub(/\A\./, '').downcase.to_sym
  end
  private_class_method :infer_type
end
|
42
|
+
|
43
|
+
# Ox SAX handler that builds an Arboretum tree from parser callbacks.
#
# A stack of currently open elements (@open_elements, bottom entry is the
# document root) tracks where new children are attached: every new node is
# appended to the element on top of the stack.
#
# Diagnostics are written to stderr via Kernel#warn.
class ArboretumSax < Ox::Sax
  include Arboretum::DocTree::Elements

  attr_accessor :tree

  # style - text handling mode used by `text`: :clean collapses whitespace
  #         runs to a single space, :preserve keeps text verbatim. With any
  #         other value `text` discards text nodes.
  def initialize(style)
    # Must be defined up front so the current line is available to the
    # handler (read by print_read_failure)
    @line = nil # Integer

    # Root document element and the tree the parser builds on
    root = DocRootElement.new
    @tree = Tree.new(root)

    # Stack of elements that have been opened but not yet closed
    @open_elements = [root]

    @style = style
  end

  # Writes a diagnostic block describing a failed or unhandled callback.
  #
  # method_name - name of the callback reporting the problem.
  # args        - arguments that callback received.
  # reason      - human-readable explanation.
  def print_read_failure(method_name, args, reason="None given...")
    warn "Reading of line #{@line} failed...",
         "Reason: #{reason}",
         "Method: #{method_name}",
         "Args: #{args}",
         "--------"
  end

  # Opening tag: creates a TaggedElement under the current open element and
  # makes it the new top of the stack.
  #
  # A name of the form `ns:tag` is split into namespace and tag symbols; a
  # name without a colon is used as given with a nil namespace.
  #
  # Raises XMLParseException when a name containing ':' does not split into
  # exactly two parts.
  def start_element(name)
    namespace = nil
    tag = name
    qualified = name.to_s

    if qualified.include?(':')
      parts = qualified.split(':')
      unless parts.length == 2
        raise XMLParseException, "Malformed namespaced tag `#{qualified}`"
      end
      namespace, tag = parts.map(&:to_sym)
    end

    element = TaggedElement.new(namespace, tag)
    @open_elements.last.append_child(element)
    @open_elements.push(element)
  end

  # Attribute callback: records the value on the most recently opened element.
  # NOTE(review): if Ox also reports attributes of processing instructions
  # (e.g. the `<?xml ...?>` declaration) through this callback, they end up
  # on whatever element is currently open -- confirm against the Ox docs.
  def attr(name, str)
    @open_elements.last.add_attr_value(name, str)
  end

  # Comment callback: appends a CommentElement; comments have no children, so
  # nothing is pushed on the stack.
  def comment(str)
    @open_elements.last.append_child(CommentElement.new(str))
  end

  # Text callback: appends a TextElement according to the read style.
  # Text nodes have no children, so nothing is pushed on the stack.
  def text(str)
    return unless [:clean, :preserve].include?(@style)

    # Non-destructive gsub: the string handed over by the parser is left
    # untouched, and frozen input cannot raise.
    content = @style == :clean ? str.gsub(/\s+/, ' ') : str
    @open_elements.last.append_child(TextElement.new(content))
  end

  # Closing tag: pops the most recently opened element and reports a read
  # failure when its namespaced tag differs from the closing name.
  def end_element(name)
    # Never pop the document root: a stray closing tag would otherwise empty
    # the stack and break every later callback that uses @open_elements.last.
    if @open_elements.length <= 1
      print_read_failure(__method__, [name], "Closing tag with no open element: #{name}")
      return
    end

    closed = @open_elements.pop
    closed_name = closed.namespaced_tag.to_s
    return if closed_name == name.to_s

    # Only a partial check: the element stays popped even on a mismatch
    reason = "Non-matching element open/close tags: #{closed_name} and #{name}"
    print_read_failure(__method__, [name], reason)
  end

  # Callbacks below are not implemented: they add nothing to the tree and
  # only emit a diagnostic.
  def instruct(target)
    report_unimplemented(__method__, [target])
  end

  def end_instruct(target)
    report_unimplemented(__method__, [target])
  end

  def doctype(str)
    report_unimplemented(__method__, [str])
  end

  def cdata(str)
    report_unimplemented(__method__, [str])
  end

  # Parser error callback: reports the error, parsing is not interrupted here.
  def error(msg, line, column)
    print_read_failure(__method__, [msg, line, column], "A read error occured")
  end

  def abort(name)
    report_unimplemented(__method__, [name])
  end

  private

  # Shared body of the unimplemented callbacks.
  def report_unimplemented(method_name, args)
    print_read_failure(method_name, args, "Unimplemented method")
  end
end
|
166
|
+
end # IO
|
167
|
+
|
168
|
+
end # XML
|
169
|
+
end # Arboretum
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arboretum
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomjw64
|
@@ -37,6 +37,9 @@ extensions: []
|
|
37
37
|
extra_rdoc_files: []
|
38
38
|
files:
|
39
39
|
- lib/arboretum.rb
|
40
|
+
- lib/arboretum/doctree.rb
|
41
|
+
- lib/arboretum/scandent.rb
|
42
|
+
- lib/arboretum/xml.rb
|
40
43
|
homepage: https://github.com/tomjw64/arboretum
|
41
44
|
licenses:
|
42
45
|
- MIT
|