manticore-smash 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: false
2
+
3
+ # Copyright (C) 2024 Manticore Authors
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU Affero General Public License as published
7
+ # by the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU Affero General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Affero General Public License
16
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
17
+
18
+ module XmlUtils
19
# One lexical unit produced while scanning XML text.
#
# A token carries a +type+ symbol, an optional +value+ payload and the
# +line+ / +position+ that whoever created it chose to record. All four
# fields are readable and writable; nothing is validated against TYPES.
class Token
  # Every token category name known to this library.
  TYPES = %i[
    start_tag end_tag empty_tag
    text cdata comment processing_instruction
    doctype xml_decl attribute_name attribute_value
    close_tag eq quote whitespace newline eof
  ].freeze

  attr_accessor :type, :value, :line, :position

  # type     - category symbol (see TYPES).
  # value    - payload; defaults to nil.
  # line     - line to associate with the token; defaults to nil.
  # position - column to associate with the token; defaults to nil.
  def initialize(type, value = nil, line = nil, position = nil)
    @type, @value = type, value
    @line, @position = line, position
  end

  # Debug rendering: the type followed by the inspected value.
  def to_s
    "<Token #{@type}: #{@value.inspect}>"
  end
end
40
+
41
# Lexer that turns raw XML text into a flat array of Token objects.
#
# The lexer is lenient: stray characters before a tag's closing '>' are
# skipped and unterminated constructs (comments, CDATA sections, processing
# instructions, quoted attribute values, DOCTYPEs) simply run to the end of
# the input. ParseException is raised only when a name or an opening quote
# is required but missing, or when "<!" is followed by unknown markup.
#
# Whitespace handling: a run of whitespace is dropped only when it sits
# between markup and markup (or the end of input). Whitespace adjacent to
# character data or to an entity reference is part of the text and is kept,
# so "a &amp; b" lexes to text that joins back to "a & b".
#
# Markup tokens (tags, comments, CDATA, PIs, DOCTYPE) record the line and
# column of their opening '<'; text tokens record where the text starts.
class Tokenizer
  # Pattern for a well-formed ASCII XML name. The lexer itself is more
  # permissive (see #read_name) and does not use it; it is kept because it
  # is part of the class's published constants.
  XML_NAME_PATTERN = /[A-Za-z_][A-Za-z0-9_.:-]*/.freeze

  # Replacement text for the five entities predefined by XML.
  PREDEFINED_ENTITIES = {
    'amp' => '&',
    'lt' => '<',
    'gt' => '>',
    'quot' => '"',
    'apos' => "'"
  }.freeze

  # source - a String, or any object responding to #read (its whole content
  #          is read eagerly); anything else is converted with #to_s.
  def initialize(source)
    @source = source.respond_to?(:read) ? source.read : source.to_s
    @pos = 0
    @line = 1
    @col = 1
    @tokens = []
    @finished = false
  end

  # Scans the whole input.
  #
  # Returns the Array of tokens, always terminated by exactly one :eof
  # token. Calling it again returns the same array without re-scanning.
  # Raises ParseException on the malformed markup described on the class.
  def tokenize
    return @tokens if @finished

    loop do
      token = next_token
      @tokens << token
      break if token.type == :eof
    end
    @finished = true
    @tokens
  end

  private

  # Produces the next token; an :eof token once the input is exhausted.
  def next_token
    skip_insignificant_whitespace
    start_line = @line
    start_col = @col
    return Token.new(:eof, nil, start_line, start_col) if @pos >= @source.length

    case peek
    when '<' then read_markup(start_line, start_col)
    when '&' then read_entity_ref
    else read_text
    end
  end

  # Drops a whitespace run only when it separates markup from markup or
  # from the end of input. Whitespace that follows a text token or that
  # precedes character data / an entity reference is left for #read_text.
  def skip_insignificant_whitespace
    stop = @source.index(/\S/, @pos) || @source.length
    return if stop == @pos

    follows_text = !@tokens.empty? && @tokens.last.type == :text
    before_markup_or_eof = stop >= @source.length || @source[stop] == '<'
    advance(stop - @pos) if before_markup_or_eof && !follows_text
  end

  # Dispatches on the character after '<'.
  def read_markup(start_line, start_col)
    advance # '<'
    case peek
    when '/'
      advance
      name = read_name
      skip_until('>') # tolerate whitespace or junk before '>'
      advance if peek == '>'
      Token.new(:close_tag, name, start_line, start_col)
    when '!'
      advance
      read_declaration(start_line, start_col)
    when '?'
      advance
      read_processing_instruction(start_line, start_col)
    else
      read_tag(start_line, start_col)
    end
  end

  # Handles what follows "<!": a comment, a CDATA section or a DOCTYPE.
  # The CDATA / DOCTYPE keywords are matched case-insensitively.
  def read_declaration(start_line, start_col)
    if peek(2) == '--'
      advance(2)
      read_comment(start_line, start_col)
    elsif peek(7).upcase == '[CDATA['
      advance(7)
      read_cdata(start_line, start_col)
    elsif peek(7).upcase == 'DOCTYPE'
      advance(7)
      read_doctype(start_line, start_col)
    else
      raise ParseException.new("Invalid markup after <!", @line, @col)
    end
  end

  # Reads a start tag or an empty-element tag, '<' already consumed.
  # The token value is { name:, attributes: } with String keys and values;
  # an attribute written without "=value" gets its own name as value, and a
  # repeated attribute keeps the last value.
  def read_tag(start_line, start_col)
    name = read_name
    skip_spaces

    attrs = {}
    until peek == '>' || peek == '/' || @pos >= @source.length
      attr_name = read_name
      skip_spaces
      if peek == '='
        advance
        skip_spaces
        attr_value = read_quoted_string
      else
        attr_value = attr_name
      end
      attrs[attr_name] = attr_value
      skip_spaces
    end

    if peek == '/'
      advance
      token_type = :empty_tag
    else
      token_type = :start_tag
    end

    skip_until('>')
    advance if peek == '>'

    Token.new(token_type, { name: name, attributes: attrs }, start_line, start_col)
  end

  # Reads a comment body up to "-->", with "<!--" already consumed.
  def read_comment(start_line, start_col)
    Token.new(:comment, read_until('-->'), start_line, start_col)
  end

  # Reads a CDATA body up to "]]>", with "<![CDATA[" already consumed.
  def read_cdata(start_line, start_col)
    Token.new(:cdata, read_until(']]>'), start_line, start_col)
  end

  # Reads a DOCTYPE declaration, with "<!DOCTYPE" already consumed.
  # Nested '<' ... '>' pairs (an internal subset) are kept in the content;
  # the '>' that closes the declaration itself is not.
  def read_doctype(start_line, start_col)
    content = +''
    depth = 1
    while @pos < @source.length && depth > 0
      ch = advance
      if ch == '<'
        depth += 1
      elsif ch == '>'
        depth -= 1
      end
      content << ch unless depth == 0
    end
    Token.new(:doctype, content.strip, start_line, start_col)
  end

  # Reads a processing instruction up to "?>", with "<?" already consumed.
  # The token value is { target:, content: } with the content stripped.
  def read_processing_instruction(start_line, start_col)
    target = read_name
    skip_spaces
    content = read_until('?>')
    Token.new(:processing_instruction, { target: target, content: content.strip }, start_line, start_col)
  end

  # Reads character data up to the next '<' or '&' (or the end of input).
  def read_text
    start_line = @line
    start_col = @col
    stop = @source.index(/[<&]/, @pos) || @source.length
    Token.new(:text, advance(stop - @pos), start_line, start_col)
  end

  # Reads "&name;" and returns it as a :text token. The five predefined
  # entities are replaced by their character; any other terminated
  # reference (including numeric ones) is passed through verbatim. An '&'
  # that is not followed by "name;" is kept literally and nothing beyond
  # the name characters is consumed, so a stray ampersand can no longer
  # swallow the markup that follows it.
  def read_entity_ref
    start_line = @line
    start_col = @col
    advance # '&'
    stop = @source.index(/[^A-Za-z0-9#_.:-]/, @pos) || @source.length
    ref = advance(stop - @pos)
    terminated = peek == ';'
    advance if terminated
    text = if terminated
             PREDEFINED_ENTITIES.fetch(ref) { "&#{ref};" }
           else
             "&#{ref}"
           end
    Token.new(:text, text, start_line, start_col)
  end

  # Reads a run of name characters (ASCII letters, digits, '_', '.', ':',
  # '-'). Raises ParseException when there is none at the cursor.
  def read_name
    stop = @source.index(/[^A-Za-z0-9_.:-]/, @pos) || @source.length
    raise ParseException.new("Expected name", @line, @col) if stop == @pos

    advance(stop - @pos)
  end

  # Reads a single- or double-quoted value and returns it without the
  # quotes. Raises ParseException when the cursor is not on a quote.
  def read_quoted_string
    quote = advance
    raise ParseException.new("Expected quote", @line, @col) unless quote == '"' || quote == "'"

    read_until(quote)
  end

  # Consumes and returns everything before +terminator+, then consumes the
  # terminator itself. Without a terminator the rest of the input is
  # consumed, so no partial tail is left behind to be re-lexed as text.
  def read_until(terminator)
    stop = @source.index(terminator, @pos)
    content = advance((stop || @source.length) - @pos)
    advance(terminator.length) if stop
    content
  end

  # Returns up to +n+ characters at the cursor without consuming them
  # ('' at the end of input).
  def peek(n = 1)
    @source[@pos, n] || ''
  end

  # Consumes up to +n+ characters, keeping the line/column counters in
  # step, and returns them. Never moves the cursor past the end of input.
  def advance(n = 1)
    chunk = @source[@pos, n] || ''
    newlines = chunk.count("\n")
    if newlines.zero?
      @col += chunk.length
    else
      @line += newlines
      # column 1 right after the last newline, +1 per following character
      @col = chunk.length - chunk.rindex("\n")
    end
    @pos += chunk.length
    chunk
  end

  # Consumes whitespace at the cursor.
  def skip_spaces
    stop = @source.index(/\S/, @pos) || @source.length
    advance(stop - @pos)
  end

  # Consumes everything before the next occurrence of +char+ (or to the
  # end of input when there is none).
  def skip_until(char)
    stop = @source.index(char, @pos) || @source.length
    advance(stop - @pos)
  end
end
282
+ end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: false
2
+
3
+ # Copyright (C) 2024 Manticore Authors
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU Affero General Public License as published
7
+ # by the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU Affero General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Affero General Public License
16
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
17
+
18
+ module XmlUtils
19
# Builds a node tree from the token stream produced by Tokenizer.
#
# Top-level tokens are attached to the document; a start tag opens an
# Element whose children are collected until its matching close tag.
class TreeParser
  # source   - handed to Tokenizer.new unchanged.
  # document - node that receives the top-level nodes; a new Document is
  #            created when none is given.
  def initialize(source, document = nil)
    @tokens = Tokenizer.new(source).tokenize
    @pos = 0
    @document = document || Document.new
  end

  # Consumes tokens until :eof (or until the stream runs out) and returns
  # the document.
  def parse
    parse_node(@document) until current_token.nil? || current_token.type == :eof
    @document
  end

  private

  # Token under the cursor, or nil past the end of the stream.
  def current_token
    @tokens[@pos]
  end

  # Returns the token under the cursor and moves the cursor forward.
  def advance
    @tokens[@pos].tap { @pos += 1 }
  end

  # Handles one token at document level and attaches the result to parent.
  # Whitespace-only text is dropped here; stray close tags and unknown
  # token types are skipped.
  def parse_node(parent)
    token = current_token
    return if token.nil?
    return parse_element(parent) if %i[start_tag empty_tag].include?(token.type)

    advance
    payload = token.value
    case token.type
    when :xml_decl
      parent.add(parse_xml_decl(payload))
    when :doctype
      parent.add(parse_doctype(payload))
    when :processing_instruction
      if payload[:target] == 'xml'
        # a PI whose target is "xml" is the XML declaration
        parent.add(parse_xml_decl(payload[:content]))
      else
        parent.add(ProcessingInstruction.new(payload[:target], payload[:content]))
      end
    when :comment
      parent.add(Comment.new(payload))
    when :cdata
      parent.add(CData.new(payload, true))
    when :text
      parent.add(Text.new(payload)) unless payload.strip.empty?
    end
  end

  # Consumes a start/empty tag token, attaches the new Element to parent
  # and, for a start tag, reads children up to the matching close tag.
  # Raises ParseException when a different close tag is met. Running out
  # of tokens before the close tag ends the element silently.
  def parse_element(parent)
    opening = advance
    tag_name = opening.value[:name]

    element = Element.new(tag_name)
    opening.value[:attributes].each do |attr_name, attr_value|
      element.add_attribute(attr_name, attr_value)
    end
    parent.add(element)
    return if opening.type == :empty_tag

    while @pos < @tokens.length
      child = current_token
      case child.type
      when :close_tag
        unless child.value == tag_name
          raise ParseException.new(
            "Unexpected close tag </#{child.value}>, expected </#{tag_name}>",
            child.line,
            child.position
          )
        end
        advance
        break
      when :start_tag, :empty_tag
        parse_element(element)
      when :text
        advance
        element.add(Text.new(child.value))
      when :cdata
        advance
        element.add(CData.new(child.value, true))
      when :comment
        advance
        element.add(Comment.new(child.value))
      when :processing_instruction
        advance
        element.add(ProcessingInstruction.new(child.value[:target], child.value[:content]))
      else
        advance
      end
    end
  end

  # Builds an XMLDecl from the pseudo-attributes found in content; only
  # the ones present (version, encoding, standalone) are assigned.
  def parse_xml_decl(content)
    decl = XMLDecl.new
    decl.version = Regexp.last_match(1) if content =~ /version\s*=\s*["']([^"']+)["']/
    decl.encoding = Regexp.last_match(1) if content =~ /encoding\s*=\s*["']([^"']+)["']/
    decl.standalone = Regexp.last_match(1) if content =~ /standalone\s*=\s*["']([^"']+)["']/
    decl
  end

  # Builds a DocType from the text of a DOCTYPE declaration: the name plus
  # an optional PUBLIC (public id + system id) or SYSTEM (system id) part.
  # Falls back to DocType.new(stripped content) when the pattern fails.
  def parse_doctype(content)
    matched = content =~ /^(\S+)(?:\s+PUBLIC\s+["']([^"']*)["']\s+["']([^"']*)["'])?(?:\s+SYSTEM\s+["']([^"']*)["'])?/
    return DocType.new(content.strip) unless matched

    name = Regexp.last_match(1)
    public_id = Regexp.last_match(2)
    system_id = Regexp.last_match(3) || Regexp.last_match(4)
    external_id = if public_id
                    'PUBLIC'
                  elsif system_id
                    'SYSTEM'
                  end
    DocType.new(name, external_id, system_id, public_id)
  end
end
161
+ end
@@ -0,0 +1,273 @@
1
+ # frozen_string_literal: false
2
+
3
+ # Copyright (C) 2024 Manticore Authors
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU Affero General Public License as published
7
+ # by the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU Affero General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Affero General Public License
16
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
17
+
18
+ ['json','yaml'].each{|mod|require mod}
19
+
20
# Convenience entry points for the XmlUtils library.
module XmlUtils
  VERSION = "3.0.1"

  class << self
    # Runs TreeParser over source and returns what TreeParser#parse returns.
    def parse(source)
      TreeParser.new(source).parse
    end

    # Returns a new, empty Document.
    def new_document
      Document.new
    end

    # Creates an Element called name and adds every key/value pair of
    # attributes to it through Element#add_attribute.
    def create_element(name, attributes = {})
      Element.new(name).tap do |element|
        attributes.each { |key, val| element.add_attribute(key, val) }
      end
    end

    # Writes node with Formatters::Default into a fresh String and returns
    # that String.
    def to_xml_string(node)
      buffer = +''
      Formatters::Default.new.write(node, buffer)
      buffer
    end
  end
end
45
+
46
# Lightweight, mutable XML tree node.
#
# A node has a name, a Hash of attributes, an ordered list of child
# "elements" (XmlNode instances, or plain content such as Strings added via
# #add_content) and parent / prev / next links. Two attribute keys are
# treated specially by the serializers: :text holds the node's text (a
# single value or an Array of values) and :namespace is omitted from the
# XML output when it is nil or false.
class XmlNode
  attr_accessor :name, :attributes, :elements, :parent, :next, :prev

  # Entity => character table used by .make_str_from.
  XML_UNESCAPES = {
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&apos;' => "'",
    '&quot;' => '"'
  }.freeze

  # option - Hash with optional :name, :parent, :attributes, :elements,
  #          :prev and :next entries. Without :name the node is called
  #          "OID:<object_id>". When :parent is given the node registers
  #          itself in the parent's elements and next lists and adds the
  #          parent to its own prev list.
  def initialize(option = {})
    args = { parent: nil, attributes: {}, elements: [], prev: [], next: [] }.merge(option)
    @name = args[:name] || "OID:#{object_id}"
    @attributes = args[:attributes]
    @parent, @elements, @prev, @next = args[:parent], args[:elements], args[:prev], args[:next]

    if @parent
      @prev << @parent unless @prev.include?(@parent)
      @parent.elements << self unless @parent.elements.include?(self)
      @parent.next << self unless @parent.next.include?(self)
    end
  end

  #####################################################################################################
  #                                             format                                                #
  #####################################################################################################

  # Triad form: [name, attributes, [child triads and text values...]].
  # The :text attribute is moved from the attributes into the third slot;
  # non-node content is placed in the third slot unchanged.
  def to_triad
    attrs = {}
    elems = []
    @attributes.each do |k, v|
      if k == :text
        elems.concat([v].flatten)
      else
        attrs[k] = v
      end
    end
    elems.concat(@elements.map { |c| c.is_a?(XmlNode) ? c.to_triad : c })
    [@name, attrs, elems]
  end
  alias to_a to_triad

  # Document form: { name => [attributes, child docs...] }.
  # Non-node content is appended to the list unchanged.
  def to_doc
    { @name => [@attributes] + @elements.map { |e| e.is_a?(XmlNode) ? e.to_doc : e } }
  end

  # Object form, JavaScript style:
  #   { name => { '-attr' => value, '#text' => text, child_name => {...} } }
  # Children that share a name are collected into an Array under that name
  # instead of overwriting one another. Non-node content is gathered under
  # '#text' together with the :text attribute.
  def to_obj
    doc = {}
    @attributes.each do |k, v|
      marker = k == :text ? '#' : '-'
      doc["#{marker}#{k}"] = v
    end
    @elements.each do |elem|
      unless elem.is_a?(XmlNode)
        doc['#text'] = [doc['#text'], elem].flatten.compact
        next
      end
      elem.to_obj.each do |child_name, child_doc|
        if doc.key?(child_name)
          doc[child_name] = [doc[child_name]] unless doc[child_name].is_a?(Array)
          doc[child_name] << child_doc
        else
          doc[child_name] = child_doc
        end
      end
    end
    { @name => doc }
  end

  # XML form, serialized by hand; attribute values and text are escaped
  # here. A node with no children and no :text is written self-closing.
  # Child nodes are serialized recursively; any other content is converted
  # with #to_s and escaped as text.
  def to_xml
    attrs = +''
    content = +''
    @attributes.each do |k, v|
      if k == :text
        content << XmlNode.escape_xml([v].flatten.join("\n"))
      elsif k == :namespace && !v
        next
      else
        attrs << " #{k}=\"#{XmlNode.escape_xml_attr(v.to_s)}\""
      end
    end
    return "<#{@name}#{attrs}/>" if @elements.empty? && !@attributes[:text]

    @elements.each do |e|
      content << (e.is_a?(XmlNode) ? e.to_xml : XmlNode.escape_xml(e.to_s))
    end
    "<#{@name}#{attrs}>#{content}</#{@name}>"
  end

  # Pretty-prints one of the forms above.
  #
  # format - name of the conversion method to call on self (e.g. :to_xml
  #          for method :xml, :to_obj for method :json).
  # method - :xml (re-parse the output and write it with
  #          XmlUtils::Formatters::Default) or :json.
  # indent - spaces per nesting level for :json (default 2, the JSON
  #          library's own default); the :xml path does not use it.
  #
  # Raises ArgumentError for any other method.
  def pretty(format, method, indent = 2)
    case method
    when :xml
      pretty_xml = +''
      XmlUtils::Formatters::Default.new.write(XmlUtils.parse(send(format)), pretty_xml)
      pretty_xml
    when :json
      width = indent.is_a?(Integer) && indent >= 0 ? indent : 2
      JSON.pretty_generate(send(format), indent: ' ' * width)
    else
      raise ArgumentError, "Unknown pretty method: #{method.inspect}"
    end
  end

  # Escapes &, < and > for use as element text.
  def self.escape_xml(text)
    text.gsub('&', '&amp;')
        .gsub('<', '&lt;')
        .gsub('>', '&gt;')
  end

  # Escapes &, <, >, " and ' for use inside an attribute value.
  def self.escape_xml_attr(text)
    text.gsub('&', '&amp;')
        .gsub('<', '&lt;')
        .gsub('>', '&gt;')
        .gsub('"', '&quot;')
        .gsub("'", '&apos;')
  end

  # Replaces the five predefined entities with their characters and
  # returns a new String. Done in a single pass so that an escaped
  # ampersand followed by an entity name ("&amp;quot;") yields "&quot;"
  # rather than being unescaped twice.
  def self.make_str_from(xml)
    xml.gsub(/&(?:lt|gt|amp|apos|quot);/, XML_UNESCAPES)
  end

  # Escapes &, <, >, ' and " (same result as .escape_xml_attr).
  def self.make_xml_from(string)
    escape_xml_attr(string)
  end

  #####################################################################################################
  #                                       attributes operation                                        #
  #####################################################################################################

  # Merges hash into the attributes. A :text entry is appended to the
  # existing :text value instead of replacing it. The caller's hash is
  # left untouched. Returns the attributes Hash.
  def add_attributes(hash)
    (@attributes[:text] ||= []) << hash[:text] if hash[:text]
    @attributes.merge!(hash.reject { |key, _| key == :text })
  end

  # Same as #add_attributes.
  def modify_attributes(hash)
    add_attributes(hash)
  end

  # Removes the attribute stored under key; :text cannot be removed here.
  def delete_attribute(key)
    @attributes.delete(key) unless key == :text
  end

  #####################################################################################################
  #                                         content operation                                         #
  #####################################################################################################

  # Appends content to the child list as-is (no parent/next linking).
  def add_content(content)
    @elements << content
  end

  # Resets :text to an empty list, removes every XmlNode child and then
  # appends content.
  def modify_content(content)
    @attributes[:text] = []
    @elements.delete_if { |e| e.is_a?(XmlNode) }
    @elements << content
  end

  # Keeps only the children that are not direct XmlNode instances.
  def delete_content
    @elements = @elements.reject { |c| c.instance_of?(XmlNode) }
  end

  # Attaches elem as a child and links parent/prev/next in both
  # directions. Anything that is not an XmlNode is ignored.
  def add_element(elem)
    return unless elem.is_a?(XmlNode)

    @elements << elem unless @elements.include?(elem)
    @next << elem unless @next.include?(elem)
    elem.parent = self
    elem.prev << self unless elem.prev.include?(self)
  end

  # Returns the children for which the block is truthy ([] without block).
  def search_elements(&block)
    block ? @elements.select(&block) : []
  end

  # Removes the children for which the block is truthy from the child
  # list and returns them ([] without block).
  def delete_elements(&block)
    return [] unless block

    removed = search_elements(&block)
    removed.each { |elem| @elements.delete(elem) }
    removed
  end

  # Returns a detached copy of node and everything below it. Array and
  # String attribute values are duplicated so that later changes to the
  # copy's :text do not reach the original; String content is duplicated
  # too and other non-node content is carried over as-is.
  def self.copy(node)
    attrs = node.attributes.each_with_object({}) do |(key, val), copied|
      copied[key] = val.is_a?(Array) || val.is_a?(String) ? val.dup : val
    end
    duplicate = XmlNode.new(name: node.name, parent: nil, attributes: attrs)
    node.elements.each do |child|
      if child.is_a?(XmlNode)
        duplicate.add_element(copy(child))
      else
        duplicate.add_content(child.is_a?(String) ? child.dup : child)
      end
    end
    duplicate
  end
end
233
+
234
+
235
# Bridges the XmlUtils parser and the XmlNode tree.
module XmlParser
  # Reads and parses the file at filepath. Returns nil when the file does
  # not exist, otherwise whatever .parse returns.
  def self.load(filepath)
    return nil unless File.exist?(filepath)

    XmlParser.parse(File.read(filepath))
  end

  # Parses s with XmlUtils.parse and converts the document's root element
  # into an XmlNode tree. Returns nil when the document has no root.
  def self.parse(s)
    root_elem = XmlUtils.parse(s).root
    root_elem ? build_xmlnode(root_elem) : nil
  end

  # NOTE(review): the original file placed a bare `private` before this
  # method. That keyword does not apply to `def self.` methods, so the
  # method has always been publicly callable; that behavior is kept. Use
  # `private_class_method :build_xmlnode` if it is meant to be hidden.
  #
  # Converts element (and, recursively, its element children) into XmlNode
  # objects attached to parent.
  #
  # Attribute names become Symbol keys holding the attribute's value. The
  # element's namespace is stored under :namespace when non-empty, and the
  # concatenated Text/CData children are stored under :text unless that
  # text is blank. Both special keys overwrite a same-named XML attribute.
  def self.build_xmlnode(element, parent = nil)
    attrs = {}
    element.attributes.each { |key, attr| attrs[key.to_sym] = attr.value }

    namespace_uri = element.namespace(element.prefix)
    attrs[:namespace] = namespace_uri unless !namespace_uri || namespace_uri.empty?

    text_nodes = element.children.select do |child|
      child.is_a?(XmlUtils::Text) || child.is_a?(XmlUtils::CData)
    end
    text_content = text_nodes.map(&:to_s).join
    attrs[:text] = text_content unless text_content.strip.empty?

    node = XmlNode.new(name: element.name, parent: parent, attributes: attrs)

    element.children.each do |child|
      build_xmlnode(child, node) if child.is_a?(XmlUtils::Element)
    end

    node
  end
end