mame-xmlparser 0.6.81.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Encodings/README.ja +18 -0
- data/Encodings/euc-jp.enc +0 -0
- data/Encodings/shift_jis.enc +0 -0
- data/History.txt +5 -0
- data/Manifest.txt +118 -0
- data/README +697 -0
- data/README.ja +789 -0
- data/README.txt +49 -0
- data/Rakefile +30 -0
- data/ext/encoding.h +91 -0
- data/ext/extconf.rb +58 -0
- data/ext/xmlparser.c +2231 -0
- data/lib/sax.rb +1 -0
- data/lib/saxdriver.rb +1 -0
- data/lib/wget.rb +47 -0
- data/lib/xml/dom/builder-ja.rb +58 -0
- data/lib/xml/dom/builder.rb +310 -0
- data/lib/xml/dom/core.rb +3276 -0
- data/lib/xml/dom/digest.rb +94 -0
- data/lib/xml/dom/visitor.rb +182 -0
- data/lib/xml/dom2/attr.rb +213 -0
- data/lib/xml/dom2/cdatasection.rb +76 -0
- data/lib/xml/dom2/characterdata.rb +177 -0
- data/lib/xml/dom2/comment.rb +81 -0
- data/lib/xml/dom2/core.rb +19 -0
- data/lib/xml/dom2/document.rb +317 -0
- data/lib/xml/dom2/documentfragment.rb +82 -0
- data/lib/xml/dom2/documenttype.rb +102 -0
- data/lib/xml/dom2/dombuilder.rb +277 -0
- data/lib/xml/dom2/dombuilderfilter.rb +12 -0
- data/lib/xml/dom2/domentityresolver.rb +13 -0
- data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
- data/lib/xml/dom2/domexception.rb +95 -0
- data/lib/xml/dom2/domimplementation.rb +61 -0
- data/lib/xml/dom2/dominputsource.rb +29 -0
- data/lib/xml/dom2/element.rb +533 -0
- data/lib/xml/dom2/entity.rb +110 -0
- data/lib/xml/dom2/entityreference.rb +107 -0
- data/lib/xml/dom2/namednodemap.rb +138 -0
- data/lib/xml/dom2/node.rb +587 -0
- data/lib/xml/dom2/nodelist.rb +231 -0
- data/lib/xml/dom2/notation.rb +86 -0
- data/lib/xml/dom2/processinginstruction.rb +155 -0
- data/lib/xml/dom2/text.rb +128 -0
- data/lib/xml/dom2/xpath.rb +398 -0
- data/lib/xml/encoding-ja.rb +42 -0
- data/lib/xml/parser.rb +13 -0
- data/lib/xml/parserns.rb +236 -0
- data/lib/xml/sax.rb +353 -0
- data/lib/xml/saxdriver.rb +370 -0
- data/lib/xml/xpath.rb +3284 -0
- data/lib/xml/xpath.ry +2352 -0
- data/lib/xmldigest.rb +1 -0
- data/lib/xmlencoding-ja.rb +11 -0
- data/lib/xmltree.rb +1 -0
- data/lib/xmltreebuilder-ja.rb +9 -0
- data/lib/xmltreebuilder.rb +1 -0
- data/lib/xmltreevisitor.rb +1 -0
- data/samples/buildertest.rb +47 -0
- data/samples/buildertest2.rb +50 -0
- data/samples/digesttest.rb +26 -0
- data/samples/digesttest2.rb +192 -0
- data/samples/doctype.rb +40 -0
- data/samples/doctype.xml +21 -0
- data/samples/doctypei.rb +19 -0
- data/samples/document.dtd +77 -0
- data/samples/dom2/dom2test1.rb +7 -0
- data/samples/dom2/dom2test2.rb +10 -0
- data/samples/dom2/gtkxpath.rb +259 -0
- data/samples/dom2/test1.xml +16 -0
- data/samples/dom2/test2.xml +7 -0
- data/samples/dtd/ext1.dtd +4 -0
- data/samples/dtd/ext2.dtd +1 -0
- data/samples/dtd/extdtd.rb +34 -0
- data/samples/dtd/extdtd.xml +5 -0
- data/samples/expat-1.2/CVS/Entries +8 -0
- data/samples/expat-1.2/CVS/Repository +1 -0
- data/samples/expat-1.2/CVS/Root +1 -0
- data/samples/expat-1.2/ext.ent +2 -0
- data/samples/expat-1.2/exttest.rb +82 -0
- data/samples/expat-1.2/exttesti.rb +81 -0
- data/samples/expat-1.2/hoge.dtd +7 -0
- data/samples/expat-1.2/idattr.xml +8 -0
- data/samples/expat-1.2/idtest.rb +21 -0
- data/samples/expat-1.2/idtest.xml +12 -0
- data/samples/expat-1.2/xmlextparser.rb +39 -0
- data/samples/gtktree.rb +146 -0
- data/samples/idattrtest.rb +28 -0
- data/samples/index_euc.xml +72 -0
- data/samples/index_jis.xml +72 -0
- data/samples/index_noenc.xml +71 -0
- data/samples/index_sjis.xml +72 -0
- data/samples/index_u16.xml +0 -0
- data/samples/index_u8.xml +72 -0
- data/samples/my-html.rb +65 -0
- data/samples/namespaces/CVS/Entries +5 -0
- data/samples/namespaces/CVS/Repository +1 -0
- data/samples/namespaces/CVS/Root +1 -0
- data/samples/namespaces/namespace1.rb +29 -0
- data/samples/namespaces/namespace1.xml +10 -0
- data/samples/namespaces/namespace2.rb +41 -0
- data/samples/namespaces/namespace2.xml +12 -0
- data/samples/nstest.rb +21 -0
- data/samples/saxtest.rb +70 -0
- data/samples/test/featurelist.rb +11 -0
- data/samples/test/skippedentity.rb +48 -0
- data/samples/test/useforeigndtd.rb +42 -0
- data/samples/treetest.rb +14 -0
- data/samples/visitor.rb +29 -0
- data/samples/visitortest.rb +36 -0
- data/samples/writer.rb +43 -0
- data/samples/xmlcheck.rb +40 -0
- data/samples/xmlcomments.rb +30 -0
- data/samples/xmlevent.rb +76 -0
- data/samples/xmliter.rb +57 -0
- data/samples/xmlstats.rb +167 -0
- data/samples/xpointer.rb +233 -0
- data/samples/xpointertest.rb +23 -0
- metadata +185 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'xml/parser'
|
|
4
|
+
|
|
5
|
+
if XML::Parser.respond_to?(:getFeatureList)
|
|
6
|
+
XML::Parser.getFeatureList.each do |key, value|
|
|
7
|
+
puts "#{key}:\t#{value}"
|
|
8
|
+
end
|
|
9
|
+
else
|
|
10
|
+
puts "XML::Parser.getFeatureList requires expat-1.95.5 or later"
|
|
11
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'xml/parser'
|
|
4
|
+
|
|
5
|
+
p = XML::Parser.new
|
|
6
|
+
|
|
7
|
+
def p.skippedEntity(entityName, is_param_ent)
|
|
8
|
+
p(["skippedEntity", entityName, is_param_ent])
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def p.default(data)
|
|
12
|
+
p(["default", data])
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def startElement(name, attrs)
|
|
16
|
+
p(["startElement", name, attrs])
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def endElement(name)
|
|
20
|
+
p(["endElement", name])
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def p.character(data)
|
|
24
|
+
p(["character", data])
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def p.externalEntityRef(context, base, sys, pub)
|
|
28
|
+
p(["externalEntityRef", context, base, sys, pub])
|
|
29
|
+
extp = XML::Parser.new(self, context)
|
|
30
|
+
extp.parse(<<EOF)
|
|
31
|
+
<!ENTITY % tttt "<!ENTITY test 'BOKE'>">
|
|
32
|
+
<!ENTITY test "%tttt;">
|
|
33
|
+
<!ENTITY test1 "%Tttt;">
|
|
34
|
+
%aaaa;
|
|
35
|
+
EOF
|
|
36
|
+
extp.done
|
|
37
|
+
end
|
|
38
|
+
p.setParamEntityParsing(1)
|
|
39
|
+
|
|
40
|
+
p.parse(<<EOF)
|
|
41
|
+
<!DOCTYPE test SYSTEM "ext.dtd" [
|
|
42
|
+
<!--ENTITY test "HOGE"-->
|
|
43
|
+
%TTTT;
|
|
44
|
+
]>
|
|
45
|
+
<test>
|
|
46
|
+
&test;
|
|
47
|
+
</test>
|
|
48
|
+
EOF
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'xml/parser'
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
XML1=<<EOF
|
|
7
|
+
<!--DOCTYPE test SYSTEM "hoge.dtd" [
|
|
8
|
+
<!ENTITY a "internal">
|
|
9
|
+
]-->
|
|
10
|
+
<test>&a;</test>
|
|
11
|
+
EOF
|
|
12
|
+
|
|
13
|
+
XML2=<<EOF
|
|
14
|
+
<!ENTITY a "external">
|
|
15
|
+
EOF
|
|
16
|
+
|
|
17
|
+
p = XML::Parser.new
|
|
18
|
+
if p.respond_to?(:useForeignDTD)
|
|
19
|
+
p p.useForeignDTD(ARGV[0].to_i)
|
|
20
|
+
else
|
|
21
|
+
puts "XML::Parser#useForeignDTD requires expat-1.95.5 or later"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
p.setParamEntityParsing(1)
|
|
25
|
+
def p.startDoctypeDecl(name, sys, pub, internal)
|
|
26
|
+
p(["startDoctypeDecl", name, sys, pub, internal])
|
|
27
|
+
end
|
|
28
|
+
def p.endDoctypeDecl()
|
|
29
|
+
p(["endDoctypeDecl"])
|
|
30
|
+
end
|
|
31
|
+
def p.externalEntityRef(context, base, sys, pub)
|
|
32
|
+
p(["externalEntityRef", context, base, sys, pub])
|
|
33
|
+
extp = XML::Parser.new(self, context)
|
|
34
|
+
extp.parse(XML2)
|
|
35
|
+
extp.done
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def p.character(data)
|
|
39
|
+
p data
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
p.parse(XML1)
|
data/samples/treetest.rb
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby -Ke
|
|
2
|
+
|
|
3
|
+
require "xml/dom/core"
|
|
4
|
+
include XML::DOM
|
|
5
|
+
|
|
6
|
+
tree = Document.new(ProcessingInstruction.new("xml",
|
|
7
|
+
"version='1.0' encoding='EUC-JP'"),
|
|
8
|
+
Comment.new("������"),
|
|
9
|
+
Element.new("Test", [
|
|
10
|
+
Attr.new('attr1', "°��1"),
|
|
11
|
+
Attr.new('attr2', "°��2")],
|
|
12
|
+
Element.new("para", nil, "����ˤ���")))
|
|
13
|
+
print tree.to_s, "\n"
|
|
14
|
+
tree.dump
|
data/samples/visitor.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## Visitor sample
|
|
4
|
+
## 1998 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
## The sample for Ruby style visitor.
|
|
7
|
+
## You can use "each" method as the iterator to visit all nodes,
|
|
8
|
+
## and can also use the other Enumerable module methods.
|
|
9
|
+
|
|
10
|
+
require 'xml/dom/builder'
|
|
11
|
+
require 'xml/dom/visitor'
|
|
12
|
+
require 'xml/encoding-ja'
|
|
13
|
+
include XML::Encoding_ja
|
|
14
|
+
|
|
15
|
+
p = XML::SimpleTreeBuilder.new(1)
|
|
16
|
+
tree = p.parse($<.read)
|
|
17
|
+
tree.documentElement.normalize
|
|
18
|
+
|
|
19
|
+
tree.each_with_index do |node, index|
|
|
20
|
+
print format("%03d: ", index)
|
|
21
|
+
case node.nodeType
|
|
22
|
+
when XML::SimpleTree::Node::ELEMENT_NODE
|
|
23
|
+
print "<#{node.nodeName}>\n"
|
|
24
|
+
when XML::SimpleTree::Node::DOCUMENT_NODE
|
|
25
|
+
print "#DOCUMENT\n"
|
|
26
|
+
else
|
|
27
|
+
print "#{Uconv.u8toeuc(node.to_s).inspect}\n"
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## Visitor test
|
|
4
|
+
## 1998 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
## This sample comes from Ken MacLeod's sample of XML-Grove-0.05
|
|
7
|
+
## Copyright (C) 1998 Ken MacLeod
|
|
8
|
+
|
|
9
|
+
require 'xml/dom/builder'
|
|
10
|
+
require 'xml/dom/visitor'
|
|
11
|
+
|
|
12
|
+
class MyVisitor<XML::DOM::Visitor
|
|
13
|
+
def visit_Element(element, context, *rest)
|
|
14
|
+
context.push(element.nodeName)
|
|
15
|
+
attrs = []
|
|
16
|
+
element.attributes.each do |attr|
|
|
17
|
+
attrs.push(attr.to_s)
|
|
18
|
+
end
|
|
19
|
+
print "#{context.join(' ')} \\\\ (#{attrs.join(' ')})\n"
|
|
20
|
+
super(element, context, *rest)
|
|
21
|
+
print "#{context.join(' ')} //\n"
|
|
22
|
+
context.pop
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def visit_ProcessingInstruction(pi, context, *rest)
|
|
26
|
+
print "#{context.join(' ')} ?? #{pi.target}(#{pi.data})\n"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def visit_Text(text, context, *rest)
|
|
30
|
+
value = text.nodeValue
|
|
31
|
+
print "#{context.join(' ')} || #{value.dump}\n"
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
doc = XML::DOM::Builder.new.parse($<.read)
|
|
36
|
+
doc.accept(MyVisitor.new, [])
|
data/samples/writer.rb
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## Writer sample: prints a parsed DOM tree back out as XML via a visitor
|
|
4
|
+
## 1998 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
|
|
7
|
+
require 'xml/dom/builder'
|
|
8
|
+
require 'xml/dom/visitor'
|
|
9
|
+
|
|
10
|
+
## Visitor that writes a DOM tree back out as XML markup on standard output.
##
## Each visit_* method receives the node being visited and prints its markup
## with Kernel#print.  Container nodes (document, element) recurse by calling
## children_accept(self) on the node.  Node values are printed verbatim; no
## escaping is applied.
class Writer
  ## Prints every child of the document node.
  def visit_Document(document)
    document.children_accept(self)
  end

  ## Prints the start tag (with all attributes), the children, then the end tag.
  def visit_Element(element)
    # Append to one buffer in place instead of building a new String with
    # += for every attribute.  "".dup yields a mutable, unfrozen buffer.
    attrs = "".dup
    element.attributes.each do |attr|
      attrs << " " << attr.to_s
    end
    print "<#{element.nodeName}#{attrs}>"
    element.children_accept(self)
    print "</#{element.nodeName}>"
  end

  ## Prints a processing instruction; the text between "<?" and "?>" is
  ## whatever pi.nodeValue returns.
  def visit_ProcessingInstruction(pi)
    print "<?" + pi.nodeValue + "?>"
  end

  ## Prints the text node's value as is.
  def visit_Text(text)
    print text.nodeValue
  end

  ## Prints the comment wrapped in "<!--" and "-->".
  def visit_Comment(comment)
    print "<!--" + comment.nodeValue + "-->"
  end

  ## Prints the CDATA section wrapped in "<![CDATA[" and "]]>".
  def visit_CDATASection(cdata)
    print "<![CDATA[" + cdata.nodeValue + "]]>"
  end
end
|
|
41
|
+
|
|
42
|
+
doc = XML::DOM::Builder.new.parse($<.read)
|
|
43
|
+
doc.accept(Writer.new)
|
data/samples/xmlcheck.rb
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## XML checker
|
|
4
|
+
## 1999 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
## Sep 14, 1999 yoshidam: Support the unknownEncoding event
|
|
7
|
+
## Jul 26, 1998 yoshidam: Support Shift_JIS and ISO-2022-JP
|
|
8
|
+
## Changed the error display format to be SP compatible
|
|
9
|
+
|
|
10
|
+
require 'xml/parser'
|
|
11
|
+
require 'nkf'
|
|
12
|
+
|
|
13
|
+
class XMLRetry<Exception; end
|
|
14
|
+
|
|
15
|
+
xml = $<.read
|
|
16
|
+
|
|
17
|
+
parser = XML::Parser.new
|
|
18
|
+
def parser.unknownEncoding(e)
|
|
19
|
+
raise XMLRetry, e
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
begin
|
|
23
|
+
parser.parse(xml)
|
|
24
|
+
print "well-formed\n"
|
|
25
|
+
exit 0
|
|
26
|
+
rescue XMLRetry
|
|
27
|
+
newencoding = nil
|
|
28
|
+
e = $!.to_s
|
|
29
|
+
if e =~ /^iso-2022-jp$/i
|
|
30
|
+
xml = NKF.nkf("-Je", xml)
|
|
31
|
+
newencoding = "EUC-JP"
|
|
32
|
+
end
|
|
33
|
+
parser = XML::Parser.new(newencoding)
|
|
34
|
+
retry
|
|
35
|
+
rescue XML::Parser::Error
|
|
36
|
+
line = parser.line
|
|
37
|
+
column = parser.column
|
|
38
|
+
print "#{$0}:#{$<.filename}:#{line}:#{column}:E: #{$!}\n"
|
|
39
|
+
exit 1
|
|
40
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## Ruby version of xmlcomments
|
|
4
|
+
## 1998 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
## This sample comes from Clark Cooper's sample of Perl extension
|
|
7
|
+
## module XML::Parser.
|
|
8
|
+
## (http://www.netheaven.com/~coopercc/xmlparser/samples/xmlcomments)
|
|
9
|
+
|
|
10
|
+
require 'xml/parser'
|
|
11
|
+
|
|
12
|
+
$count = 0
|
|
13
|
+
|
|
14
|
+
p = XML::Parser.new
|
|
15
|
+
|
|
16
|
+
def p.character(data)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def p.default(data)
|
|
20
|
+
if data =~ /^<!--/
|
|
21
|
+
line = self.line
|
|
22
|
+
data.gsub!(/\n/, "\n\t");
|
|
23
|
+
print "#{line}:\t#{data}\n";
|
|
24
|
+
$count += 1
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
p.parse($<)
|
|
29
|
+
|
|
30
|
+
print "Found #{$count} comments.\n"
|
data/samples/xmlevent.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'xml/parser'
|
|
4
|
+
require 'nkf'
|
|
5
|
+
#require 'uconv'
|
|
6
|
+
|
|
7
|
+
class XMLRetry<Exception; end
|
|
8
|
+
|
|
9
|
+
class SampleParser<XML::Parser
|
|
10
|
+
def startElement(name, attr)
|
|
11
|
+
line = self.line
|
|
12
|
+
column = self.column
|
|
13
|
+
byteIndex = self.byteIndex
|
|
14
|
+
print "L#{line}, #{column}, #{byteIndex}\n"
|
|
15
|
+
attr.each do |key, value|
|
|
16
|
+
# print Uconv.u8toeuc("A#{key} CDATA #{value}\n")
|
|
17
|
+
print "A#{key} CDATA #{value}\n"
|
|
18
|
+
end
|
|
19
|
+
# print Uconv.u8toeuc("(#{name}\n")
|
|
20
|
+
print "(#{name}\n"
|
|
21
|
+
self.defaultCurrent
|
|
22
|
+
end
|
|
23
|
+
private :startElement
|
|
24
|
+
|
|
25
|
+
def endElement(name)
|
|
26
|
+
# print Uconv.u8toeuc(")#{name}\n")
|
|
27
|
+
print ")#{name}\n"
|
|
28
|
+
end
|
|
29
|
+
private :endElement
|
|
30
|
+
|
|
31
|
+
def character(data)
|
|
32
|
+
data.gsub!("\n", "\\n")
|
|
33
|
+
# print Uconv.u8toeuc("-#{data}\n")
|
|
34
|
+
print "-#{data}\n"
|
|
35
|
+
end
|
|
36
|
+
private :character
|
|
37
|
+
|
|
38
|
+
def processingInstruction(target, data)
|
|
39
|
+
data.gsub!("\n", "\\n")
|
|
40
|
+
# print Uconv.u8toeuc("?#{target} #{data}\n")
|
|
41
|
+
print "?#{target} #{data}\n"
|
|
42
|
+
end
|
|
43
|
+
private :processingInstruction
|
|
44
|
+
|
|
45
|
+
def default(data)
|
|
46
|
+
return if data =~ /^<\?xml /
|
|
47
|
+
data.gsub!("\n", "\\n")
|
|
48
|
+
# print Uconv.u8toeuc("//#{data}\n")
|
|
49
|
+
print "//#{data}\n"
|
|
50
|
+
end
|
|
51
|
+
private :default
|
|
52
|
+
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
xml = $<.read
|
|
56
|
+
|
|
57
|
+
parser = SampleParser.new
|
|
58
|
+
def parser.unknownEncoding(e)
|
|
59
|
+
raise XMLRetry, e
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
begin
|
|
63
|
+
parser.parse(xml)
|
|
64
|
+
rescue XMLRetry
|
|
65
|
+
newencoding = nil
|
|
66
|
+
e = $!.to_s
|
|
67
|
+
if e =~ /^iso-2022-jp$/i
|
|
68
|
+
xml = NKF.nkf("-Je", xml)
|
|
69
|
+
newencoding = "EUC-JP"
|
|
70
|
+
end
|
|
71
|
+
parser = SampleParser.new(newencoding)
|
|
72
|
+
retry
|
|
73
|
+
rescue XML::Parser::Error
|
|
74
|
+
line = parser.line
|
|
75
|
+
print "Parse error(#{line}): #{$!}\n"
|
|
76
|
+
end
|
data/samples/xmliter.rb
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
require 'xml/parser'
|
|
4
|
+
require 'nkf'
|
|
5
|
+
#require 'uconv'
|
|
6
|
+
|
|
7
|
+
class XMLRetry<Exception; end
|
|
8
|
+
|
|
9
|
+
xml = $<.read
|
|
10
|
+
parser = XML::Parser.new
|
|
11
|
+
def parser.default; end
|
|
12
|
+
def parser.unknownEncoding(e)
|
|
13
|
+
raise XMLRetry, e
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
begin
|
|
17
|
+
parser.parse(xml) do |type, name, data|
|
|
18
|
+
case type
|
|
19
|
+
when XML::Parser::START_ELEM
|
|
20
|
+
data.each do |key, value|
|
|
21
|
+
# print Uconv.u8toeuc("A#{key} CDATA #{value}\n")
|
|
22
|
+
print "A#{key} CDATA #{value}\n"
|
|
23
|
+
end
|
|
24
|
+
# print Uconv.u8toeuc("(#{name}\n")
|
|
25
|
+
print "(#{name}\n"
|
|
26
|
+
when XML::Parser::END_ELEM
|
|
27
|
+
# print Uconv.u8toeuc(")#{name}\n")
|
|
28
|
+
print ")#{name}\n"
|
|
29
|
+
when XML::Parser::CDATA
|
|
30
|
+
data.gsub!("\n", "\\n")
|
|
31
|
+
# print Uconv.u8toeuc("-#{data}\n")
|
|
32
|
+
print "-#{data}\n"
|
|
33
|
+
when XML::Parser::PI
|
|
34
|
+
data.gsub!("\n", "\\n")
|
|
35
|
+
# print Uconv.u8toeuc("?#{name} #{data}\n")
|
|
36
|
+
print "?#{name} #{data}\n"
|
|
37
|
+
else
|
|
38
|
+
next if data =~ /^<\?xml /
|
|
39
|
+
data.gsub!("\n", "\\n")
|
|
40
|
+
# print Uconv.u8toeuc("//#{data}\n")
|
|
41
|
+
print "//#{data}\n"
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
rescue XMLRetry
|
|
45
|
+
newencoding = nil
|
|
46
|
+
e = $!.to_s
|
|
47
|
+
if e =~ /^iso-2022-jp$/i
|
|
48
|
+
xml = NKF.nkf("-Je", xml)
|
|
49
|
+
newencoding = "EUC-JP"
|
|
50
|
+
end
|
|
51
|
+
parser = XML::Parser.new(newencoding)
|
|
52
|
+
def parser.default; end
|
|
53
|
+
retry
|
|
54
|
+
rescue XML::Parser::Error
|
|
55
|
+
line = parser.line
|
|
56
|
+
print "Parse error(#{line}): #{$!}\n"
|
|
57
|
+
end
|
data/samples/xmlstats.rb
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#! /usr/local/bin/ruby
|
|
2
|
+
|
|
3
|
+
## Ruby version of xmlstats
|
|
4
|
+
## 1999 by yoshidam
|
|
5
|
+
##
|
|
6
|
+
## This sample comes from Clark Cooper's sample of Perl extension
|
|
7
|
+
## module XML::Parser.
|
|
8
|
+
## (http://www.netheaven.com/~coopercc/xmlparser/samples/xmlstats)
|
|
9
|
+
##
|
|
10
|
+
## Try XML benchmark (http://www.xml.com/xml/pub/Benchmark/article.html)!
|
|
11
|
+
## Ruby is probably faster than Perl.
|
|
12
|
+
|
|
13
|
+
require 'xml/parser'
|
|
14
|
+
## Use the optional 'mbstring' extension when it is installed; otherwise
## define a pure-Ruby String#mblength fallback.
begin
  require 'mbstring'
rescue LoadError
  class String
    ## Returns the number of UTF-8 characters (not bytes) in the string.
    def mblength
      # Encoding-aware Rubies (1.9+): String#length already counts
      # characters, and matching the byte-oriented /n regexp below against
      # a non-ASCII UTF-8 string raises Encoding::CompatibilityError.
      # Count on a UTF-8 tagged copy so the receiver is left unmodified.
      if respond_to?(:force_encoding)
        return dup.force_encoding('UTF-8').length
      end

      # Ruby 1.8: length is a byte count.  Every UTF-8 lead byte announces
      # 1, 2 or 3 continuation bytes, which are subtracted from the total.
      cnt = length
      scan(/[\300-\367]/n) do |lead|
        if lead < "\340"
          cnt -= 1
        elsif lead < "\360"
          cnt -= 2
        else
          cnt -= 3
        end
      end
      cnt
    end
  end
end
|
|
33
|
+
|
|
34
|
+
$KCODE="UTF8"
|
|
35
|
+
|
|
36
|
+
## Statistics record for one element name.
##
##   name   - element name (read-only)
##   count  - counter, starts at 0
##   minlev - level value, nil until assigned
##   seen   - value passed to the constructor; tie-breaker when ordering
##   chars  - counter, starts at 0
##   empty  - flag, starts as true
##   ptab   - Hash of counters defaulting to 0
##   ktab   - Hash of counters defaulting to 0
##   atab   - Hash of counters defaulting to 0
class Elinfo
  # attr_reader/attr_accessor replace the obsolete "attr :x, true" form,
  # which triggers an "optional boolean argument is obsoleted" warning.
  attr_reader :name
  attr_accessor :count, :minlev, :seen, :chars, :empty, :ptab, :ktab, :atab

  ## name: element name
  ## seen: value stored as the seen attribute
  def initialize(name, seen)
    @name = name
    @count = 0
    @minlev = nil
    @seen = seen
    @chars = 0
    @empty = true
    # Missing keys read as 0 so callers can do tab[key] += 1 directly.
    @ptab = Hash.new(0)
    @ktab = Hash.new(0)
    @atab = Hash.new(0)
  end

  ## Orders by minlev first, then by seen.  Returns a negative, zero or
  ## positive Integer (the difference, not necessarily -1/0/1).  Both
  ## operands must already have a numeric minlev.
  def <=>(b)
    ret = minlev - b.minlev
    ret == 0 ? seen - b.seen : ret
  end
end
|
|
70
|
+
|
|
71
|
+
## Parser that gathers per-element statistics while a document is parsed.
##
## For every element name it keeps one Elinfo record holding how often the
## element occurred, which parents, children and attributes were seen with
## it, and how much character data it contained.
class StatParser < XML::Parser
  ## Sets up empty statistics.
  def initialize(*rest)
    # Let the parent class run its own constructor with the same arguments
    # before the bookkeeping is added.
    # NOTE(review): the original skipped this call; XML::Parser is not
    # visible here, so confirm that its initialize accepts these arguments.
    super
    @elements = {}   # element name => Elinfo
    @seen = 0        # number of distinct element names met so far
    @root = nil      # name of the outermost element
    @context = []    # stack of Elinfo records for the currently open elements
  end

  ## Records one occurrence of element +name+ with attribute hash +attr+.
  def startElement(name, attr)
    # Create the record the first time a name is met; @seen gives it its
    # first-appearance rank.
    elinf = (@elements[name] ||= Elinfo.new(name, @seen += 1))
    elinf.count += 1

    pinf = @context[-1]
    if pinf
      # Cross-link parent and child counters.
      elinf.ptab[pinf.name] += 1
      pinf.ktab[name] += 1
      pinf.empty = false
    else
      # No open element: this is the outermost element.
      @root = name
    end

    attr.each_key do |key|
      elinf.atab[key] += 1
    end
    @context.push(elinf)
  end

  ## Closes the innermost open element.
  def endElement(name)
    @context.pop
  end

  ## Adds the character count of +data+ to the innermost open element.
  def character(data)
    inf = @context[-1]
    inf.empty = false
    inf.chars += data.mblength
  end

  ## Assigns +level+ to element +name+ (the root when +name+ is nil) if the
  ## element has no level yet or a larger one, then recurses into every
  ## child name recorded for it with level + 1.
  def set_minlev(name, level)
    name = @root if name.nil?
    inf = @elements[name]
    if inf.minlev.nil? || inf.minlev > level
      newlev = level + 1
      inf.minlev = level
      inf.ktab.each_key do |key|
        set_minlev(key, newlev)
      end
    end
  end

  ## Yields (name, Elinfo) pairs ordered by Elinfo#<=>.
  def elinf_sort
    ordered = @elements.sort { |a, b| a[1] <=> b[1] }
    ordered.each do |name, inf|
      yield(name, inf)
    end
  end
end
|
|
130
|
+
|
|
131
|
+
def showtab(label, tab, dosum)
|
|
132
|
+
if tab.length == 0; return end
|
|
133
|
+
print "\n ", label, ":\n"
|
|
134
|
+
sum = 0
|
|
135
|
+
|
|
136
|
+
tab.sort.each do |name, cnt|
|
|
137
|
+
sum = sum + cnt
|
|
138
|
+
printf(" %-16s %5d\n", name, cnt)
|
|
139
|
+
end
|
|
140
|
+
if dosum and tab.length > 1
|
|
141
|
+
print " =====\n"
|
|
142
|
+
printf(" %5d\n", sum);
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
p = StatParser.new
|
|
147
|
+
begin
|
|
148
|
+
p.parse($<.read)
|
|
149
|
+
rescue XML::ParserError
|
|
150
|
+
print "#{$0}: #{$!} (in line #{p.line})\n"
|
|
151
|
+
exit 1
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
p.set_minlev(nil, 0)
|
|
155
|
+
p.elinf_sort do |name, elinf|
|
|
156
|
+
print "\n================\n"
|
|
157
|
+
print name, ": ", elinf.count, "\n"
|
|
158
|
+
if elinf.chars > 0
|
|
159
|
+
print "Had ", elinf.chars, " bytes of character data\n"
|
|
160
|
+
end
|
|
161
|
+
if elinf.empty
|
|
162
|
+
print "Always empty\n"
|
|
163
|
+
end
|
|
164
|
+
showtab("Parents", elinf.ptab, false)
|
|
165
|
+
showtab("Children", elinf.ktab, true)
|
|
166
|
+
showtab("Attributes", elinf.atab, false)
|
|
167
|
+
end
|