xmlparser 0.6.81
Sign up to get free protection for your applications and to get access to all the features.
- data/MANIFEST +112 -0
- data/README +697 -0
- data/README.ja +789 -0
- data/Rakefile +34 -0
- data/ext/encoding.h +91 -0
- data/ext/xmlparser/mkrf_conf.rb +28 -0
- data/ext/xmlparser/xmlparser.c +2226 -0
- data/lib/sax.rb +1 -0
- data/lib/saxdriver.rb +1 -0
- data/lib/wget.rb +47 -0
- data/lib/xml/dom/builder-ja.rb +58 -0
- data/lib/xml/dom/builder.rb +310 -0
- data/lib/xml/dom/core.rb +3276 -0
- data/lib/xml/dom/digest.rb +94 -0
- data/lib/xml/dom/visitor.rb +182 -0
- data/lib/xml/dom2/attr.rb +213 -0
- data/lib/xml/dom2/cdatasection.rb +76 -0
- data/lib/xml/dom2/characterdata.rb +177 -0
- data/lib/xml/dom2/comment.rb +81 -0
- data/lib/xml/dom2/core.rb +19 -0
- data/lib/xml/dom2/document.rb +317 -0
- data/lib/xml/dom2/documentfragment.rb +82 -0
- data/lib/xml/dom2/documenttype.rb +102 -0
- data/lib/xml/dom2/dombuilder.rb +277 -0
- data/lib/xml/dom2/dombuilderfilter.rb +12 -0
- data/lib/xml/dom2/domentityresolver.rb +13 -0
- data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
- data/lib/xml/dom2/domexception.rb +95 -0
- data/lib/xml/dom2/domimplementation.rb +61 -0
- data/lib/xml/dom2/dominputsource.rb +29 -0
- data/lib/xml/dom2/element.rb +533 -0
- data/lib/xml/dom2/entity.rb +110 -0
- data/lib/xml/dom2/entityreference.rb +107 -0
- data/lib/xml/dom2/namednodemap.rb +138 -0
- data/lib/xml/dom2/node.rb +587 -0
- data/lib/xml/dom2/nodelist.rb +231 -0
- data/lib/xml/dom2/notation.rb +86 -0
- data/lib/xml/dom2/processinginstruction.rb +155 -0
- data/lib/xml/dom2/text.rb +128 -0
- data/lib/xml/dom2/xpath.rb +398 -0
- data/lib/xml/encoding-ja.rb +42 -0
- data/lib/xml/parser.rb +13 -0
- data/lib/xml/parserns.rb +236 -0
- data/lib/xml/sax.rb +353 -0
- data/lib/xml/saxdriver.rb +370 -0
- data/lib/xml/xpath.rb +3284 -0
- data/lib/xml/xpath.ry +2352 -0
- data/lib/xmldigest.rb +1 -0
- data/lib/xmltree.rb +1 -0
- data/lib/xmltreebuilder.rb +1 -0
- data/lib/xmltreevisitor.rb +1 -0
- metadata +111 -0
data/MANIFEST
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
MANIFEST
|
2
|
+
xmlparser.c
|
3
|
+
Makefile
|
4
|
+
samples/xmlcheck.rb
|
5
|
+
samples/xmlevent.rb
|
6
|
+
samples/xmliter.rb
|
7
|
+
samples/index_euc.xml
|
8
|
+
samples/index_jis.xml
|
9
|
+
samples/index_sjis.xml
|
10
|
+
samples/index_u16.xml
|
11
|
+
samples/index_u8.xml
|
12
|
+
samples/treetest.rb
|
13
|
+
samples/buildertest.rb
|
14
|
+
samples/document.dtd
|
15
|
+
samples/xpointertest.rb
|
16
|
+
samples/index_noenc.xml
|
17
|
+
samples/gtktree.rb
|
18
|
+
samples/videolist.xml
|
19
|
+
samples/buildertest2.rb
|
20
|
+
samples/xmlcomments.rb
|
21
|
+
samples/visitortest.rb
|
22
|
+
samples/my-html.rb
|
23
|
+
samples/visitor.rb
|
24
|
+
samples/namespaces
|
25
|
+
samples/namespaces/namespace2.rb
|
26
|
+
samples/namespaces/namespace1.rb
|
27
|
+
samples/namespaces/namespace2.xml
|
28
|
+
samples/namespaces/namespace1.xml
|
29
|
+
samples/writer.rb
|
30
|
+
samples/idattrtest.rb
|
31
|
+
samples/digesttest.rb
|
32
|
+
samples/xpointer.rb
|
33
|
+
samples/digesttest2.rb
|
34
|
+
samples/xmlstats.rb
|
35
|
+
samples/saxtest.rb
|
36
|
+
samples/dtd/ext1.dtd
|
37
|
+
samples/dtd/ext2.dtd
|
38
|
+
samples/dtd/extdtd.xml
|
39
|
+
samples/dtd/extdtd.rb
|
40
|
+
samples/doctype.rb
|
41
|
+
samples/doctype.xml
|
42
|
+
samples/doctypei.rb
|
43
|
+
samples/expat-1.2
|
44
|
+
samples/expat-1.2/ext.ent
|
45
|
+
samples/expat-1.2/idtest.rb
|
46
|
+
samples/expat-1.2/idtest.xml
|
47
|
+
samples/expat-1.2/exttest.rb
|
48
|
+
samples/expat-1.2/exttesti.rb
|
49
|
+
samples/expat-1.2/hoge.dtd
|
50
|
+
samples/expat-1.2/xmlextparser.rb
|
51
|
+
README.ja
|
52
|
+
lib/xmltreebuilder.rb
|
53
|
+
lib/xmltree.rb
|
54
|
+
lib/xmltreevisitor.rb
|
55
|
+
lib/xmltreebuilder-ja.rb
|
56
|
+
lib/xmldigest.rb
|
57
|
+
lib/xmlencoding-ja.rb
|
58
|
+
lib/wget.rb
|
59
|
+
lib/sax.rb
|
60
|
+
lib/saxdriver.rb
|
61
|
+
extconf.rb
|
62
|
+
README
|
63
|
+
encoding.h
|
64
|
+
Encodings/euc-jp.enc
|
65
|
+
Encodings/shift_jis.enc
|
66
|
+
Encodings/README.ja
|
67
|
+
lib/xml/dom/digest.rb
|
68
|
+
lib/xml/dom/core.rb
|
69
|
+
lib/xml/dom/builder.rb
|
70
|
+
lib/xml/dom/visitor.rb
|
71
|
+
lib/xml/dom/builder-ja.rb
|
72
|
+
lib/xml/sax.rb
|
73
|
+
lib/xml/saxdriver.rb
|
74
|
+
lib/xml/parser.rb
|
75
|
+
lib/xml/encoding-ja.rb
|
76
|
+
lib/xml/dom2/document.rb
|
77
|
+
lib/xml/dom2/node.rb
|
78
|
+
lib/xml/dom2/namednodemap.rb
|
79
|
+
lib/xml/dom2/documentfragment.rb
|
80
|
+
lib/xml/dom2/cdatasection.rb
|
81
|
+
lib/xml/dom2/documenttype.rb
|
82
|
+
lib/xml/dom2/element.rb
|
83
|
+
lib/xml/dom2/attr.rb
|
84
|
+
lib/xml/dom2/comment.rb
|
85
|
+
lib/xml/dom2/characterdata.rb
|
86
|
+
lib/xml/dom2/notation.rb
|
87
|
+
lib/xml/dom2/entity.rb
|
88
|
+
lib/xml/dom2/entityreference.rb
|
89
|
+
lib/xml/dom2/text.rb
|
90
|
+
lib/xml/dom2/processinginstruction.rb
|
91
|
+
lib/xml/dom2/domexception.rb
|
92
|
+
lib/xml/dom2/domimplementation.rb
|
93
|
+
lib/xml/dom2/nodelist.rb
|
94
|
+
lib/xml/dom2/dombuilder.rb
|
95
|
+
lib/xml/dom2/domentityresolver.rb
|
96
|
+
lib/xml/dom2/core.rb
|
97
|
+
lib/xml/dom2/dominputsource.rb
|
98
|
+
lib/xml/dom2/dombuilderfilter.rb
|
99
|
+
lib/xml/dom2/domentityresolverimpl.rb
|
100
|
+
samples/dom2/dom2test2.rb
|
101
|
+
samples/dom2/test1.xml
|
102
|
+
samples/dom2/test2.xml
|
103
|
+
samples/dom2/dom2test1.rb
|
104
|
+
lib/xml/parserns.rb
|
105
|
+
samples/nstest.rb
|
106
|
+
samples/test/featurelist.rb
|
107
|
+
samples/test/skippedentity.rb
|
108
|
+
samples/test/useforeigndtd.rb
|
109
|
+
lib/xml/xpath.ry
|
110
|
+
lib/xml/xpath.rb
|
111
|
+
lib/xml/dom2/xpath.rb
|
112
|
+
samples/dom2/gtkxpath.rb
|
data/README
ADDED
@@ -0,0 +1,697 @@
|
|
1
|
+
Expat (XML Parser Toolkit) Module for Ruby
|
2
|
+
version 0.6.8
|
3
|
+
|
4
|
+
Yoshida Masato
|
5
|
+
<yoshidam@yoshidam.net>
|
6
|
+
|
7
|
+
- Introduction
|
8
|
+
|
9
|
+
This is a module for accessing James Clark's XML Parser
|
10
|
+
Toolkit "expat" (http://www.jclark.com/xml/expat.html) from
|
11
|
+
Ruby.
|
12
|
+
|
13
|
+
Supported versions of expat are 1.95.0 or later
|
14
|
+
(http://sourceforge.net/projects/expat/).
|
15
|
+
|
16
|
+
|
17
|
+
- Installation
|
18
|
+
|
19
|
+
This can work with ruby-1.6. I recommend you to use
|
20
|
+
ruby-1.6.7 or later. And you need the source code of
|
21
|
+
expat-1.95.x.
|
22
|
+
|
23
|
+
First, compile expat. With expat-1.95.x, configure; make;
|
24
|
+
make install.
|
25
|
+
|
26
|
+
Then, compile xmlparser. You can specify the location of
|
27
|
+
expat's header file or library file.
|
28
|
+
|
29
|
+
--with-expat-lib=/path/to/expat/lib
|
30
|
+
--with-expat-include=/path/to/expat/include
|
31
|
+
|
32
|
+
If you want to use encoding maps of XML::Parser of Perl, set
|
33
|
+
the proper directory with --with-perl-enc-map option.
|
34
|
+
|
35
|
+
For example:
|
36
|
+
|
37
|
+
ruby extconf.rb --with-perl-enc-map=/usr/local/lib/XML/Parser/Encodings
|
38
|
+
make
|
39
|
+
make site-install
|
40
|
+
|
41
|
+
|
42
|
+
- Usage
|
43
|
+
|
44
|
+
If you do not link this module with Ruby statically,
|
45
|
+
|
46
|
+
require "xml/parser"
|
47
|
+
|
48
|
+
before using.
|
49
|
+
|
50
|
+
There are two styles of getting the parsing result. One is to
|
51
|
+
define instance methods as event handlers, another is
|
52
|
+
to use iterator.
|
53
|
+
|
54
|
+
To define event handlers is like SAX (Simple API for XML).
|
55
|
+
|
56
|
+
If you use event handlers, inherit XMLParser class and
|
57
|
+
define instance methods as event handlers.
|
58
|
+
Or you may use the instance of XMLParser class (or derived)
|
59
|
+
with singleton instance methods as event handlers.
|
60
|
+
|
61
|
+
When no event handlers are defined, this parser does
|
62
|
+
non-validating syntax checking only.
|
63
|
+
|
64
|
+
method name | event
|
65
|
+
-------------------------+---------------------------
|
66
|
+
startElement | element start tag
|
67
|
+
endElement | element end tag
|
68
|
+
character | character data
|
69
|
+
processingInstruction | processing instruction
|
70
|
+
unparsedEntityDecl | unparsed entity declaration(OBSOLETE)
|
71
|
+
notationDecl | notation declaration
|
72
|
+
externalEntityRef | external entity reference
|
73
|
+
comment | comment
|
74
|
+
startCdata | CDATA section start
|
75
|
+
endCdata | CDATA section end
|
76
|
+
startNamespaceDecl | Namespace declaration start
|
77
|
+
endNamespaceDecl | Namespace declaration end
|
78
|
+
startDoctypeDecl | DOCTYPE declaration start
|
79
|
+
endDoctypeDecl | DOCTYPE declaration end
|
80
|
+
notStandalone | document is not standalone
|
81
|
+
default | other data
|
82
|
+
defaultExpand | same as default (*1)
|
83
|
+
unknownEncoding | unknown character encoding
|
84
|
+
elementDecl | element declaration
|
85
|
+
attlistDecl | attlist declaration
|
86
|
+
xmlDecl | XML declaration
|
87
|
+
entityDecl | entity declaration
|
88
|
+
|
89
|
+
*1 inhibits expansion of internal entities. defaultExpand
|
90
|
+
has higher priority than default.
|
91
|
+
|
92
|
+
To use iterator is probably a ruby-ish manner.
|
93
|
+
|
94
|
+
If you use iterator, this parser ignores event handlers
|
95
|
+
even if they are defined.
|
96
|
+
The iterator evaluates the iterator block with three
|
97
|
+
variables, event type, name, and data.
|
98
|
+
|
99
|
+
event type | name | data
|
100
|
+
----------------------------------+-----------------+-------------------
|
101
|
+
START_ELEM | element name | hash of attributes
|
102
|
+
END_ELEM | element name | nil
|
103
|
+
CDATA | nil | string
|
104
|
+
PI | PI name | string
|
105
|
+
UNPARSED_ENTITY_DECL(OBSOLETE) | entity name | array (*1)
|
106
|
+
NOTATION_DECL | notation name | array (*2)
|
107
|
+
EXTERNAL_ENTITY_REF | entity names(*5)| array (*2)
|
108
|
+
COMMENT | nil | string
|
109
|
+
START_CDATA | nil | nil
|
110
|
+
END_CDATA | nil | nil
|
111
|
+
START_NAMESPACE_DECL | prefix | URI
|
112
|
+
END_NAMESPACE_DECL | prefix | nil
|
113
|
+
START_DOCTYPE_DECL | doctype name | nil
|
114
|
+
END_DOCTYPE_DECL | nil | nil
|
115
|
+
DEFAULT (*4) | nil | string
|
116
|
+
ELEMENT_DECL | element name | array (*8)
|
117
|
+
ATTLIST_DECL | element name | array (*9)
|
118
|
+
XML_DECL | nil | array (*10)
|
119
|
+
ENTITY_DECL | entity name | array (*11)
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
*1 [URL base, system ID, public ID, notation name]
|
124
|
+
URL base and notation name may be nil.
|
125
|
+
*2 [URL base, system ID, public ID]
|
126
|
+
URL base, system ID and public ID may be nil.
|
127
|
+
*4 defaultExpand enables this event
|
128
|
+
*5 It may be nil
|
129
|
+
*8 [type, quant, name, [...]]
|
130
|
+
*9 [attname, atttype, default, isrequired]
|
131
|
+
*10 [version, encoding, standalone]
|
132
|
+
*11 [isPE, value, system ID, public ID, notation name]
|
133
|
+
|
134
|
+
UNPARSED_ENTITY_DECL, NOTATION_DECL, EXTERNAL_ENTITY_REF,
|
135
|
+
COMMENT, START_CDATA, END_CDATA, START_NAMESPACE_DECL,
|
136
|
+
END_NAMESPACE_DECL, DEFAULT, ELEMENT_DECL, ATTLIST_DECL,
|
137
|
+
XML_DECL, ENTITY_DECL, EXTERNAL_PARSED_ENTITY_DECL and
|
138
|
+
INTERNAL_PARSED_ENTITY_DECL events are generated only if
|
139
|
+
each dummy methods, "unparsedEntityDecl", "notationDecl",
|
140
|
+
"externalEntityRef", "comment", "startCdata", "endCdata",
|
141
|
+
"startNamespaceDecl", "endNamespaceDecl", "default" (or
|
142
|
+
"defaultExpand"), "elementDecl", "attlistDecl", "xmlDecl",
|
143
|
+
"entityDecl", "externalParsedEntityDecl" and
|
144
|
+
"internalParsedEntityDecl" are defined.
|
145
|
+
|
146
|
+
Supported input character encodings are UTF-8 and UTF-16.
|
147
|
+
Output character encoding is UTF-8.
|
148
|
+
If XML_ENC_PATH is set on compiling, you can use the
|
149
|
+
encoding maps of XML::Parser of Perl. This package does not
|
150
|
+
include them, you must get XML::Parser or XML::Encoding from
|
151
|
+
CPAN, and install .enc files into the proper directory.
|
152
|
+
|
153
|
+
|
154
|
+
XMLParser class:
|
155
|
+
|
156
|
+
Class method
|
157
|
+
new(encoding = nil, nssep = nil)
|
158
|
+
Create a XML parser object. A failure of the
|
159
|
+
creation raises a XMLParserError exception.
|
160
|
+
|
161
|
+
The "encoding" parameter can specify the character
|
162
|
+
encoding. Expat can recognize ISO-8859-1, UTF-8,
|
163
|
+
US-ASCII and UTF-16, and expat_ja can also recognize EUC-JP and
|
164
|
+
Shift_JIS.
|
165
|
+
|
166
|
+
The "nssep" parameter enables namespace extension.
|
167
|
+
The namespace-prefixed element and attribute names are
|
168
|
+
concatenated with the namespace's URI and a separator
|
169
|
+
(the first byte of nssep).
|
170
|
+
|
171
|
+
For example, with nssep = '!',
|
172
|
+
|
173
|
+
<hoge:test xmlns:hoge="http://www.yoshidam.net/ns/hoge">
|
174
|
+
|
175
|
+
is parsed into
|
176
|
+
|
177
|
+
http://www.yoshidam.net/ns/hoge!test
|
178
|
+
|
179
|
+
An object that has finished parsing cannot be reused,
|
180
|
+
so you must create a new one for every parsing.
|
181
|
+
|
182
|
+
new(parser, context, encoding = nil)
|
183
|
+
Create a XML parser object that can parse an external
|
184
|
+
general entity. A failure of the creation raises a
|
185
|
+
XMLParserError exception.
|
186
|
+
|
187
|
+
This can be called at any point after the first call
|
188
|
+
to an externalEntityRef event.
|
189
|
+
|
190
|
+
The "context" parameter can be passed from the parse
|
191
|
+
context of externalEntityRef event.
|
192
|
+
|
193
|
+
The "encoding" parameter can specify the character
|
194
|
+
encoding.
|
195
|
+
|
196
|
+
Call "reset" to be reused.
|
197
|
+
|
198
|
+
expatVersion()
|
199
|
+
Get expat version.
|
200
|
+
|
201
|
+
getFeatureList
|
202
|
+
Get a hash list of expat API's features.
|
203
|
+
|
204
|
+
This method is for expat-1.95.5 or later.
|
205
|
+
|
206
|
+
Method
|
207
|
+
parse(str, isFinal = true)
|
208
|
+
Parse a string. This method can be an
|
209
|
+
iterator. Parsing results can be processed by event
|
210
|
+
handlers or an iterator block.
|
211
|
+
|
212
|
+
"IsFinal" parameter must be true on the last call of this
|
213
|
+
method. Default is true. No parameter call of this
|
214
|
+
method indicates the end of parsing.
|
215
|
+
|
216
|
+
"Str" can be a stream object. It must be an object
|
217
|
+
with "gets" method. In this case, "isFinal" is
|
218
|
+
ignored, the parsing is repeated until the stream
|
219
|
+
returns nil.
|
220
|
+
|
221
|
+
A failure to parse raises a XMLParserError
|
222
|
+
exception.
|
223
|
+
|
224
|
+
done
|
225
|
+
Free the parser. Usually you can trust the GC, but
|
226
|
+
after parsing the external parameter entity, you must
|
227
|
+
free the parser in the externalEntityRef event.
|
228
|
+
|
229
|
+
defaultCurrent
|
230
|
+
Raise a "default" event within any event handlers or
|
231
|
+
an iterator block.
|
232
|
+
You can get the corresponding markup.
|
233
|
+
|
234
|
+
If within an event handler, it raises a default event
|
235
|
+
immediately. But within an iterator block, the next
|
236
|
+
yielding will be a DEFAULT event.
|
237
|
+
|
238
|
+
setBase
|
239
|
+
Set URL base. The setting value can get the parameter
|
240
|
+
'base' of the external entity methods, such as
|
241
|
+
unparsedEntityDecl.
|
242
|
+
|
243
|
+
line
|
244
|
+
column
|
245
|
+
byteIndex
|
246
|
+
Get current parsing location.
|
247
|
+
|
248
|
+
When a "parse" method raises XML::Parser::Error, these
|
249
|
+
methods return the position of the error detected.
|
250
|
+
|
251
|
+
byteCount
|
252
|
+
Get the number of bytes in the current event.
|
253
|
+
|
254
|
+
When the event is in an internal entity, this
|
255
|
+
method returns 0.
|
256
|
+
|
257
|
+
getSpecifiedAttributes
|
258
|
+
Check the attributes whether specified or defaulted.
|
259
|
+
|
260
|
+
Return value is a hash, the keys are the attribute's
|
261
|
+
name, the values are specified or not (boolean).
|
262
|
+
|
263
|
+
This method should be used in startElement event
|
264
|
+
handler.
|
265
|
+
|
266
|
+
setParamEntityParsing(parsing)
|
267
|
+
Controls parsing of parameter entities (including the
|
268
|
+
external DTD subset ).
|
269
|
+
|
270
|
+
"Parsing" parameter is
|
271
|
+
PARAM_ENTITY_PARSING_NEVER (0) or
|
272
|
+
PARAM_ENTITY_PARSING_UNLESS_STANDALONE (1) or
|
273
|
+
PARAM_ENTITY_PARSING_ALWAYS (2).
|
274
|
+
|
275
|
+
References to external parameter entities will invoke
|
276
|
+
the externalEntityRef event. The context will be nil.
|
277
|
+
|
278
|
+
setReturnNSTriplet(do_nst)
|
279
|
+
Sets namespace triplet flag.
|
280
|
+
|
281
|
+
It will work well for element names with Expat-1.95.3 or later
|
282
|
+
|
283
|
+
getInputContext
|
284
|
+
Returns the parser's input buffer and current parse
|
285
|
+
position.
|
286
|
+
|
287
|
+
getIdAttribute
|
288
|
+
Gets the ID attribute name.
|
289
|
+
|
290
|
+
This method should be used in startElement event
|
291
|
+
handler.
|
292
|
+
|
293
|
+
reset(encoding)
|
294
|
+
Resets the parser object to be reused.
|
295
|
+
|
296
|
+
The "encoding" parameter can specify the character
|
297
|
+
encoding.
|
298
|
+
|
299
|
+
This method is for the expat-1.95.3 or later.
|
300
|
+
|
301
|
+
useForeignDTD(useDTD)
|
302
|
+
Specifies to parse an external DTD subset without the
|
303
|
+
DOCTYPE declaration.
|
304
|
+
|
305
|
+
In externalEntityRef, sysID and pubID will be NULL.
|
306
|
+
|
307
|
+
This method is for the expat-1.95.5 or later.
|
308
|
+
|
309
|
+
See setParamEntityParsing also.
|
310
|
+
|
311
|
+
|
312
|
+
Method (event handler)
|
313
|
+
startElement(name, attrs)
|
314
|
+
This method is called when element start tags are
|
315
|
+
detected.
|
316
|
+
"Name" is the element name, attrs is a hash of
|
317
|
+
attributes, the keys are the attribute's name, the
|
318
|
+
values are attribute's values.
|
319
|
+
|
320
|
+
endElement(name)
|
321
|
+
This method is called when element end tags are
|
322
|
+
detected.
|
323
|
+
"Name" is the element name.
|
324
|
+
|
325
|
+
character(data)
|
326
|
+
This method is called when texts or CDATA sections are
|
327
|
+
detected.
|
328
|
+
Internal entities are expanded as long as "default"
|
329
|
+
handler is not defined.
|
330
|
+
|
331
|
+
processingInstruction(target, data)
|
332
|
+
This method is called when processing instructions are
|
333
|
+
detected.
|
334
|
+
|
335
|
+
unparsedEntityDecl(entityName, base, systemId, publicId, notationName)
|
336
|
+
** OBSOLETE **
|
337
|
+
This method is called when unparsed entity declarations
|
338
|
+
are detected.
|
339
|
+
"EntityName", "base", "systemId", "publicId" and
|
340
|
+
"notationName" are the entity name, the URL base, the
|
341
|
+
system identifier, the public identifier and the
|
342
|
+
notation name.
|
343
|
+
The URL base and the notation name can be nil.
|
344
|
+
|
345
|
+
If you use iterator, this method is not called, but to
|
346
|
+
define this affects to cause UNPARSED_ENTITY_DECL
|
347
|
+
event.
|
348
|
+
|
349
|
+
notationDecl(notationName, base, systemId, publicId)
|
350
|
+
This method is called when notation declarations are
|
351
|
+
detected.
|
352
|
+
"NotationName", "base", "systemId", and "publicId" are
|
353
|
+
the notation name, the URL base, the system identifier
|
354
|
+
and the public identifier.
|
355
|
+
The URL base, the system identifier and the public
|
356
|
+
identifier can be nil.
|
357
|
+
|
358
|
+
If you use iterator, this method is not called, but to
|
359
|
+
define this affects to cause NOTATION_DECL event.
|
360
|
+
|
361
|
+
externalEntityRef(context, base, systemId, publicId)
|
362
|
+
This method is called when external entity references
|
363
|
+
are detected.
|
364
|
+
"context", "base", "systemId", and "publicId" are the
|
365
|
+
parsing context, the URL base, the system identifier
|
366
|
+
and the public identifier.
|
367
|
+
The URL base and the public identifier can be nil.
|
368
|
+
The context can use the 'context' parameter of the
|
369
|
+
constructor of the external entity parser.
|
370
|
+
|
371
|
+
If you do not parse the external entities by this
|
372
|
+
event, the external entities are never parsed.
|
373
|
+
|
374
|
+
If you use iterator, this method is not called, but to
|
375
|
+
define this affects to cause EXTERNAL_ENTITY_REF
|
376
|
+
event.
|
377
|
+
|
378
|
+
On expat-19990626 or later, it is called when external
|
379
|
+
parameter entity refs (including external DTD subset) are
|
380
|
+
detected. In this case, "context" will be nil. The
|
381
|
+
parser for the external parameter entity must be
|
382
|
+
created, "parse" and "done" in this event.
|
383
|
+
See setParamEntityParsing also.
|
384
|
+
|
385
|
+
comment(data)
|
386
|
+
This method is called when comments are detected.
|
387
|
+
|
388
|
+
If you use iterator, this method is not called, but to
|
389
|
+
define this affects to cause COMMENT event.
|
390
|
+
|
391
|
+
startCdata()
|
392
|
+
This method is called when CDATA sections start.
|
393
|
+
The contents of the CDATA sections are reported by
|
394
|
+
character event.
|
395
|
+
|
396
|
+
If you use iterator, this method is not called, but to
|
397
|
+
define this affects to cause START_CDATA event.
|
398
|
+
|
399
|
+
endCdata()
|
400
|
+
This method is called when CDATA sections end.
|
401
|
+
|
402
|
+
If you use iterator, this method is not called, but to
|
403
|
+
define this affects to cause END_CDATA event.
|
404
|
+
|
405
|
+
startNamespaceDecl(prefix, uri)
|
406
|
+
This method is called before the element that has
|
407
|
+
namespace declaration.
|
408
|
+
|
409
|
+
Prefix and uri can be nil.
|
410
|
+
|
411
|
+
If you use iterator, this method is not called, but to
|
412
|
+
define this affects to cause START_NAMESPACE_DECL
|
413
|
+
event.
|
414
|
+
|
415
|
+
endNamespaceDecl(prefix)
|
416
|
+
This method is called after the element that has
|
417
|
+
namespace declaration.
|
418
|
+
|
419
|
+
Prefix can be nil.
|
420
|
+
|
421
|
+
If you use iterator, this method is not called, but to
|
422
|
+
define this affects to cause END_NAMESPACE_DECL event.
|
423
|
+
|
424
|
+
startDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset)
|
425
|
+
This method is called when the name of the DOCTYPE is
|
426
|
+
encountered.
|
427
|
+
|
428
|
+
If you use iterator, this method is not called, but to
|
429
|
+
define this affects to cause START_DOCTYPE_DECL event.
|
430
|
+
|
431
|
+
endDoctypeDecl()
|
432
|
+
This method is called when the closing > is
|
433
|
+
encountered, but after processing any external subset.
|
434
|
+
|
435
|
+
If you use iterator, this method is not called, but to
|
436
|
+
define this affects to cause END_DOCTYPE_DECL event.
|
437
|
+
|
438
|
+
default(data)
|
439
|
+
This method is called when there is no applicable
|
440
|
+
event handler.
|
441
|
+
|
442
|
+
If this method is defined, expansion of internal
|
443
|
+
entities is inhibited.
|
444
|
+
|
445
|
+
If you use iterator, this method is not called, but to
|
446
|
+
define this affects to cause DEFAULT event and to
|
447
|
+
inhibit expansion of internal entities.
|
448
|
+
|
449
|
+
defaultExpand(data)
|
450
|
+
This method is called when there is no applicable
|
451
|
+
event handler.
|
452
|
+
|
453
|
+
If you use iterator, this method is not called, but to
|
454
|
+
define this affects to cause DEFAULT event.
|
455
|
+
|
456
|
+
This method has higher priority than the default method.
|
457
|
+
|
458
|
+
unknownEncoding(name)
|
459
|
+
This method is called when unknown encoding is
|
460
|
+
detected.
|
461
|
+
|
462
|
+
XMLEncoding object (or nil to reject) must be returned.
|
463
|
+
|
464
|
+
Even if parse method is used as the iterator, this
|
465
|
+
method is called.
|
466
|
+
|
467
|
+
notStandalone()
|
468
|
+
This method is called if the document is not standalone
|
469
|
+
(it has an external subset or a reference to a
|
470
|
+
parameter entity, but does not have standalone="yes").
|
471
|
+
|
472
|
+
You may return 0 to raise an error, or return 1 to
|
473
|
+
continue the parsing.
|
474
|
+
|
475
|
+
Even if parse method is used as the iterator, this
|
476
|
+
method is called.
|
477
|
+
|
478
|
+
elementDecl(name, model)
|
479
|
+
|
480
|
+
If you use iterator, this method is not called, but to
|
481
|
+
define this affects to cause ELEMENT_DECL event.
|
482
|
+
|
483
|
+
attlistDecl(elname, attname, att_type, dflt, isrequired)
|
484
|
+
|
485
|
+
If you use iterator, this method is not called, but to
|
486
|
+
define this affects to cause ATTLIST_DECL event.
|
487
|
+
|
488
|
+
xmlDecl(version, encoding, standalone)
|
489
|
+
|
490
|
+
If you use iterator, this method is not called, but to
|
491
|
+
define this affects to cause XML_DECL event.
|
492
|
+
|
493
|
+
entityDecl(entityName, isparameter_entity, value,
|
494
|
+
base, systemId, publicId, notationName)
|
495
|
+
|
496
|
+
If you use iterator, this method is not called, but to
|
497
|
+
define this affects to cause ENTITY_DECL event.
|
498
|
+
|
499
|
+
skippedEntity(entityName, is_parameter_entity)
|
500
|
+
|
501
|
+
This method is for expat-1.95.4.
|
502
|
+
|
503
|
+
If you use iterator, this method is not called, but to
|
504
|
+
define this affects to cause
|
505
|
+
SKIPPED_ENTITY event.
|
506
|
+
|
507
|
+
|
508
|
+
XMLEncoding class:
|
509
|
+
To convert the character encoding, you must define map and
|
510
|
+
convert method.
|
511
|
+
|
512
|
+
Method
|
513
|
+
map(code)
|
514
|
+
This method is called to define byte stream
|
515
|
+
information. Code is the first byte of stream, 00h to
|
516
|
+
ffh. You must return the following value.
|
517
|
+
|
518
|
+
>= 0 : treat as Unicode value
|
519
|
+
-1 : the byte sequence is malformed
|
520
|
+
-n (n>=2): n-byte sequence
|
521
|
+
|
522
|
+
convert(s)
|
523
|
+
This method is called to convert the byte sequence
|
524
|
+
into a Unicode.
|
525
|
+
The byte sequence is n-byte string (n is defined by
|
526
|
+
map), you must return an integer value (treat as
|
527
|
+
Unicode), an ASCII character or two byte string
|
528
|
+
(treat as a little endian UCS-2 character).
|
529
|
+
|
530
|
+
|
531
|
+
- Additional Library
|
532
|
+
|
533
|
+
XML::DOM module and XML::DOM::Builder module are
|
534
|
+
added since version 0.3.1.
|
535
|
+
These modules are not well documented, and the API specification is
|
536
|
+
not fixed, so they are for experts only.
|
537
|
+
|
538
|
+
|
539
|
+
XML::DOM module (xml/dom/core.rb)
|
540
|
+
This module is a library for making and manipulating XML
|
541
|
+
trees.
|
542
|
+
The APIs are like Document Object Model (DOM) Core of W3C.
|
543
|
+
|
544
|
+
Classes
|
545
|
+
NamedNodeMap
|
546
|
+
NodeList
|
547
|
+
Node
|
548
|
+
DocumentFragment<Node
|
549
|
+
Document<Node
|
550
|
+
CharacterData<Node
|
551
|
+
Attr<Node
|
552
|
+
Element<Node
|
553
|
+
Text<CharacterData
|
554
|
+
Comment<CharacterData
|
555
|
+
CDATASection<Text
|
556
|
+
DocumentType<Node
|
557
|
+
Notation<Node
|
558
|
+
Entity<Node
|
559
|
+
EntityReference<Node
|
560
|
+
ProcessingInstruction<Node
|
561
|
+
|
562
|
+
XML::DOM::Builder (xml/dom/builder.rb)
|
563
|
+
This module is a library for parsing XML file and building
|
564
|
+
XML tree.
|
565
|
+
|
566
|
+
XML::JapaneseTreeBuilder class (xml/dom/builder-ja.rb)
|
567
|
+
|
568
|
+
XML::DOM::Visitor (xml/dom/visitor.rb)
|
569
|
+
|
570
|
+
XMLEncoding_ja class (xml/encoding-ja.rb)
|
571
|
+
|
572
|
+
WGET module (wget.rb)
|
573
|
+
|
574
|
+
DOMHASH module (xml/dom/digest.rb)
|
575
|
+
|
576
|
+
SAX module (xml/sax.rb, xml/saxdriver.rb)
|
577
|
+
|
578
|
+
XML::ParserNS class (xml/parserns.rb)
|
579
|
+
|
580
|
+
XML::DOM (xml/dom2/)
|
581
|
+
An experimental implementation of DOM Level 2.
|
582
|
+
|
583
|
+
|
584
|
+
- Samples
|
585
|
+
|
586
|
+
These sample scripts require the optional "uconv"
|
587
|
+
module for Japanese XML files.
|
588
|
+
|
589
|
+
xmlcheck.rb - a sample for syntax checking
|
590
|
+
xmlevent.rb - a sample for defining event handlers
|
591
|
+
xmliter.rb - a sample for iterator
|
592
|
+
treetest.rb - a sample for XML::SimpleTree
|
593
|
+
buildertest.rb - a sample for XML::SimpleTreeBuilder
|
594
|
+
gtktree.rb - a sample with GTK
|
595
|
+
xmlcomments.rb - a sample that comes from XML::Parser of Perl
|
596
|
+
visitortest.rb - a visitor sample comes from XML::Grove of Perl
|
597
|
+
my-html.rb - a visitor sample comes from XML::Grove of Perl
|
598
|
+
visitor.rb - a sample to access the tree like visitor.
|
599
|
+
namespaces/ - files to test namespaces
|
600
|
+
xpointer.rb - a simple application of XPointer with GTK
|
601
|
+
digesttest.rb - a sample for DOMHASH
|
602
|
+
digesttest2.rb - a sample for DOMHASH without DOM
|
603
|
+
saxtest.rb - a sample for SAX
|
604
|
+
|
605
|
+
|
606
|
+
- Copying
|
607
|
+
|
608
|
+
This extension module is copyrighted free software by
|
609
|
+
Yoshida Masato.
|
610
|
+
|
611
|
+
You can redistribute it and/or modify it under the same terms as
|
612
|
+
Ruby or expat.
|
613
|
+
|
614
|
+
encoding.h and the functions of encoding map are part of
|
615
|
+
XML::Parser for Perl.
|
616
|
+
|
617
|
+
Copyright (c) 1998 Larry Wall and Clark Cooper.
|
618
|
+
All rights reserved.
|
619
|
+
This program is free software; you can redistribute it and/or modify it
|
620
|
+
under the same terms as Perl itself.
|
621
|
+
|
622
|
+
|
623
|
+
- Author
|
624
|
+
|
625
|
+
Yoshida Masato <yoshidam@yoshidam.net>
|
626
|
+
|
627
|
+
XPointer support is contributed by Masaki Fukushima
|
628
|
+
<fukusima@goto.info.waseda.ac.jp>
|
629
|
+
|
630
|
+
|
631
|
+
- History
|
632
|
+
|
633
|
+
Apr 5, 2004 version 0.6.8 fixes overflow, and taints output.
|
634
|
+
Sep 20, 2002 version 0.6.5 fixes reset
|
635
|
+
adds skippedEntity event for expat-1.95.4
|
636
|
+
adds XML::Parser.getFeatureList and
|
637
|
+
XML::Parser#useForeignDTD for expat-1.95.5
|
638
|
+
Jun 18, 2002 version 0.6.4 adds XML::Parser#reset for expat-1.95.3.
|
639
|
+
Mar 23, 2002 version 0.6.2 changes a layout under lib
|
640
|
+
changes the parent class of XML::Parser::Error
|
641
|
+
adds an experimental implementation of DOM Level 2
|
642
|
+
adds XML::ParserNS (experimental)
|
643
|
+
Oct 15, 2000 version 0.6.1 support expat-1.95.0 and expat-1.2
|
644
|
+
Aug 5, 2000 version 0.5.19 RDize xmltree.rb by TAKAHASHI Masayoshi
|
645
|
+
May 30, 2000 version 0.5.18 fix for Ruby 1.5
|
646
|
+
Oct 14, 1999 version 0.5.16 change some samples
|
647
|
+
Aug 18, 1999 version 0.5.15 support start/endDoctypeDecl event of
|
648
|
+
expat-19990728.
|
649
|
+
fix SAX driver bug.
|
650
|
+
Jun 29, 1999 version 0.5.14 support to parse external
|
651
|
+
parameter entities for
|
652
|
+
expat-19990626
|
653
|
+
Jun 10, 1999 version 0.5.13 support experimental SAX driver
|
654
|
+
support expat-1.1
|
655
|
+
May 13, 1999 version 0.5.12 fix extconf.rb bug
|
656
|
+
Apr 28, 1999 version 0.5.11 for expat-19990425, add NotStandalone
|
657
|
+
event, getSpecifiedAttributes method
|
658
|
+
and byteCount method
|
659
|
+
Apr 20, 1999 version 0.5.10 change xmldigest.rb for xss4j
|
660
|
+
Mar 29, 1999 version 0.5.9 change the object structure for Ruby 1.3
|
661
|
+
Mar 23, 1999 version 0.5.8 support the omission of keywords of XPointer
|
662
|
+
support to parse external parsed entities in
|
663
|
+
XML::DOM::Builder
|
664
|
+
Mar 8, 1999 version 0.5.7 support start/endNamespaceDecl event of
|
665
|
+
expat-19990307.
|
666
|
+
Jan 25, 1999 version 0.5.6 class name aliases are defined in C module.
|
667
|
+
support cygwin.
|
668
|
+
Jan 14, 1999 version 0.5.5 support start/endCdataSection event of
|
669
|
+
expat-19981231
|
670
|
+
Jan 13, 1999 version 0.5.4 modify xmltree and xmltreebuilder
|
671
|
+
Jan 10, 1999 version 0.5.3 encoding map support
|
672
|
+
Dec 1, 1998 version 0.5.1 fix some bugs
|
673
|
+
Nov 24, 1998 version 0.5.0 support the test version of expat
|
674
|
+
Nov 5, 1998 version 0.4.18 fix some bugs, class name alias
|
675
|
+
XMLParserError -> XML::Parser::Error
|
676
|
+
and change some internal functions.
|
677
|
+
Oct 28, 1998 version 0.4.17 mIDs are stored into static vars
|
678
|
+
Oct 28, 1998 version 0.4.16 change ID attribute support of XPointer.
|
679
|
+
Node#trim is now xml:space-aware
|
680
|
+
Oct 23, 1998 version 0.4.15 fix some bugs, add class name alias
|
681
|
+
XMLParser -> XML::Parser
|
682
|
+
XML::SimpleTree -> XML::DOM
|
683
|
+
XML::SimpleTreeBuilder -> XML::DOM::Builder
|
684
|
+
Oct 20, 1998 version 0.4.14 add better XPointer support by Masaki Fukushima
|
685
|
+
Sep 17, 1998 version 0.4.5 add methods to SimpleTree
|
686
|
+
Sep 8, 1998 version 0.4.4 change parser object type from
|
687
|
+
T_DATA to T_OBJECT (now can use
|
688
|
+
instance variables)
|
689
|
+
Sep 3, 1998 version 0.4.3 add isFinal flag, and stream
|
690
|
+
parsing facility
|
691
|
+
Sep 2, 1998 version 0.4.2 add external entity event and parser
|
692
|
+
Aug 14, 1998 version 0.3.3 support expat 1.0
|
693
|
+
Aug 12, 1998 version 0.3.2
|
694
|
+
Aug 4, 1998 version 0.3.1
|
695
|
+
Jul 17, 1998 version 0.3
|
696
|
+
Jul 3, 1998 version 0.2
|
697
|
+
Jul 1, 1998 version 0.1
|