oga 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -0
- data/doc/changelog.md +108 -0
- data/ext/c/lexer.c +63 -48
- data/ext/java/org/liboga/xml/Lexer.java +87 -101
- data/ext/ragel/base_lexer.rl +8 -0
- data/lib/oga.rb +7 -1
- data/lib/oga/html/sax_parser.rb +18 -0
- data/lib/oga/oga.rb +30 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/cdata.rb +0 -7
- data/lib/oga/xml/comment.rb +0 -7
- data/lib/oga/xml/doctype.rb +0 -7
- data/lib/oga/xml/document.rb +7 -1
- data/lib/oga/xml/element.rb +43 -18
- data/lib/oga/xml/html_void_elements.rb +28 -0
- data/lib/oga/xml/lexer.rb +1 -26
- data/lib/oga/xml/node.rb +0 -7
- data/lib/oga/xml/parser.rb +34 -2
- data/lib/oga/xml/pull_parser.rb +17 -3
- data/lib/oga/xml/sax_parser.rb +63 -0
- data/lib/oga/xml/text.rb +1 -6
- data/lib/oga/xml/xml_declaration.rb +0 -7
- data/lib/oga/xpath/evaluator.rb +3 -2
- data/lib/oga/xpath/lexer.rb +75 -71
- data/lib/oga/xpath/parser.rb +65 -60
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7e6862730c7a4e17048c73f5d3656060cdbd1cb
|
4
|
+
data.tar.gz: bd9b3215aeff301a46606b63911f60ffc246aed5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 531e732ec31ddfd9f2f94d9ba422154607a8901881126cba42ca790e09e06ac3e8992ad5e47dee866c6723c583d2b939bdcb1708cd81bd6844c854f57d9c6c8f
|
7
|
+
data.tar.gz: cb46703bcabaf49ddc23ff568730f01c3120dedb20804c8107f6616713fafeb6825a2e80a13618b5f31b37d41ab8aac960680f4a5f5de16229af1684ca5ea65d
|
data/README.md
CHANGED
@@ -44,6 +44,26 @@ Parsing an IO handle using the pull parser:
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
+
Parse a string of XML using the SAX parser:
|
48
|
+
|
49
|
+
class ElementNames
|
50
|
+
attr_reader :names
|
51
|
+
|
52
|
+
def initialize
|
53
|
+
@names = []
|
54
|
+
end
|
55
|
+
|
56
|
+
def on_element(namespace, name, attrs = {})
|
57
|
+
@names << name
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
handler = ElementNames.new
|
62
|
+
|
63
|
+
Oga.sax_parse_xml(handler, '<foo><bar></bar></foo>')
|
64
|
+
|
65
|
+
handler.names # => ["foo", "bar"]
|
66
|
+
|
47
67
|
Querying a document using XPath:
|
48
68
|
|
49
69
|
document = Oga.parse_xml('<people><person>Alice</person></people>')
|
@@ -71,6 +91,7 @@ Querying a document using a namespace:
|
|
71
91
|
* Support for parsing XML and HTML(5)
|
72
92
|
* DOM parsing
|
73
93
|
* Stream/pull parsing
|
94
|
+
* SAX parsing
|
74
95
|
* Low memory footprint
|
75
96
|
* High performance, if something doesn't perform well enough it's a bug
|
76
97
|
* Support for XPath 1.0
|
data/doc/changelog.md
CHANGED
@@ -1,5 +1,113 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
This document contains details of the various releases and their release dates.
|
4
|
+
Dates are in the format `yyyy-mm-dd`.
|
5
|
+
|
6
|
+
## 0.1.2 - 2014-09-23
|
7
|
+
|
8
|
+
### SAX API
|
9
|
+
|
10
|
+
A SAX parser/API has been added. This API is useful when even the overhead of
|
11
|
+
the pull-parser is too much memory wise. Example:
|
12
|
+
|
13
|
+
class ElementNames
|
14
|
+
attr_reader :names
|
15
|
+
|
16
|
+
def initialize
|
17
|
+
@names = []
|
18
|
+
end
|
19
|
+
|
20
|
+
def on_element(namespace, name, attrs = {})
|
21
|
+
@names << name
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
handler = ElementNames.new
|
26
|
+
|
27
|
+
Oga.sax_parse_xml(handler, '<foo><bar></bar></foo>')
|
28
|
+
|
29
|
+
handler.names # => ["foo", "bar"]
|
30
|
+
|
31
|
+
### Racc Gem
|
32
|
+
|
33
|
+
Oga will now always use the Racc gem instead of the version shipped with the
|
34
|
+
Ruby standard library.
|
35
|
+
|
36
|
+
### Error Reporting
|
37
|
+
|
38
|
+
XML parser errors have been made a little bit more user friendly, though they
|
39
|
+
can still be quite cryptic.
|
40
|
+
|
41
|
+
### Serializing Elements
|
42
|
+
|
43
|
+
Elements serialized to XML/HTML will use self-closing tags whenever possible.
|
44
|
+
When parsing HTML documents only HTML void elements will use self-closing tags
|
45
|
+
(e.g. `<link>` tags). Example:
|
46
|
+
|
47
|
+
Oga.parse_xml('<foo></foo>').to_xml # => "<foo />"
|
48
|
+
Oga.parse_html('<script></script>').to_xml # => "<script></script>"
|
49
|
+
|
50
|
+
### Default Namespaces
|
51
|
+
|
52
|
+
Namespaces are no longer removed from the attributes list when an element is
|
53
|
+
created.
|
54
|
+
|
55
|
+
Default XML namespaces can now be registered using `xmlns="..."`. Previously
|
56
|
+
this would be ignored. Example:
|
57
|
+
|
58
|
+
document = Oga.parse_xml('<root xmlns="baz"></root>')
|
59
|
+
root = document.children[0]
|
60
|
+
|
61
|
+
root.namespace # => Namespace(name: "xmlns" uri: "baz")
|
62
|
+
|
63
|
+
### Lexing Incomplete Input
|
64
|
+
|
65
|
+
Oga can now lex input such as `</` without entering an infinite loop. Example:
|
66
|
+
|
67
|
+
Oga.parse_xml('</') # => Document(children: NodeSet(Text("</")))
|
68
|
+
|
69
|
+
### Absolute XPath Paths
|
70
|
+
|
71
|
+
Oga can now parse and evaluate the XPath expression "/" (that is, just "/").
|
72
|
+
This will return the root node (usually a Document instance). Example:
|
73
|
+
|
74
|
+
document = Oga.parse_xml('<root></root>')
|
75
|
+
|
76
|
+
document.xpath('/') # => NodeSet(Document(children: NodeSet(Element(name: "root"))))
|
77
|
+
|
78
|
+
### Namespace Ordering
|
79
|
+
|
80
|
+
Namespaces available to an element are now returned in the correct order.
|
81
|
+
Previously outer namespaces would take precedence over inner namespaces, instead
|
82
|
+
of it being the other way around. Example:
|
83
|
+
|
84
|
+
document = Oga.parse_xml <<-EOF
|
85
|
+
<root xmlns:foo="bar">
|
86
|
+
<container xmlns:foo="baz">
|
87
|
+
<foo:text>Text!</foo:text>
|
88
|
+
</container>
|
89
|
+
</root>
|
90
|
+
EOF
|
91
|
+
|
92
|
+
foo = document.at_xpath('root/container/foo:text')
|
93
|
+
|
94
|
+
foo.namespace # => Namespace(name: "foo" uri: "baz")
|
95
|
+
|
96
|
+
### Parsing Capitalized HTML Void Elements
|
97
|
+
|
98
|
+
Oga is now capable of parsing capitalized HTML void elements (e.g. `<BR>`).
|
99
|
+
Previously it could only parse lower-cased void elements. Thanks to Tero Tasanen
|
100
|
+
for fixing this. Example:
|
101
|
+
|
102
|
+
Oga.parse_html('<BR>') # => Document(children: NodeSet(Element(name: "BR")))
|
103
|
+
|
104
|
+
### Node Type Method Removed
|
105
|
+
|
106
|
+
The `node_type` method has been removed and its purpose has been moved into
|
107
|
+
the `XML::PullParser` class itself. This method was solely used by the pull
|
108
|
+
parser to provide shorthands for node classes. As such it doesn't make sense to
|
109
|
+
expose this as a method to the outside world as a public method.
|
110
|
+
|
3
111
|
## 0.1.1 - 2014-09-13
|
4
112
|
|
5
113
|
This release fixes a problem where element attributes were not separated by
|
data/ext/c/lexer.c
CHANGED
@@ -181,6 +181,7 @@ _again:
|
|
181
181
|
case 64: goto st64;
|
182
182
|
case 65: goto st65;
|
183
183
|
case 66: goto st66;
|
184
|
+
case 67: goto st67;
|
184
185
|
default: break;
|
185
186
|
}
|
186
187
|
|
@@ -252,13 +253,13 @@ tr51:
|
|
252
253
|
cs = 35;
|
253
254
|
#line 1 "NONE"
|
254
255
|
{ switch( act ) {
|
255
|
-
case
|
256
|
+
case 26:
|
256
257
|
{{p = ((te))-1;}
|
257
258
|
callback_simple("on_xml_decl_start");
|
258
259
|
cs = 55;
|
259
260
|
}
|
260
261
|
break;
|
261
|
-
case
|
262
|
+
case 29:
|
262
263
|
{{p = ((te))-1;}
|
263
264
|
callback_simple("on_proc_ins_start");
|
264
265
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
@@ -291,7 +292,7 @@ st35:
|
|
291
292
|
case 35:
|
292
293
|
#line 1 "NONE"
|
293
294
|
{ts = p;}
|
294
|
-
#line
|
295
|
+
#line 296 "ext/c/lexer.c"
|
295
296
|
if ( (*p) == 60 )
|
296
297
|
goto tr45;
|
297
298
|
goto tr44;
|
@@ -303,7 +304,7 @@ st36:
|
|
303
304
|
if ( ++p == pe )
|
304
305
|
goto _test_eof36;
|
305
306
|
case 36:
|
306
|
-
#line
|
307
|
+
#line 308 "ext/c/lexer.c"
|
307
308
|
switch( (*p) ) {
|
308
309
|
case 33: goto st1;
|
309
310
|
case 45: goto tr48;
|
@@ -556,19 +557,19 @@ tr27:
|
|
556
557
|
#line 1 "NONE"
|
557
558
|
{te = p+1;}
|
558
559
|
#line 90 "ext/ragel/base_lexer.rl"
|
559
|
-
{act =
|
560
|
+
{act = 29;}
|
560
561
|
goto st38;
|
561
562
|
tr54:
|
562
563
|
#line 1 "NONE"
|
563
564
|
{te = p+1;}
|
564
565
|
#line 182 "ext/ragel/base_lexer.rl"
|
565
|
-
{act =
|
566
|
+
{act = 26;}
|
566
567
|
goto st38;
|
567
568
|
st38:
|
568
569
|
if ( ++p == pe )
|
569
570
|
goto _test_eof38;
|
570
571
|
case 38:
|
571
|
-
#line
|
572
|
+
#line 573 "ext/c/lexer.c"
|
572
573
|
switch( (*p) ) {
|
573
574
|
case 45: goto tr27;
|
574
575
|
case 95: goto tr27;
|
@@ -644,7 +645,7 @@ st41:
|
|
644
645
|
case 41:
|
645
646
|
#line 1 "NONE"
|
646
647
|
{ts = p;}
|
647
|
-
#line
|
648
|
+
#line 649 "ext/c/lexer.c"
|
648
649
|
if ( (*p) == 63 )
|
649
650
|
goto st42;
|
650
651
|
goto tr55;
|
@@ -709,7 +710,7 @@ st43:
|
|
709
710
|
case 43:
|
710
711
|
#line 1 "NONE"
|
711
712
|
{ts = p;}
|
712
|
-
#line
|
713
|
+
#line 714 "ext/c/lexer.c"
|
713
714
|
switch( (*p) ) {
|
714
715
|
case 9: goto tr59;
|
715
716
|
case 32: goto tr59;
|
@@ -764,7 +765,7 @@ st44:
|
|
764
765
|
if ( ++p == pe )
|
765
766
|
goto _test_eof44;
|
766
767
|
case 44:
|
767
|
-
#line
|
768
|
+
#line 769 "ext/c/lexer.c"
|
768
769
|
switch( (*p) ) {
|
769
770
|
case 45: goto tr60;
|
770
771
|
case 95: goto tr60;
|
@@ -1010,7 +1011,7 @@ st55:
|
|
1010
1011
|
case 55:
|
1011
1012
|
#line 1 "NONE"
|
1012
1013
|
{ts = p;}
|
1013
|
-
#line
|
1014
|
+
#line 1015 "ext/c/lexer.c"
|
1014
1015
|
switch( (*p) ) {
|
1015
1016
|
case 34: goto tr77;
|
1016
1017
|
case 39: goto tr78;
|
@@ -1035,7 +1036,7 @@ st56:
|
|
1035
1036
|
if ( ++p == pe )
|
1036
1037
|
goto _test_eof56;
|
1037
1038
|
case 56:
|
1038
|
-
#line
|
1039
|
+
#line 1040 "ext/c/lexer.c"
|
1039
1040
|
if ( (*p) == 34 )
|
1040
1041
|
goto tr36;
|
1041
1042
|
goto st29;
|
@@ -1054,7 +1055,7 @@ st57:
|
|
1054
1055
|
if ( ++p == pe )
|
1055
1056
|
goto _test_eof57;
|
1056
1057
|
case 57:
|
1057
|
-
#line
|
1058
|
+
#line 1059 "ext/c/lexer.c"
|
1058
1059
|
if ( (*p) == 39 )
|
1059
1060
|
goto tr36;
|
1060
1061
|
goto st30;
|
@@ -1111,7 +1112,7 @@ st60:
|
|
1111
1112
|
case 60:
|
1112
1113
|
#line 1 "NONE"
|
1113
1114
|
{ts = p;}
|
1114
|
-
#line
|
1115
|
+
#line 1116 "ext/c/lexer.c"
|
1115
1116
|
switch( (*p) ) {
|
1116
1117
|
case 45: goto st61;
|
1117
1118
|
case 95: goto st61;
|
@@ -1195,7 +1196,7 @@ st62:
|
|
1195
1196
|
case 62:
|
1196
1197
|
#line 1 "NONE"
|
1197
1198
|
{ts = p;}
|
1198
|
-
#line
|
1199
|
+
#line 1200 "ext/c/lexer.c"
|
1199
1200
|
switch( (*p) ) {
|
1200
1201
|
case 9: goto tr87;
|
1201
1202
|
case 10: goto tr38;
|
@@ -1266,23 +1267,7 @@ case 34:
|
|
1266
1267
|
goto st0;
|
1267
1268
|
tr96:
|
1268
1269
|
cs = 64;
|
1269
|
-
#line
|
1270
|
-
{ switch( act ) {
|
1271
|
-
case 0:
|
1272
|
-
{{goto st0;}}
|
1273
|
-
break;
|
1274
|
-
case 23:
|
1275
|
-
{{p = ((te))-1;}
|
1276
|
-
callback("on_text", data, encoding, ts, te);
|
1277
|
-
cs = 35;
|
1278
|
-
}
|
1279
|
-
break;
|
1280
|
-
}
|
1281
|
-
}
|
1282
|
-
goto _again;
|
1283
|
-
tr97:
|
1284
|
-
cs = 64;
|
1285
|
-
#line 306 "ext/ragel/base_lexer.rl"
|
1270
|
+
#line 314 "ext/ragel/base_lexer.rl"
|
1286
1271
|
{te = p;p--;{
|
1287
1272
|
callback("on_text", data, encoding, ts, te);
|
1288
1273
|
cs = 35;
|
@@ -1290,7 +1275,7 @@ tr97:
|
|
1290
1275
|
goto _again;
|
1291
1276
|
tr98:
|
1292
1277
|
cs = 64;
|
1293
|
-
#line
|
1278
|
+
#line 304 "ext/ragel/base_lexer.rl"
|
1294
1279
|
{te = p+1;{
|
1295
1280
|
callback("on_text", data, encoding, ts, mark);
|
1296
1281
|
|
@@ -1300,49 +1285,52 @@ tr98:
|
|
1300
1285
|
cs = 35;
|
1301
1286
|
}}
|
1302
1287
|
goto _again;
|
1288
|
+
tr99:
|
1289
|
+
cs = 64;
|
1290
|
+
#line 297 "ext/ragel/base_lexer.rl"
|
1291
|
+
{te = p+1;{
|
1292
|
+
callback("on_text", data, encoding, ts, te);
|
1293
|
+
|
1294
|
+
cs = 35;
|
1295
|
+
}}
|
1296
|
+
goto _again;
|
1303
1297
|
st64:
|
1304
1298
|
#line 1 "NONE"
|
1305
1299
|
{ts = 0;}
|
1306
|
-
#line 1 "NONE"
|
1307
|
-
{act = 0;}
|
1308
1300
|
if ( ++p == pe )
|
1309
1301
|
goto _test_eof64;
|
1310
1302
|
case 64:
|
1311
1303
|
#line 1 "NONE"
|
1312
1304
|
{ts = p;}
|
1313
|
-
#line
|
1305
|
+
#line 1306 "ext/c/lexer.c"
|
1314
1306
|
if ( (*p) == 60 )
|
1315
1307
|
goto tr95;
|
1316
1308
|
goto tr94;
|
1317
1309
|
tr94:
|
1318
|
-
#line
|
1319
|
-
{te = p+1;}
|
1320
|
-
#line 296 "ext/ragel/base_lexer.rl"
|
1310
|
+
#line 304 "ext/ragel/base_lexer.rl"
|
1321
1311
|
{ mark = p; }
|
1322
|
-
#line 306 "ext/ragel/base_lexer.rl"
|
1323
|
-
{act = 23;}
|
1324
1312
|
goto st65;
|
1325
1313
|
st65:
|
1326
1314
|
if ( ++p == pe )
|
1327
1315
|
goto _test_eof65;
|
1328
1316
|
case 65:
|
1329
|
-
#line
|
1317
|
+
#line 1318 "ext/c/lexer.c"
|
1330
1318
|
if ( (*p) == 60 )
|
1331
|
-
goto
|
1319
|
+
goto tr97;
|
1332
1320
|
goto tr94;
|
1333
|
-
|
1334
|
-
#line
|
1321
|
+
tr97:
|
1322
|
+
#line 304 "ext/ragel/base_lexer.rl"
|
1335
1323
|
{ mark = p; }
|
1336
1324
|
goto st66;
|
1337
1325
|
st66:
|
1338
1326
|
if ( ++p == pe )
|
1339
1327
|
goto _test_eof66;
|
1340
1328
|
case 66:
|
1341
|
-
#line
|
1329
|
+
#line 1330 "ext/c/lexer.c"
|
1342
1330
|
switch( (*p) ) {
|
1343
1331
|
case 33: goto tr98;
|
1344
1332
|
case 45: goto tr98;
|
1345
|
-
case 60: goto
|
1333
|
+
case 60: goto tr97;
|
1346
1334
|
case 63: goto tr98;
|
1347
1335
|
case 95: goto tr98;
|
1348
1336
|
}
|
@@ -1355,6 +1343,31 @@ case 66:
|
|
1355
1343
|
} else
|
1356
1344
|
goto tr98;
|
1357
1345
|
goto tr94;
|
1346
|
+
tr95:
|
1347
|
+
#line 304 "ext/ragel/base_lexer.rl"
|
1348
|
+
{ mark = p; }
|
1349
|
+
goto st67;
|
1350
|
+
st67:
|
1351
|
+
if ( ++p == pe )
|
1352
|
+
goto _test_eof67;
|
1353
|
+
case 67:
|
1354
|
+
#line 1355 "ext/c/lexer.c"
|
1355
|
+
switch( (*p) ) {
|
1356
|
+
case 33: goto tr99;
|
1357
|
+
case 45: goto tr99;
|
1358
|
+
case 60: goto tr97;
|
1359
|
+
case 63: goto tr99;
|
1360
|
+
case 95: goto tr99;
|
1361
|
+
}
|
1362
|
+
if ( (*p) < 65 ) {
|
1363
|
+
if ( 47 <= (*p) && (*p) <= 57 )
|
1364
|
+
goto tr99;
|
1365
|
+
} else if ( (*p) > 90 ) {
|
1366
|
+
if ( 97 <= (*p) && (*p) <= 122 )
|
1367
|
+
goto tr99;
|
1368
|
+
} else
|
1369
|
+
goto tr99;
|
1370
|
+
goto tr94;
|
1358
1371
|
}
|
1359
1372
|
_test_eof35: cs = 35; goto _test_eof;
|
1360
1373
|
_test_eof36: cs = 36; goto _test_eof;
|
@@ -1422,6 +1435,7 @@ case 66:
|
|
1422
1435
|
_test_eof64: cs = 64; goto _test_eof;
|
1423
1436
|
_test_eof65: cs = 65; goto _test_eof;
|
1424
1437
|
_test_eof66: cs = 66; goto _test_eof;
|
1438
|
+
_test_eof67: cs = 67; goto _test_eof;
|
1425
1439
|
|
1426
1440
|
_test_eof: {}
|
1427
1441
|
if ( p == eof )
|
@@ -1477,7 +1491,8 @@ case 66:
|
|
1477
1491
|
case 61: goto tr85;
|
1478
1492
|
case 63: goto tr92;
|
1479
1493
|
case 65: goto tr96;
|
1480
|
-
case 66: goto
|
1494
|
+
case 66: goto tr96;
|
1495
|
+
case 67: goto tr96;
|
1481
1496
|
}
|
1482
1497
|
}
|
1483
1498
|
|
@@ -43,15 +43,14 @@ public class Lexer extends RubyObject
|
|
43
43
|
private static byte[] init__java_lexer_actions_0()
|
44
44
|
{
|
45
45
|
return new byte [] {
|
46
|
-
0, 1, 0, 1, 1, 1,
|
47
|
-
6, 1,
|
48
|
-
14, 1, 15, 1, 16, 1, 17, 1, 18, 1,
|
49
|
-
20, 1, 21, 1, 22, 1, 23, 1, 24, 1,
|
50
|
-
26, 1, 27, 1, 28, 1, 29, 1, 30, 1,
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
4, 36, 3, 4, 0, 31
|
46
|
+
0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1,
|
47
|
+
5, 1, 6, 1, 9, 1, 10, 1, 11, 1, 12, 1,
|
48
|
+
13, 1, 14, 1, 15, 1, 16, 1, 17, 1, 18, 1,
|
49
|
+
19, 1, 20, 1, 21, 1, 22, 1, 23, 1, 24, 1,
|
50
|
+
25, 1, 26, 1, 27, 1, 28, 1, 29, 1, 30, 1,
|
51
|
+
31, 1, 32, 1, 35, 1, 36, 1, 37, 1, 38, 1,
|
52
|
+
39, 1, 40, 1, 41, 1, 42, 1, 43, 1, 44, 2,
|
53
|
+
3, 7, 2, 3, 8, 2, 3, 33, 2, 3, 34
|
55
54
|
};
|
56
55
|
}
|
57
56
|
|
@@ -66,7 +65,7 @@ private static short[] init__java_lexer_key_offsets_0()
|
|
66
65
|
51, 60, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69,
|
67
66
|
70, 81, 83, 91, 100, 109, 110, 111, 127, 135, 144, 153,
|
68
67
|
162, 171, 180, 189, 198, 207, 216, 225, 236, 237, 238, 246,
|
69
|
-
247, 255, 264, 281, 290, 291, 292
|
68
|
+
247, 255, 264, 281, 290, 291, 292, 303
|
70
69
|
};
|
71
70
|
}
|
72
71
|
|
@@ -101,7 +100,8 @@ private static char[] init__java_lexer_trans_keys_0()
|
|
101
100
|
9, 10, 13, 32, 34, 39, 45, 47, 61, 62, 95, 48,
|
102
101
|
57, 65, 90, 97, 122, 45, 58, 95, 48, 57, 65, 90,
|
103
102
|
97, 122, 60, 60, 33, 45, 60, 63, 95, 47, 57, 65,
|
104
|
-
90, 97, 122,
|
103
|
+
90, 97, 122, 33, 45, 60, 63, 95, 47, 57, 65, 90,
|
104
|
+
97, 122, 0
|
105
105
|
};
|
106
106
|
}
|
107
107
|
|
@@ -116,7 +116,7 @@ private static byte[] init__java_lexer_single_lengths_0()
|
|
116
116
|
3, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
117
117
|
5, 2, 2, 3, 3, 1, 1, 10, 2, 3, 3, 3,
|
118
118
|
3, 3, 3, 3, 3, 3, 3, 5, 1, 1, 2, 1,
|
119
|
-
2, 3, 11, 3, 1, 1, 5
|
119
|
+
2, 3, 11, 3, 1, 1, 5, 5
|
120
120
|
};
|
121
121
|
}
|
122
122
|
|
@@ -131,7 +131,7 @@ private static byte[] init__java_lexer_range_lengths_0()
|
|
131
131
|
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
132
132
|
3, 0, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3,
|
133
133
|
3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 3, 0,
|
134
|
-
3, 3, 3, 3, 0, 0, 3
|
134
|
+
3, 3, 3, 3, 0, 0, 3, 3
|
135
135
|
};
|
136
136
|
}
|
137
137
|
|
@@ -146,7 +146,7 @@ private static short[] init__java_lexer_index_offsets_0()
|
|
146
146
|
68, 75, 77, 79, 80, 82, 84, 86, 88, 90, 92, 94,
|
147
147
|
96, 105, 108, 114, 121, 128, 130, 132, 146, 152, 159, 166,
|
148
148
|
173, 180, 187, 194, 201, 208, 215, 222, 231, 233, 235, 241,
|
149
|
-
243, 249, 256, 271, 278, 280, 282
|
149
|
+
243, 249, 256, 271, 278, 280, 282, 291
|
150
150
|
};
|
151
151
|
}
|
152
152
|
|
@@ -179,8 +179,9 @@ private static byte[] init__java_lexer_indicies_0()
|
|
179
179
|
82, 83, 81, 84, 84, 84, 84, 84, 39, 84, 86, 84,
|
180
180
|
84, 84, 84, 85, 87, 38, 88, 87, 40, 42, 89, 90,
|
181
181
|
87, 91, 89, 89, 89, 89, 39, 89, 93, 89, 89, 89,
|
182
|
-
89, 92, 95, 94,
|
183
|
-
98, 98, 94,
|
182
|
+
89, 92, 95, 94, 97, 94, 98, 98, 97, 98, 98, 98,
|
183
|
+
98, 98, 94, 99, 99, 97, 99, 99, 99, 99, 99, 94,
|
184
|
+
0
|
184
185
|
};
|
185
186
|
}
|
186
187
|
|
@@ -197,8 +198,8 @@ private static byte[] init__java_lexer_trans_targs_0()
|
|
197
198
|
35, 24, 35, 35, 35, 40, 38, 41, 42, 41, 41, 43,
|
198
199
|
44, 43, 45, 50, 27, 43, 43, 46, 47, 48, 49, 44,
|
199
200
|
51, 52, 53, 54, 55, 56, 57, 58, 59, 55, 55, 55,
|
200
|
-
61, 60, 60, 62, 31, 63, 34, 62, 62, 62, 65,
|
201
|
-
64, 64, 64
|
201
|
+
61, 60, 60, 62, 31, 63, 34, 62, 62, 62, 65, 67,
|
202
|
+
64, 66, 64, 64
|
202
203
|
};
|
203
204
|
}
|
204
205
|
|
@@ -210,13 +211,13 @@ private static byte[] init__java_lexer_trans_actions_0()
|
|
210
211
|
return new byte [] {
|
211
212
|
79, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0,
|
212
213
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65,
|
213
|
-
0, 0, 69,
|
214
|
+
0, 0, 69, 92, 0, 0, 17, 0, 0, 15, 37, 0,
|
214
215
|
29, 0, 45, 0, 0, 49, 0, 53, 71, 7, 77, 0,
|
215
|
-
67, 0, 73, 81, 75, 0,
|
216
|
-
|
216
|
+
67, 0, 73, 81, 75, 0, 89, 11, 0, 13, 9, 19,
|
217
|
+
86, 21, 0, 0, 0, 25, 23, 0, 0, 0, 0, 83,
|
217
218
|
0, 0, 0, 0, 31, 7, 7, 0, 0, 35, 33, 27,
|
218
|
-
0, 41, 39, 43, 0, 0, 0, 51, 55, 47,
|
219
|
-
61, 59, 57
|
219
|
+
0, 41, 39, 43, 0, 0, 0, 51, 55, 47, 1, 1,
|
220
|
+
61, 1, 59, 57
|
220
221
|
};
|
221
222
|
}
|
222
223
|
|
@@ -231,7 +232,7 @@ private static byte[] init__java_lexer_to_state_actions_0()
|
|
231
232
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
|
232
233
|
0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
|
233
234
|
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
|
234
|
-
3, 0, 3, 0,
|
235
|
+
3, 0, 3, 0, 3, 0, 0, 0
|
235
236
|
};
|
236
237
|
}
|
237
238
|
|
@@ -246,7 +247,7 @@ private static byte[] init__java_lexer_from_state_actions_0()
|
|
246
247
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
|
247
248
|
0, 0, 0, 0, 0, 5, 0, 5, 0, 0, 0, 0,
|
248
249
|
0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0,
|
249
|
-
5, 0, 5, 0, 5, 0, 0
|
250
|
+
5, 0, 5, 0, 5, 0, 0, 0
|
250
251
|
};
|
251
252
|
}
|
252
253
|
|
@@ -261,7 +262,7 @@ private static short[] init__java_lexer_eof_trans_0()
|
|
261
262
|
1, 0, 0, 0, 0, 35, 35, 0, 0, 0, 0, 0,
|
262
263
|
47, 51, 52, 53, 53, 0, 58, 0, 66, 67, 67, 67,
|
263
264
|
67, 67, 67, 67, 67, 67, 67, 0, 82, 82, 83, 82,
|
264
|
-
0, 86, 0, 93, 0, 97,
|
265
|
+
0, 86, 0, 93, 0, 97, 97, 97
|
265
266
|
};
|
266
267
|
}
|
267
268
|
|
@@ -344,7 +345,7 @@ static final int java_lexer_en_main = 35;
|
|
344
345
|
int eof = data.length;
|
345
346
|
|
346
347
|
|
347
|
-
// line
|
348
|
+
// line 349 "ext/java/org/liboga/xml/Lexer.java"
|
348
349
|
{
|
349
350
|
int _klen;
|
350
351
|
int _trans = 0;
|
@@ -369,11 +370,11 @@ case 1:
|
|
369
370
|
_nacts = (int) _java_lexer_actions[_acts++];
|
370
371
|
while ( _nacts-- > 0 ) {
|
371
372
|
switch ( _java_lexer_actions[_acts++] ) {
|
372
|
-
case
|
373
|
+
case 2:
|
373
374
|
// line 1 "NONE"
|
374
375
|
{ts = p;}
|
375
376
|
break;
|
376
|
-
// line
|
377
|
+
// line 378 "ext/java/org/liboga/xml/Lexer.java"
|
377
378
|
}
|
378
379
|
}
|
379
380
|
|
@@ -438,14 +439,14 @@ case 3:
|
|
438
439
|
switch ( _java_lexer_actions[_acts++] )
|
439
440
|
{
|
440
441
|
case 0:
|
441
|
-
// line
|
442
|
+
// line 304 "ext/ragel/base_lexer.rl"
|
442
443
|
{ mark = p; }
|
443
444
|
break;
|
444
|
-
case
|
445
|
+
case 3:
|
445
446
|
// line 1 "NONE"
|
446
447
|
{te = p+1;}
|
447
448
|
break;
|
448
|
-
case
|
449
|
+
case 4:
|
449
450
|
// line 100 "ext/ragel/base_lexer.rl"
|
450
451
|
{te = p+1;{
|
451
452
|
callback("on_text", data, encoding, mark, ts);
|
@@ -454,52 +455,52 @@ case 3:
|
|
454
455
|
( this.cs) = 35;
|
455
456
|
}}
|
456
457
|
break;
|
457
|
-
case
|
458
|
+
case 5:
|
458
459
|
// line 107 "ext/ragel/base_lexer.rl"
|
459
460
|
{te = p+1;}
|
460
461
|
break;
|
461
|
-
case
|
462
|
+
case 6:
|
462
463
|
// line 107 "ext/ragel/base_lexer.rl"
|
463
464
|
{te = p;p--;}
|
464
465
|
break;
|
465
|
-
case
|
466
|
+
case 7:
|
466
467
|
// line 148 "ext/ragel/base_lexer.rl"
|
467
468
|
{( this.act) = 3;}
|
468
469
|
break;
|
469
|
-
case
|
470
|
+
case 8:
|
470
471
|
// line 165 "ext/ragel/base_lexer.rl"
|
471
472
|
{( this.act) = 7;}
|
472
473
|
break;
|
473
|
-
case
|
474
|
+
case 9:
|
474
475
|
// line 154 "ext/ragel/base_lexer.rl"
|
475
476
|
{te = p+1;{
|
476
477
|
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
477
478
|
}}
|
478
479
|
break;
|
479
|
-
case
|
480
|
+
case 10:
|
480
481
|
// line 124 "ext/ragel/base_lexer.rl"
|
481
482
|
{te = p+1;{
|
482
483
|
callback("on_string", data, encoding, ts + 1, te - 1);
|
483
484
|
}}
|
484
485
|
break;
|
485
|
-
case
|
486
|
+
case 11:
|
486
487
|
// line 163 "ext/ragel/base_lexer.rl"
|
487
488
|
{te = p+1;}
|
488
489
|
break;
|
489
|
-
case
|
490
|
+
case 12:
|
490
491
|
// line 169 "ext/ragel/base_lexer.rl"
|
491
492
|
{te = p+1;{
|
492
493
|
callback_simple("on_doctype_end");
|
493
494
|
( this.cs) = 35;
|
494
495
|
}}
|
495
496
|
break;
|
496
|
-
case
|
497
|
+
case 13:
|
497
498
|
// line 165 "ext/ragel/base_lexer.rl"
|
498
499
|
{te = p;p--;{
|
499
500
|
callback("on_doctype_name", data, encoding, ts, te);
|
500
501
|
}}
|
501
502
|
break;
|
502
|
-
case
|
503
|
+
case 14:
|
503
504
|
// line 1 "NONE"
|
504
505
|
{ switch( ( this.act) ) {
|
505
506
|
case 3:
|
@@ -515,98 +516,102 @@ case 3:
|
|
515
516
|
}
|
516
517
|
}
|
517
518
|
break;
|
518
|
-
case
|
519
|
+
case 15:
|
519
520
|
// line 189 "ext/ragel/base_lexer.rl"
|
520
521
|
{te = p+1;{
|
521
522
|
callback_simple("on_xml_decl_end");
|
522
523
|
( this.cs) = 35;
|
523
524
|
}}
|
524
525
|
break;
|
525
|
-
case
|
526
|
+
case 16:
|
526
527
|
// line 124 "ext/ragel/base_lexer.rl"
|
527
528
|
{te = p+1;{
|
528
529
|
callback("on_string", data, encoding, ts + 1, te - 1);
|
529
530
|
}}
|
530
531
|
break;
|
531
|
-
case
|
532
|
+
case 17:
|
532
533
|
// line 201 "ext/ragel/base_lexer.rl"
|
533
534
|
{te = p+1;}
|
534
535
|
break;
|
535
|
-
case
|
536
|
+
case 18:
|
536
537
|
// line 195 "ext/ragel/base_lexer.rl"
|
537
538
|
{te = p;p--;{
|
538
539
|
callback("on_attribute", data, encoding, ts, te);
|
539
540
|
}}
|
540
541
|
break;
|
541
|
-
case
|
542
|
+
case 19:
|
542
543
|
// line 201 "ext/ragel/base_lexer.rl"
|
543
544
|
{te = p;p--;}
|
544
545
|
break;
|
545
|
-
case
|
546
|
+
case 20:
|
546
547
|
// line 201 "ext/ragel/base_lexer.rl"
|
547
548
|
{{p = ((te))-1;}}
|
548
549
|
break;
|
549
|
-
case
|
550
|
+
case 21:
|
550
551
|
// line 228 "ext/ragel/base_lexer.rl"
|
551
552
|
{te = p+1;{
|
552
553
|
callback("on_element_ns", data, encoding, ts, te - 1);
|
553
554
|
}}
|
554
555
|
break;
|
555
|
-
case
|
556
|
+
case 22:
|
556
557
|
// line 232 "ext/ragel/base_lexer.rl"
|
557
558
|
{te = p;p--;{
|
558
559
|
callback("on_element_name", data, encoding, ts, te);
|
559
560
|
( this.cs) = 62;
|
560
561
|
}}
|
561
562
|
break;
|
562
|
-
case
|
563
|
+
case 23:
|
563
564
|
// line 241 "ext/ragel/base_lexer.rl"
|
564
565
|
{te = p+1;}
|
565
566
|
break;
|
566
|
-
case
|
567
|
+
case 24:
|
567
568
|
// line 243 "ext/ragel/base_lexer.rl"
|
568
569
|
{te = p+1;{
|
569
570
|
callback_simple("advance_line");
|
570
571
|
}}
|
571
572
|
break;
|
572
|
-
case
|
573
|
+
case 25:
|
573
574
|
// line 248 "ext/ragel/base_lexer.rl"
|
574
575
|
{te = p+1;{
|
575
576
|
callback("on_attribute_ns", data, encoding, ts, te - 1);
|
576
577
|
}}
|
577
578
|
break;
|
578
|
-
case
|
579
|
+
case 26:
|
579
580
|
// line 124 "ext/ragel/base_lexer.rl"
|
580
581
|
{te = p+1;{
|
581
582
|
callback("on_string", data, encoding, ts + 1, te - 1);
|
582
583
|
}}
|
583
584
|
break;
|
584
|
-
case
|
585
|
+
case 27:
|
585
586
|
// line 260 "ext/ragel/base_lexer.rl"
|
586
587
|
{te = p+1;{
|
587
588
|
callback_simple("on_element_open_end");
|
588
589
|
( this.cs) = 35;
|
589
590
|
}}
|
590
591
|
break;
|
591
|
-
case
|
592
|
+
case 28:
|
592
593
|
// line 266 "ext/ragel/base_lexer.rl"
|
593
594
|
{te = p+1;{
|
594
595
|
callback_simple("on_element_end");
|
595
596
|
( this.cs) = 35;
|
596
597
|
}}
|
597
598
|
break;
|
598
|
-
case
|
599
|
+
case 29:
|
599
600
|
// line 252 "ext/ragel/base_lexer.rl"
|
600
601
|
{te = p;p--;{
|
601
602
|
callback("on_attribute", data, encoding, ts, te);
|
602
603
|
}}
|
603
604
|
break;
|
604
|
-
case
|
605
|
-
// line
|
606
|
-
{
|
605
|
+
case 30:
|
606
|
+
// line 297 "ext/ragel/base_lexer.rl"
|
607
|
+
{te = p+1;{
|
608
|
+
callback("on_text", data, encoding, ts, te);
|
609
|
+
|
610
|
+
( this.cs) = 35;
|
611
|
+
}}
|
607
612
|
break;
|
608
|
-
case
|
609
|
-
// line
|
613
|
+
case 31:
|
614
|
+
// line 304 "ext/ragel/base_lexer.rl"
|
610
615
|
{te = p+1;{
|
611
616
|
callback("on_text", data, encoding, ts, mark);
|
612
617
|
|
@@ -616,49 +621,34 @@ case 3:
|
|
616
621
|
( this.cs) = 35;
|
617
622
|
}}
|
618
623
|
break;
|
619
|
-
case
|
620
|
-
// line
|
624
|
+
case 32:
|
625
|
+
// line 314 "ext/ragel/base_lexer.rl"
|
621
626
|
{te = p;p--;{
|
622
627
|
callback("on_text", data, encoding, ts, te);
|
623
628
|
( this.cs) = 35;
|
624
629
|
}}
|
625
630
|
break;
|
626
|
-
case
|
627
|
-
// line 1 "NONE"
|
628
|
-
{ switch( ( this.act) ) {
|
629
|
-
case 0:
|
630
|
-
{{( this.cs) = 0; _goto_targ = 2; if (true) continue _goto;}}
|
631
|
-
break;
|
632
|
-
case 23:
|
633
|
-
{{p = ((te))-1;}
|
634
|
-
callback("on_text", data, encoding, ts, te);
|
635
|
-
( this.cs) = 35;
|
636
|
-
}
|
637
|
-
break;
|
638
|
-
}
|
639
|
-
}
|
640
|
-
break;
|
641
|
-
case 35:
|
631
|
+
case 33:
|
642
632
|
// line 182 "ext/ragel/base_lexer.rl"
|
643
|
-
{( this.act) =
|
633
|
+
{( this.act) = 26;}
|
644
634
|
break;
|
645
|
-
case
|
635
|
+
case 34:
|
646
636
|
// line 90 "ext/ragel/base_lexer.rl"
|
647
|
-
{( this.act) =
|
637
|
+
{( this.act) = 29;}
|
648
638
|
break;
|
649
|
-
case
|
639
|
+
case 35:
|
650
640
|
// line 56 "ext/ragel/base_lexer.rl"
|
651
641
|
{te = p+1;{
|
652
642
|
callback("on_comment", data, encoding, ts + 4, te - 3);
|
653
643
|
}}
|
654
644
|
break;
|
655
|
-
case
|
645
|
+
case 36:
|
656
646
|
// line 72 "ext/ragel/base_lexer.rl"
|
657
647
|
{te = p+1;{
|
658
648
|
callback("on_cdata", data, encoding, ts + 9, te - 3);
|
659
649
|
}}
|
660
650
|
break;
|
661
|
-
case
|
651
|
+
case 37:
|
662
652
|
// line 216 "ext/ragel/base_lexer.rl"
|
663
653
|
{te = p+1;{
|
664
654
|
callback_simple("on_element_start");
|
@@ -666,27 +656,27 @@ case 3:
|
|
666
656
|
( this.cs) = 60;
|
667
657
|
}}
|
668
658
|
break;
|
669
|
-
case
|
659
|
+
case 38:
|
670
660
|
// line 222 "ext/ragel/base_lexer.rl"
|
671
661
|
{te = p+1;{
|
672
662
|
callback_simple("on_element_end");
|
673
663
|
}}
|
674
664
|
break;
|
675
|
-
case
|
665
|
+
case 39:
|
676
666
|
// line 280 "ext/ragel/base_lexer.rl"
|
677
667
|
{te = p+1;{
|
678
668
|
p--;
|
679
669
|
( this.cs) = 64;
|
680
670
|
}}
|
681
671
|
break;
|
682
|
-
case
|
672
|
+
case 40:
|
683
673
|
// line 140 "ext/ragel/base_lexer.rl"
|
684
674
|
{te = p;p--;{
|
685
675
|
callback_simple("on_doctype_start");
|
686
676
|
( this.cs) = 43;
|
687
677
|
}}
|
688
678
|
break;
|
689
|
-
case
|
679
|
+
case 41:
|
690
680
|
// line 90 "ext/ragel/base_lexer.rl"
|
691
681
|
{te = p;p--;{
|
692
682
|
callback_simple("on_proc_ins_start");
|
@@ -697,30 +687,30 @@ case 3:
|
|
697
687
|
( this.cs) = 41;
|
698
688
|
}}
|
699
689
|
break;
|
700
|
-
case
|
690
|
+
case 42:
|
701
691
|
// line 280 "ext/ragel/base_lexer.rl"
|
702
692
|
{te = p;p--;{
|
703
693
|
p--;
|
704
694
|
( this.cs) = 64;
|
705
695
|
}}
|
706
696
|
break;
|
707
|
-
case
|
697
|
+
case 43:
|
708
698
|
// line 280 "ext/ragel/base_lexer.rl"
|
709
699
|
{{p = ((te))-1;}{
|
710
700
|
p--;
|
711
701
|
( this.cs) = 64;
|
712
702
|
}}
|
713
703
|
break;
|
714
|
-
case
|
704
|
+
case 44:
|
715
705
|
// line 1 "NONE"
|
716
706
|
{ switch( ( this.act) ) {
|
717
|
-
case
|
707
|
+
case 26:
|
718
708
|
{{p = ((te))-1;}
|
719
709
|
callback_simple("on_xml_decl_start");
|
720
710
|
( this.cs) = 55;
|
721
711
|
}
|
722
712
|
break;
|
723
|
-
case
|
713
|
+
case 29:
|
724
714
|
{{p = ((te))-1;}
|
725
715
|
callback_simple("on_proc_ins_start");
|
726
716
|
callback("on_proc_ins_name", data, encoding, ts + 2, te);
|
@@ -733,7 +723,7 @@ case 3:
|
|
733
723
|
}
|
734
724
|
}
|
735
725
|
break;
|
736
|
-
// line
|
726
|
+
// line 727 "ext/java/org/liboga/xml/Lexer.java"
|
737
727
|
}
|
738
728
|
}
|
739
729
|
}
|
@@ -747,11 +737,7 @@ case 2:
|
|
747
737
|
// line 1 "NONE"
|
748
738
|
{ts = -1;}
|
749
739
|
break;
|
750
|
-
|
751
|
-
// line 1 "NONE"
|
752
|
-
{( this.act) = 0;}
|
753
|
-
break;
|
754
|
-
// line 755 "ext/java/org/liboga/xml/Lexer.java"
|
740
|
+
// line 741 "ext/java/org/liboga/xml/Lexer.java"
|
755
741
|
}
|
756
742
|
}
|
757
743
|
|