oga 2.14 → 3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -11
- data/ext/c/lexer.c +39 -27
- data/ext/java/org/liboga/xml/Lexer.java +56 -53
- data/ext/ragel/base_lexer.rl +1 -1
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/lexer.rb +5 -1
- data/lib/oga/xml/querying.rb +13 -4
- data/lib/oga/xml/to_xml.rb +2 -0
- data/lib/oga/xpath/compiler.rb +24 -4
- data/oga.gemspec +1 -2
- metadata +7 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c63e4b4004bf4af6d575ffe513b552cba8282fb259db82423b500f8360f3292a
|
4
|
+
data.tar.gz: 1e872c97b653e35fef9b7ad0178c07b3976a2bc0c98c324536ae9e3fff077e49
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac793a9b79020aeffd99458a5200077c366d939d69ab9284dfc4a16be7f32cc2f469d4c75a0cf7d539f39033e5ba423688009d7da366e33fbb275b688bc23277
|
7
|
+
data.tar.gz: 490d98fbef2aa6c98f22f6815c4fc16da1573f53d4a686877707f5e57cdd1e6995451deee1521c480e5956624c0fa003470cdb2c4b2a3944a6fb0d52babf852f
|
data/README.md
CHANGED
@@ -172,9 +172,9 @@ Querying a document using a namespace:
|
|
172
172
|
|
173
173
|
| Ruby | Required | Recommended |
|
174
174
|
|:---------|:--------------|:------------|
|
175
|
-
| MRI | >=
|
176
|
-
| Rubinius | >= 2.2 | >= 2.2.10 |
|
175
|
+
| MRI | >= 2.3.0 | >= 2.6.0 |
|
177
176
|
| JRuby | >= 1.7 | >= 1.7.12 |
|
177
|
+
| Rubinius | Not supported | |
|
178
178
|
| Maglev | Not supported | |
|
179
179
|
| Topaz | Not supported | |
|
180
180
|
| mruby | Not supported | |
|
@@ -227,15 +227,14 @@ And if you want to specify an explicit namespace URI, you can use this:
|
|
227
227
|
|
228
228
|
descendant::*[local-name() = "bar" and namespace-uri() = "http://example.com"]
|
229
229
|
|
230
|
-
|
231
|
-
That is,
|
230
|
+
Like Nokogiri, Oga provides a way to create "dynamic" namespaces.
|
231
|
+
That is, Oga allows one to query the above document as follows:
|
232
232
|
|
233
|
-
document =
|
233
|
+
document = Oga.parse_xml('<root xmlns="http://example.com"><bar>bar</bar></root>')
|
234
234
|
|
235
|
-
document.xpath('x:root/x:bar', :x => 'http://example.com')
|
235
|
+
document.xpath('x:root/x:bar', namespaces: {'x' => 'http://example.com'})
|
236
236
|
|
237
|
-
|
238
|
-
queries. Because Oga assigns the name "xmlns" to default namespaces you can use
|
237
|
+
Moreover, because Oga assigns the name "xmlns" to default namespaces, you can use
|
239
238
|
this in your XPath queries:
|
240
239
|
|
241
240
|
document = Oga.parse_xml('<root xmlns="http://example.com"><bar>bar</bar></root>')
|
@@ -246,9 +245,6 @@ When using this you can still restrict the query to the correct namespace URI:
|
|
246
245
|
|
247
246
|
document.xpath('xmlns:root[namespace-uri() = "http://example.com"]/xmlns:bar')
|
248
247
|
|
249
|
-
In the future I might add an API to ease this process, although at this time I
|
250
|
-
have little interest in providing an API similar to Nokogiri.
|
251
|
-
|
252
248
|
## HTML5 Support
|
253
249
|
|
254
250
|
Oga fully supports HTML5 including the omission of certain tags. For example,
|
data/ext/c/lexer.c
CHANGED
@@ -1179,6 +1179,8 @@ case 57:
|
|
1179
1179
|
case 83: goto st65;
|
1180
1180
|
case 91: goto tr88;
|
1181
1181
|
case 96: goto st0;
|
1182
|
+
case 112: goto st60;
|
1183
|
+
case 115: goto st65;
|
1182
1184
|
}
|
1183
1185
|
if ( (*p) < 58 ) {
|
1184
1186
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1208,7 +1210,7 @@ st58:
|
|
1208
1210
|
if ( ++p == pe )
|
1209
1211
|
goto _test_eof58;
|
1210
1212
|
case 58:
|
1211
|
-
#line
|
1213
|
+
#line 1214 "ext/c/lexer.c"
|
1212
1214
|
switch( (*p) ) {
|
1213
1215
|
case 47: goto tr89;
|
1214
1216
|
case 96: goto tr89;
|
@@ -1243,6 +1245,7 @@ case 60:
|
|
1243
1245
|
case 47: goto tr91;
|
1244
1246
|
case 85: goto st61;
|
1245
1247
|
case 96: goto tr91;
|
1248
|
+
case 117: goto st61;
|
1246
1249
|
}
|
1247
1250
|
if ( (*p) < 58 ) {
|
1248
1251
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1264,6 +1267,7 @@ case 61:
|
|
1264
1267
|
case 47: goto tr91;
|
1265
1268
|
case 66: goto st62;
|
1266
1269
|
case 96: goto tr91;
|
1270
|
+
case 98: goto st62;
|
1267
1271
|
}
|
1268
1272
|
if ( (*p) < 58 ) {
|
1269
1273
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1285,6 +1289,7 @@ case 62:
|
|
1285
1289
|
case 47: goto tr91;
|
1286
1290
|
case 76: goto st63;
|
1287
1291
|
case 96: goto tr91;
|
1292
|
+
case 108: goto st63;
|
1288
1293
|
}
|
1289
1294
|
if ( (*p) < 58 ) {
|
1290
1295
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1306,6 +1311,7 @@ case 63:
|
|
1306
1311
|
case 47: goto tr91;
|
1307
1312
|
case 73: goto st64;
|
1308
1313
|
case 96: goto tr91;
|
1314
|
+
case 105: goto st64;
|
1309
1315
|
}
|
1310
1316
|
if ( (*p) < 58 ) {
|
1311
1317
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1327,6 +1333,7 @@ case 64:
|
|
1327
1333
|
case 47: goto tr91;
|
1328
1334
|
case 67: goto tr96;
|
1329
1335
|
case 96: goto tr91;
|
1336
|
+
case 99: goto tr96;
|
1330
1337
|
}
|
1331
1338
|
if ( (*p) < 58 ) {
|
1332
1339
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1348,6 +1355,7 @@ case 65:
|
|
1348
1355
|
case 47: goto tr91;
|
1349
1356
|
case 89: goto st66;
|
1350
1357
|
case 96: goto tr91;
|
1358
|
+
case 121: goto st66;
|
1351
1359
|
}
|
1352
1360
|
if ( (*p) < 58 ) {
|
1353
1361
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1369,6 +1377,7 @@ case 66:
|
|
1369
1377
|
case 47: goto tr91;
|
1370
1378
|
case 83: goto st67;
|
1371
1379
|
case 96: goto tr91;
|
1380
|
+
case 115: goto st67;
|
1372
1381
|
}
|
1373
1382
|
if ( (*p) < 58 ) {
|
1374
1383
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1390,6 +1399,7 @@ case 67:
|
|
1390
1399
|
case 47: goto tr91;
|
1391
1400
|
case 84: goto st68;
|
1392
1401
|
case 96: goto tr91;
|
1402
|
+
case 116: goto st68;
|
1393
1403
|
}
|
1394
1404
|
if ( (*p) < 58 ) {
|
1395
1405
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1411,6 +1421,7 @@ case 68:
|
|
1411
1421
|
case 47: goto tr91;
|
1412
1422
|
case 69: goto st69;
|
1413
1423
|
case 96: goto tr91;
|
1424
|
+
case 101: goto st69;
|
1414
1425
|
}
|
1415
1426
|
if ( (*p) < 58 ) {
|
1416
1427
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1432,6 +1443,7 @@ case 69:
|
|
1432
1443
|
case 47: goto tr91;
|
1433
1444
|
case 77: goto tr96;
|
1434
1445
|
case 96: goto tr91;
|
1446
|
+
case 109: goto tr96;
|
1435
1447
|
}
|
1436
1448
|
if ( (*p) < 58 ) {
|
1437
1449
|
if ( 0 <= (*p) && (*p) <= 44 )
|
@@ -1518,7 +1530,7 @@ st70:
|
|
1518
1530
|
case 70:
|
1519
1531
|
#line 1 "NONE"
|
1520
1532
|
{ts = p;}
|
1521
|
-
#line
|
1533
|
+
#line 1534 "ext/c/lexer.c"
|
1522
1534
|
switch( (*p) ) {
|
1523
1535
|
case 34: goto tr103;
|
1524
1536
|
case 39: goto tr104;
|
@@ -1548,7 +1560,7 @@ st71:
|
|
1548
1560
|
if ( ++p == pe )
|
1549
1561
|
goto _test_eof71;
|
1550
1562
|
case 71:
|
1551
|
-
#line
|
1563
|
+
#line 1564 "ext/c/lexer.c"
|
1552
1564
|
switch( (*p) ) {
|
1553
1565
|
case 47: goto tr106;
|
1554
1566
|
case 96: goto tr106;
|
@@ -1575,7 +1587,7 @@ st72:
|
|
1575
1587
|
if ( ++p == pe )
|
1576
1588
|
goto _test_eof72;
|
1577
1589
|
case 72:
|
1578
|
-
#line
|
1590
|
+
#line 1591 "ext/c/lexer.c"
|
1579
1591
|
if ( (*p) == 62 )
|
1580
1592
|
goto tr109;
|
1581
1593
|
goto tr108;
|
@@ -1612,7 +1624,7 @@ st73:
|
|
1612
1624
|
case 73:
|
1613
1625
|
#line 1 "NONE"
|
1614
1626
|
{ts = p;}
|
1615
|
-
#line
|
1627
|
+
#line 1628 "ext/c/lexer.c"
|
1616
1628
|
switch( (*p) ) {
|
1617
1629
|
case 47: goto st0;
|
1618
1630
|
case 96: goto st0;
|
@@ -1694,7 +1706,7 @@ st75:
|
|
1694
1706
|
case 75:
|
1695
1707
|
#line 1 "NONE"
|
1696
1708
|
{ts = p;}
|
1697
|
-
#line
|
1709
|
+
#line 1710 "ext/c/lexer.c"
|
1698
1710
|
switch( (*p) ) {
|
1699
1711
|
case 47: goto tr114;
|
1700
1712
|
case 62: goto tr115;
|
@@ -1722,7 +1734,7 @@ st76:
|
|
1722
1734
|
if ( ++p == pe )
|
1723
1735
|
goto _test_eof76;
|
1724
1736
|
case 76:
|
1725
|
-
#line
|
1737
|
+
#line 1738 "ext/c/lexer.c"
|
1726
1738
|
switch( (*p) ) {
|
1727
1739
|
case 47: goto tr116;
|
1728
1740
|
case 58: goto tr118;
|
@@ -1800,7 +1812,7 @@ st77:
|
|
1800
1812
|
case 77:
|
1801
1813
|
#line 1 "NONE"
|
1802
1814
|
{ts = p;}
|
1803
|
-
#line
|
1815
|
+
#line 1816 "ext/c/lexer.c"
|
1804
1816
|
switch( (*p) ) {
|
1805
1817
|
case 13: goto tr121;
|
1806
1818
|
case 32: goto tr120;
|
@@ -1820,7 +1832,7 @@ st78:
|
|
1820
1832
|
if ( ++p == pe )
|
1821
1833
|
goto _test_eof78;
|
1822
1834
|
case 78:
|
1823
|
-
#line
|
1835
|
+
#line 1836 "ext/c/lexer.c"
|
1824
1836
|
if ( (*p) == 10 )
|
1825
1837
|
goto tr120;
|
1826
1838
|
goto tr123;
|
@@ -1849,7 +1861,7 @@ st79:
|
|
1849
1861
|
case 79:
|
1850
1862
|
#line 1 "NONE"
|
1851
1863
|
{ts = p;}
|
1852
|
-
#line
|
1864
|
+
#line 1865 "ext/c/lexer.c"
|
1853
1865
|
switch( (*p) ) {
|
1854
1866
|
case 13: goto tr125;
|
1855
1867
|
case 32: goto tr125;
|
@@ -1903,7 +1915,7 @@ st81:
|
|
1903
1915
|
case 81:
|
1904
1916
|
#line 1 "NONE"
|
1905
1917
|
{ts = p;}
|
1906
|
-
#line
|
1918
|
+
#line 1919 "ext/c/lexer.c"
|
1907
1919
|
switch( (*p) ) {
|
1908
1920
|
case 34: goto tr128;
|
1909
1921
|
case 39: goto tr129;
|
@@ -1980,7 +1992,7 @@ st82:
|
|
1980
1992
|
case 82:
|
1981
1993
|
#line 1 "NONE"
|
1982
1994
|
{ts = p;}
|
1983
|
-
#line
|
1995
|
+
#line 1996 "ext/c/lexer.c"
|
1984
1996
|
switch( (*p) ) {
|
1985
1997
|
case 10: goto tr132;
|
1986
1998
|
case 13: goto st84;
|
@@ -2065,7 +2077,7 @@ st87:
|
|
2065
2077
|
if ( ++p == pe )
|
2066
2078
|
goto _test_eof87;
|
2067
2079
|
case 87:
|
2068
|
-
#line
|
2080
|
+
#line 2081 "ext/c/lexer.c"
|
2069
2081
|
switch( (*p) ) {
|
2070
2082
|
case 47: goto tr144;
|
2071
2083
|
case 58: goto st20;
|
@@ -2199,7 +2211,7 @@ st89:
|
|
2199
2211
|
case 89:
|
2200
2212
|
#line 1 "NONE"
|
2201
2213
|
{ts = p;}
|
2202
|
-
#line
|
2214
|
+
#line 2215 "ext/c/lexer.c"
|
2203
2215
|
switch( (*p) ) {
|
2204
2216
|
case 10: goto tr148;
|
2205
2217
|
case 13: goto st91;
|
@@ -2283,7 +2295,7 @@ st94:
|
|
2283
2295
|
if ( ++p == pe )
|
2284
2296
|
goto _test_eof94;
|
2285
2297
|
case 94:
|
2286
|
-
#line
|
2298
|
+
#line 2299 "ext/c/lexer.c"
|
2287
2299
|
switch( (*p) ) {
|
2288
2300
|
case 47: goto tr159;
|
2289
2301
|
case 58: goto st21;
|
@@ -2400,7 +2412,7 @@ st96:
|
|
2400
2412
|
case 96:
|
2401
2413
|
#line 1 "NONE"
|
2402
2414
|
{ts = p;}
|
2403
|
-
#line
|
2415
|
+
#line 2416 "ext/c/lexer.c"
|
2404
2416
|
if ( (*p) == 60 )
|
2405
2417
|
goto tr162;
|
2406
2418
|
goto tr161;
|
@@ -2414,7 +2426,7 @@ st97:
|
|
2414
2426
|
if ( ++p == pe )
|
2415
2427
|
goto _test_eof97;
|
2416
2428
|
case 97:
|
2417
|
-
#line
|
2429
|
+
#line 2430 "ext/c/lexer.c"
|
2418
2430
|
if ( (*p) == 60 )
|
2419
2431
|
goto tr164;
|
2420
2432
|
goto tr161;
|
@@ -2430,7 +2442,7 @@ st98:
|
|
2430
2442
|
if ( ++p == pe )
|
2431
2443
|
goto _test_eof98;
|
2432
2444
|
case 98:
|
2433
|
-
#line
|
2445
|
+
#line 2446 "ext/c/lexer.c"
|
2434
2446
|
switch( (*p) ) {
|
2435
2447
|
case 60: goto tr164;
|
2436
2448
|
case 64: goto tr161;
|
@@ -2463,7 +2475,7 @@ st99:
|
|
2463
2475
|
if ( ++p == pe )
|
2464
2476
|
goto _test_eof99;
|
2465
2477
|
case 99:
|
2466
|
-
#line
|
2478
|
+
#line 2479 "ext/c/lexer.c"
|
2467
2479
|
switch( (*p) ) {
|
2468
2480
|
case 60: goto tr164;
|
2469
2481
|
case 64: goto tr161;
|
@@ -2527,7 +2539,7 @@ st100:
|
|
2527
2539
|
case 100:
|
2528
2540
|
#line 1 "NONE"
|
2529
2541
|
{ts = p;}
|
2530
|
-
#line
|
2542
|
+
#line 2543 "ext/c/lexer.c"
|
2531
2543
|
if ( (*p) == 60 )
|
2532
2544
|
goto tr168;
|
2533
2545
|
goto tr167;
|
@@ -2541,7 +2553,7 @@ st101:
|
|
2541
2553
|
if ( ++p == pe )
|
2542
2554
|
goto _test_eof101;
|
2543
2555
|
case 101:
|
2544
|
-
#line
|
2556
|
+
#line 2557 "ext/c/lexer.c"
|
2545
2557
|
if ( (*p) == 60 )
|
2546
2558
|
goto tr169;
|
2547
2559
|
goto tr167;
|
@@ -2557,7 +2569,7 @@ st102:
|
|
2557
2569
|
if ( ++p == pe )
|
2558
2570
|
goto _test_eof102;
|
2559
2571
|
case 102:
|
2560
|
-
#line
|
2572
|
+
#line 2573 "ext/c/lexer.c"
|
2561
2573
|
switch( (*p) ) {
|
2562
2574
|
case 47: goto st22;
|
2563
2575
|
case 60: goto tr171;
|
@@ -2622,7 +2634,7 @@ st103:
|
|
2622
2634
|
if ( ++p == pe )
|
2623
2635
|
goto _test_eof103;
|
2624
2636
|
case 103:
|
2625
|
-
#line
|
2637
|
+
#line 2638 "ext/c/lexer.c"
|
2626
2638
|
if ( (*p) == 60 )
|
2627
2639
|
goto tr171;
|
2628
2640
|
goto tr169;
|
@@ -2669,7 +2681,7 @@ st104:
|
|
2669
2681
|
case 104:
|
2670
2682
|
#line 1 "NONE"
|
2671
2683
|
{ts = p;}
|
2672
|
-
#line
|
2684
|
+
#line 2685 "ext/c/lexer.c"
|
2673
2685
|
if ( (*p) == 60 )
|
2674
2686
|
goto tr173;
|
2675
2687
|
goto tr172;
|
@@ -2683,7 +2695,7 @@ st105:
|
|
2683
2695
|
if ( ++p == pe )
|
2684
2696
|
goto _test_eof105;
|
2685
2697
|
case 105:
|
2686
|
-
#line
|
2698
|
+
#line 2699 "ext/c/lexer.c"
|
2687
2699
|
if ( (*p) == 60 )
|
2688
2700
|
goto tr174;
|
2689
2701
|
goto tr172;
|
@@ -2699,7 +2711,7 @@ st106:
|
|
2699
2711
|
if ( ++p == pe )
|
2700
2712
|
goto _test_eof106;
|
2701
2713
|
case 106:
|
2702
|
-
#line
|
2714
|
+
#line 2715 "ext/c/lexer.c"
|
2703
2715
|
switch( (*p) ) {
|
2704
2716
|
case 47: goto st29;
|
2705
2717
|
case 60: goto tr176;
|
@@ -2757,7 +2769,7 @@ st107:
|
|
2757
2769
|
if ( ++p == pe )
|
2758
2770
|
goto _test_eof107;
|
2759
2771
|
case 107:
|
2760
|
-
#line
|
2772
|
+
#line 2773 "ext/c/lexer.c"
|
2761
2773
|
if ( (*p) == 60 )
|
2762
2774
|
goto tr176;
|
2763
2775
|
goto tr174;
|
@@ -73,11 +73,11 @@ private static short[] init__java_lexer_key_offsets_0()
|
|
73
73
|
23, 24, 25, 26, 27, 38, 48, 49, 50, 60, 70, 71,
|
74
74
|
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
|
75
75
|
84, 96, 100, 111, 121, 133, 145, 146, 147, 148, 149, 150,
|
76
|
-
151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 182, 183,
|
77
|
+
193, 205, 217, 229, 241, 253, 265, 277, 289, 301, 313, 326,
|
78
|
+
336, 337, 347, 358, 369, 380, 386, 387, 392, 397, 399, 414,
|
79
|
+
415, 426, 427, 437, 448, 458, 473, 474, 484, 485, 495, 506,
|
80
|
+
516, 517, 518, 531, 544, 545, 546, 548, 549, 550, 551, 553
|
81
81
|
};
|
82
82
|
}
|
83
83
|
|
@@ -101,17 +101,18 @@ private static char[] init__java_lexer_trans_keys_0()
|
|
101
101
|
127, 47, 58, 96, 108, 0, 44, 59, 64, 91, 94, 123,
|
102
102
|
127, 45, 45, 45, 93, 93, 93, 63, 63, 62, 39, 39,
|
103
103
|
34, 34, 93, 93, 9, 10, 13, 32, 34, 39, 47, 62,
|
104
|
-
80, 83, 91, 96, 0, 44, 58, 64, 92, 94,
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
104
|
+
80, 83, 91, 96, 112, 115, 0, 44, 58, 64, 92, 94,
|
105
|
+
123, 127, 10, 47, 96, 0, 44, 58, 64, 91, 94, 123,
|
106
|
+
127, 47, 85, 96, 117, 0, 44, 58, 64, 91, 94, 123,
|
107
|
+
127, 47, 66, 96, 98, 0, 44, 58, 64, 91, 94, 123,
|
108
|
+
127, 47, 76, 96, 108, 0, 44, 58, 64, 91, 94, 123,
|
109
|
+
127, 47, 73, 96, 105, 0, 44, 58, 64, 91, 94, 123,
|
110
|
+
127, 47, 67, 96, 99, 0, 44, 58, 64, 91, 94, 123,
|
111
|
+
127, 47, 89, 96, 121, 0, 44, 58, 64, 91, 94, 123,
|
112
|
+
127, 47, 83, 96, 115, 0, 44, 58, 64, 91, 94, 123,
|
113
|
+
127, 47, 84, 96, 116, 0, 44, 58, 64, 91, 94, 123,
|
114
|
+
127, 47, 69, 96, 101, 0, 44, 58, 64, 91, 94, 123,
|
115
|
+
127, 47, 77, 96, 109, 0, 44, 58, 64, 91, 94, 123,
|
115
116
|
127, 34, 39, 47, 63, 96, 0, 44, 58, 64, 91, 94,
|
116
117
|
123, 127, 47, 96, 0, 44, 58, 64, 91, 94, 123, 127,
|
117
118
|
62, 47, 96, 0, 44, 58, 64, 91, 94, 123, 127, 47,
|
@@ -146,8 +147,8 @@ private static byte[] init__java_lexer_single_lengths_0()
|
|
146
147
|
1, 1, 1, 1, 3, 2, 1, 1, 2, 2, 1, 1,
|
147
148
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
148
149
|
4, 2, 3, 2, 4, 4, 1, 1, 1, 1, 1, 1,
|
149
|
-
1, 1, 1, 1, 1, 1, 1, 1, 1,
|
150
|
-
|
150
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 14, 1, 2,
|
151
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2,
|
151
152
|
1, 2, 3, 3, 3, 4, 1, 3, 3, 2, 7, 1,
|
152
153
|
3, 1, 2, 3, 2, 7, 1, 2, 1, 2, 3, 2,
|
153
154
|
1, 1, 3, 3, 1, 1, 2, 1, 1, 1, 2, 1
|
@@ -182,11 +183,11 @@ private static short[] init__java_lexer_index_offsets_0()
|
|
182
183
|
33, 35, 37, 39, 41, 49, 56, 58, 60, 67, 74, 76,
|
183
184
|
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100,
|
184
185
|
102, 111, 115, 123, 130, 139, 148, 150, 152, 154, 156, 158,
|
185
|
-
160, 162, 164, 166, 168, 170, 172, 174, 176, 178,
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
186
|
+
160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 197, 199,
|
187
|
+
206, 215, 224, 233, 242, 251, 260, 269, 278, 287, 296, 306,
|
188
|
+
313, 315, 322, 330, 338, 346, 352, 354, 359, 364, 367, 379,
|
189
|
+
381, 389, 391, 398, 406, 413, 425, 427, 434, 436, 443, 451,
|
190
|
+
458, 460, 462, 471, 480, 482, 484, 487, 489, 491, 493, 496
|
190
191
|
};
|
191
192
|
}
|
192
193
|
|
@@ -211,15 +212,16 @@ private static byte[] init__java_lexer_trans_targs_0()
|
|
211
212
|
35, 35, 35, 38, 44, 43, 42, 43, 18, 42, 47, 46,
|
212
213
|
45, 46, 19, 45, 50, 49, 48, 49, 48, 48, 51, 52,
|
213
214
|
51, 52, 53, 54, 53, 54, 55, 56, 55, 56, 57, 57,
|
214
|
-
58, 57, 57, 57, 0, 57, 60, 65, 57, 0,
|
215
|
-
0, 0, 59, 57, 57, 57, 57, 57, 57, 57,
|
216
|
-
57,
|
217
|
-
|
218
|
-
57,
|
219
|
-
57,
|
220
|
-
|
221
|
-
57, 57, 57, 59, 57,
|
222
|
-
57, 59, 57,
|
215
|
+
58, 57, 57, 57, 0, 57, 60, 65, 57, 0, 60, 65,
|
216
|
+
0, 0, 0, 0, 59, 57, 57, 57, 57, 57, 57, 57,
|
217
|
+
57, 59, 57, 61, 57, 61, 57, 57, 57, 57, 59, 57,
|
218
|
+
62, 57, 62, 57, 57, 57, 57, 59, 57, 63, 57, 63,
|
219
|
+
57, 57, 57, 57, 59, 57, 64, 57, 64, 57, 57, 57,
|
220
|
+
57, 59, 57, 59, 57, 59, 57, 57, 57, 57, 59, 57,
|
221
|
+
66, 57, 66, 57, 57, 57, 57, 59, 57, 67, 57, 67,
|
222
|
+
57, 57, 57, 57, 59, 57, 68, 57, 68, 57, 57, 57,
|
223
|
+
57, 59, 57, 69, 57, 69, 57, 57, 57, 57, 59, 57,
|
224
|
+
59, 57, 59, 57, 57, 57, 57, 59, 70, 70, 70, 72,
|
223
225
|
70, 70, 70, 70, 70, 71, 70, 70, 70, 70, 70, 70,
|
224
226
|
71, 70, 70, 0, 0, 0, 0, 0, 0, 74, 73, 73,
|
225
227
|
73, 73, 73, 73, 73, 74, 75, 75, 75, 75, 75, 75,
|
@@ -269,14 +271,15 @@ private static short[] init__java_lexer_trans_actions_0()
|
|
269
271
|
17, 1, 0, 17, 1, 1, 23, 1, 21, 23, 25, 1,
|
270
272
|
27, 1, 29, 1, 31, 1, 33, 1, 35, 1, 47, 45,
|
271
273
|
0, 47, 41, 39, 0, 43, 0, 0, 37, 0, 0, 0,
|
272
|
-
0, 0, 183, 45, 51, 53, 53, 53, 53, 53,
|
273
|
-
49, 0, 49, 49, 49, 49, 49, 183, 49,
|
274
|
-
|
275
|
-
49,
|
276
|
-
49,
|
277
|
-
|
278
|
-
49, 49, 49, 183, 49, 0, 49,
|
279
|
-
49,
|
274
|
+
0, 0, 0, 0, 183, 45, 51, 53, 53, 53, 53, 53,
|
275
|
+
53, 183, 49, 0, 49, 0, 49, 49, 49, 49, 183, 49,
|
276
|
+
0, 49, 0, 49, 49, 49, 49, 183, 49, 0, 49, 0,
|
277
|
+
49, 49, 49, 49, 183, 49, 0, 49, 0, 49, 49, 49,
|
278
|
+
49, 183, 49, 180, 49, 180, 49, 49, 49, 49, 183, 49,
|
279
|
+
0, 49, 0, 49, 49, 49, 49, 183, 49, 0, 49, 0,
|
280
|
+
49, 49, 49, 49, 183, 49, 0, 49, 0, 49, 49, 49,
|
281
|
+
49, 183, 49, 0, 49, 0, 49, 49, 49, 49, 183, 49,
|
282
|
+
180, 49, 180, 49, 49, 49, 49, 183, 195, 192, 168, 1,
|
280
283
|
168, 168, 168, 168, 168, 1, 57, 57, 57, 57, 57, 57,
|
281
284
|
0, 55, 59, 0, 0, 0, 0, 0, 0, 0, 63, 61,
|
282
285
|
63, 63, 63, 63, 63, 0, 171, 198, 171, 171, 171, 171,
|
@@ -346,15 +349,15 @@ private static final short _java_lexer_from_state_actions[] = init__java_lexer_f
|
|
346
349
|
private static short[] init__java_lexer_eof_trans_0()
|
347
350
|
{
|
348
351
|
return new short [] {
|
349
|
-
0,
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
0,
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
0,
|
352
|
+
0, 514, 514, 514, 514, 514, 514, 514, 514, 514, 514, 514,
|
353
|
+
514, 514, 514, 514, 514, 535, 516, 517, 518, 519, 526, 526,
|
354
|
+
526, 526, 526, 526, 526, 532, 532, 532, 532, 532, 532, 0,
|
355
|
+
533, 534, 535, 538, 538, 538, 0, 540, 540, 0, 542, 542,
|
356
|
+
0, 544, 544, 0, 545, 0, 546, 0, 547, 0, 548, 549,
|
357
|
+
559, 559, 559, 559, 559, 559, 559, 559, 559, 559, 0, 560,
|
358
|
+
561, 0, 562, 0, 563, 0, 564, 0, 565, 0, 0, 566,
|
359
|
+
567, 569, 569, 571, 571, 0, 572, 573, 575, 575, 577, 577,
|
360
|
+
0, 580, 580, 580, 0, 583, 583, 583, 0, 586, 586, 586
|
358
361
|
};
|
359
362
|
}
|
360
363
|
|
@@ -484,7 +487,7 @@ static final int java_lexer_en_main = 35;
|
|
484
487
|
String id_on_xml_decl_start = "on_xml_decl_start";
|
485
488
|
|
486
489
|
|
487
|
-
// line
|
490
|
+
// line 491 "ext/java/org/liboga/xml/Lexer.java"
|
488
491
|
{
|
489
492
|
int _klen;
|
490
493
|
int _trans = 0;
|
@@ -513,7 +516,7 @@ case 1:
|
|
513
516
|
// line 1 "NONE"
|
514
517
|
{ts = p;}
|
515
518
|
break;
|
516
|
-
// line
|
519
|
+
// line 520 "ext/java/org/liboga/xml/Lexer.java"
|
517
520
|
}
|
518
521
|
}
|
519
522
|
|
@@ -1345,7 +1348,7 @@ case 3:
|
|
1345
1348
|
}
|
1346
1349
|
}
|
1347
1350
|
break;
|
1348
|
-
// line
|
1351
|
+
// line 1352 "ext/java/org/liboga/xml/Lexer.java"
|
1349
1352
|
}
|
1350
1353
|
}
|
1351
1354
|
}
|
@@ -1359,7 +1362,7 @@ case 2:
|
|
1359
1362
|
// line 1 "NONE"
|
1360
1363
|
{ts = -1;}
|
1361
1364
|
break;
|
1362
|
-
// line
|
1365
|
+
// line 1366 "ext/java/org/liboga/xml/Lexer.java"
|
1363
1366
|
}
|
1364
1367
|
}
|
1365
1368
|
|
data/ext/ragel/base_lexer.rl
CHANGED
@@ -289,7 +289,7 @@
|
|
289
289
|
# Machine for processing doctypes. Doctype values such as the public
|
290
290
|
# and system IDs are treated as T_STRING tokens.
|
291
291
|
doctype := |*
|
292
|
-
'PUBLIC' | 'SYSTEM' => {
|
292
|
+
'PUBLIC'i | 'SYSTEM'i => {
|
293
293
|
callback(id_on_doctype_type, data, encoding, ts, te);
|
294
294
|
};
|
295
295
|
|
data/lib/oga/version.rb
CHANGED
data/lib/oga/xml/lexer.rb
CHANGED
@@ -58,7 +58,11 @@ module Oga
|
|
58
58
|
|
59
59
|
HTML_SCRIPT_ELEMENTS = Whitelist.new(%w{script template})
|
60
60
|
|
61
|
-
|
61
|
+
# The elements that may occur in a thead, tbody, or tfoot.
|
62
|
+
#
|
63
|
+
# Technically "th" is not allowed per the HTML5 spec, but it's so commonly
|
64
|
+
# used in these elements that we allow it anyway.
|
65
|
+
HTML_TABLE_ROW_ELEMENTS = Whitelist.new(%w{tr th}) + HTML_SCRIPT_ELEMENTS
|
62
66
|
|
63
67
|
# Elements that should be closed automatically before a new opening tag is
|
64
68
|
# processed.
|
data/lib/oga/xml/querying.rb
CHANGED
@@ -10,6 +10,7 @@ module Oga
|
|
10
10
|
# document = Oga.parse_xml <<-EOF
|
11
11
|
# <people>
|
12
12
|
# <person age="25">Alice</person>
|
13
|
+
# <ns:person xmlns:ns="http://example.net">Bob</ns:person>
|
13
14
|
# </people>
|
14
15
|
# EOF
|
15
16
|
#
|
@@ -25,15 +26,23 @@ module Oga
|
|
25
26
|
#
|
26
27
|
# document.xpath('people/person[@age = $age]', 'age' => 25)
|
27
28
|
#
|
29
|
+
# Using namespace aliases:
|
30
|
+
#
|
31
|
+
# namespaces = {'example' => 'http://example.net'}
|
32
|
+
# document.xpath('people/example:person', namespaces: namespaces)
|
33
|
+
#
|
28
34
|
# @param [String] expression The XPath expression to run.
|
29
35
|
#
|
30
36
|
# @param [Hash] variables Variables to bind. The keys of this Hash should
|
31
37
|
# be String values.
|
32
38
|
#
|
39
|
+
# @param [Hash] namespaces Namespace aliases. The keys of this Hash should
|
40
|
+
# be String values.
|
41
|
+
#
|
33
42
|
# @return [Oga::XML::NodeSet]
|
34
|
-
def xpath(expression, variables = {})
|
43
|
+
def xpath(expression, variables = {}, namespaces: nil)
|
35
44
|
ast = XPath::Parser.parse_with_cache(expression)
|
36
|
-
block = XPath::Compiler.compile_with_cache(ast)
|
45
|
+
block = XPath::Compiler.compile_with_cache(ast, namespaces: namespaces)
|
37
46
|
|
38
47
|
block.call(self, variables)
|
39
48
|
end
|
@@ -54,8 +63,8 @@ module Oga
|
|
54
63
|
#
|
55
64
|
# @see [#xpath]
|
56
65
|
# @return [Oga::XML::Node|Oga::XML::Attribute]
|
57
|
-
def at_xpath(*args)
|
58
|
-
result = xpath(*args)
|
66
|
+
def at_xpath(*args, namespaces: nil)
|
67
|
+
result = xpath(*args, namespaces: namespaces)
|
59
68
|
|
60
69
|
result.is_a?(XML::NodeSet) ? result.first : result
|
61
70
|
end
|
data/lib/oga/xml/to_xml.rb
CHANGED
data/lib/oga/xpath/compiler.rb
CHANGED
@@ -42,12 +42,16 @@ module Oga
|
|
42
42
|
# Compiles and caches an AST.
|
43
43
|
#
|
44
44
|
# @see [#compile]
|
45
|
-
def self.compile_with_cache(ast)
|
46
|
-
|
45
|
+
def self.compile_with_cache(ast, namespaces: nil)
|
46
|
+
cache_key = namespaces ? [ast, namespaces] : ast
|
47
|
+
CACHE.get_or_set(cache_key) { new(namespaces: namespaces).compile(ast) }
|
47
48
|
end
|
48
49
|
|
49
|
-
|
50
|
+
# @param [Hash] namespaces
|
51
|
+
def initialize(namespaces: nil)
|
50
52
|
reset
|
53
|
+
|
54
|
+
@namespaces = namespaces
|
51
55
|
end
|
52
56
|
|
53
57
|
# Resets the internal state.
|
@@ -1385,7 +1389,23 @@ module Oga
|
|
1385
1389
|
end
|
1386
1390
|
|
1387
1391
|
if ns and ns != STAR
|
1388
|
-
|
1392
|
+
if @namespaces
|
1393
|
+
ns_uri = @namespaces[ns]
|
1394
|
+
ns_match =
|
1395
|
+
if ns_uri
|
1396
|
+
input.namespace.and(input.namespace.uri.eq(string(ns_uri)))
|
1397
|
+
else
|
1398
|
+
self.false
|
1399
|
+
end
|
1400
|
+
else
|
1401
|
+
ns_match =
|
1402
|
+
if ns == XML::Element::XMLNS_PREFIX
|
1403
|
+
input
|
1404
|
+
else
|
1405
|
+
input.namespace_name.eq(string(ns))
|
1406
|
+
end
|
1407
|
+
end
|
1408
|
+
|
1389
1409
|
condition = condition ? condition.and(ns_match) : ns_match
|
1390
1410
|
end
|
1391
1411
|
|
data/oga.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: oga
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '3.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yorick Peterse
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-07-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ast
|
@@ -210,7 +210,7 @@ homepage: https://gitlab.com/yorickpeterse/oga/
|
|
210
210
|
licenses:
|
211
211
|
- MPL-2.0
|
212
212
|
metadata: {}
|
213
|
-
post_install_message:
|
213
|
+
post_install_message:
|
214
214
|
rdoc_options: []
|
215
215
|
require_paths:
|
216
216
|
- lib
|
@@ -218,16 +218,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
218
218
|
requirements:
|
219
219
|
- - ">="
|
220
220
|
- !ruby/object:Gem::Version
|
221
|
-
version:
|
221
|
+
version: 2.3.0
|
222
222
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
223
223
|
requirements:
|
224
224
|
- - ">="
|
225
225
|
- !ruby/object:Gem::Version
|
226
226
|
version: '0'
|
227
227
|
requirements: []
|
228
|
-
|
229
|
-
|
230
|
-
signing_key:
|
228
|
+
rubygems_version: 3.1.2
|
229
|
+
signing_key:
|
231
230
|
specification_version: 4
|
232
231
|
summary: Oga is an XML/HTML parser written in Ruby.
|
233
232
|
test_files: []
|