oga 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -0
- data/doc/changelog.md +108 -0
- data/ext/c/lexer.c +63 -48
- data/ext/java/org/liboga/xml/Lexer.java +87 -101
- data/ext/ragel/base_lexer.rl +8 -0
- data/lib/oga.rb +7 -1
- data/lib/oga/html/sax_parser.rb +18 -0
- data/lib/oga/oga.rb +30 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/cdata.rb +0 -7
- data/lib/oga/xml/comment.rb +0 -7
- data/lib/oga/xml/doctype.rb +0 -7
- data/lib/oga/xml/document.rb +7 -1
- data/lib/oga/xml/element.rb +43 -18
- data/lib/oga/xml/html_void_elements.rb +28 -0
- data/lib/oga/xml/lexer.rb +1 -26
- data/lib/oga/xml/node.rb +0 -7
- data/lib/oga/xml/parser.rb +34 -2
- data/lib/oga/xml/pull_parser.rb +17 -3
- data/lib/oga/xml/sax_parser.rb +63 -0
- data/lib/oga/xml/text.rb +1 -6
- data/lib/oga/xml/xml_declaration.rb +0 -7
- data/lib/oga/xpath/evaluator.rb +3 -2
- data/lib/oga/xpath/lexer.rb +75 -71
- data/lib/oga/xpath/parser.rb +65 -60
- metadata +5 -2
@@ -0,0 +1,63 @@
|
|
1
|
+
module Oga
|
2
|
+
module XML
|
3
|
+
##
|
4
|
+
# The SaxParser class provides the basic interface for writing custom SAX
|
5
|
+
# parsers. All callback methods defined in {Oga::XML::Parser} are delegated
|
6
|
+
# to a dedicated handler class.
|
7
|
+
#
|
8
|
+
# To write a custom handler for the SAX parser, create a class that
|
9
|
+
# implements one (or many) of the following callback methods:
|
10
|
+
#
|
11
|
+
# * `on_document`
|
12
|
+
# * `on_doctype`
|
13
|
+
# * `on_cdata`
|
14
|
+
# * `on_comment`
|
15
|
+
# * `on_proc_ins`
|
16
|
+
# * `on_xml_decl`
|
17
|
+
# * `on_text`
|
18
|
+
# * `on_element`
|
19
|
+
# * `on_element_children`
|
20
|
+
# * `after_element`
|
21
|
+
#
|
22
|
+
# For example:
|
23
|
+
#
|
24
|
+
# class SaxHandler
|
25
|
+
# def on_element(namespace, name, attrs = {})
|
26
|
+
# puts name
|
27
|
+
# end
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# You can then use it as follows:
|
31
|
+
#
|
32
|
+
# handler = SaxHandler.new
|
33
|
+
# parser = Oga::XML::SaxParser.new(handler, '<foo />')
|
34
|
+
#
|
35
|
+
# parser.parse
|
36
|
+
#
|
37
|
+
# For information on the callback arguments see the documentation of the
|
38
|
+
# corresponding methods in {Oga::XML::Parser}.
|
39
|
+
#
|
40
|
+
class SaxParser < Parser
|
41
|
+
##
|
42
|
+
# @param [Object] handler The SAX handler to delegate callbacks to.
|
43
|
+
# @see Oga::XML::Parser#initialize
|
44
|
+
#
|
45
|
+
def initialize(handler, *args)
|
46
|
+
@handler = handler
|
47
|
+
|
48
|
+
super(*args)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Delegate all callbacks to the handler object.
|
52
|
+
instance_methods.grep(/^(on_|after_)/).each do |method|
|
53
|
+
eval <<-EOF, nil, __FILE__, __LINE__ + 1
|
54
|
+
def #{method}(*args)
|
55
|
+
@handler.#{method}(*args) if @handler.respond_to?(:#{method})
|
56
|
+
|
57
|
+
return
|
58
|
+
end
|
59
|
+
EOF
|
60
|
+
end
|
61
|
+
end # SaxParser
|
62
|
+
end # XML
|
63
|
+
end # Oga
|
data/lib/oga/xml/text.rb
CHANGED
data/lib/oga/xpath/evaluator.rb
CHANGED
@@ -131,7 +131,8 @@ module Oga
|
|
131
131
|
context = XML::NodeSet.new([@document])
|
132
132
|
end
|
133
133
|
|
134
|
-
return
|
134
|
+
# If the expression is just "/" we'll just return the current context.
|
135
|
+
return ast_node.children.empty? ? context : on_path(ast_node, context)
|
135
136
|
end
|
136
137
|
|
137
138
|
##
|
@@ -1188,7 +1189,7 @@ module Oga
|
|
1188
1189
|
# This function call returns the substring of the 1st argument that occurs
|
1189
1190
|
# after the string given in the 2nd argument. For example:
|
1190
1191
|
#
|
1191
|
-
# substring-
|
1192
|
+
# substring-after("2014-08-25", "-")
|
1192
1193
|
#
|
1193
1194
|
# This would return "08-25" as it occurs after the first "-".
|
1194
1195
|
#
|
data/lib/oga/xpath/lexer.rb
CHANGED
@@ -1588,7 +1588,7 @@ ts = p
|
|
1588
1588
|
begin
|
1589
1589
|
add_token(:T_SLASH) end
|
1590
1590
|
when 33 then
|
1591
|
-
# line
|
1591
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1592
1592
|
begin
|
1593
1593
|
add_token(:T_ADD) end
|
1594
1594
|
when 12 then
|
@@ -1597,32 +1597,20 @@ ts = p
|
|
1597
1597
|
te = p+1
|
1598
1598
|
end
|
1599
1599
|
when 11 then
|
1600
|
-
# line
|
1600
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1601
1601
|
begin
|
1602
1602
|
te = p+1
|
1603
1603
|
end
|
1604
1604
|
when 8 then
|
1605
|
-
# line
|
1605
|
+
# line 349 "lib/oga/xpath/lexer.rl"
|
1606
1606
|
begin
|
1607
1607
|
te = p+1
|
1608
1608
|
begin
|
1609
1609
|
emit(:T_TYPE_TEST, ts, te - 2)
|
1610
1610
|
end
|
1611
1611
|
end
|
1612
|
-
when 20 then
|
1613
|
-
# line 365 "lib/oga/xpath/lexer.rl"
|
1614
|
-
begin
|
1615
|
-
te = p+1
|
1616
|
-
begin add_token(:T_LBRACK) end
|
1617
|
-
end
|
1618
|
-
when 21 then
|
1619
|
-
# line 366 "lib/oga/xpath/lexer.rl"
|
1620
|
-
begin
|
1621
|
-
te = p+1
|
1622
|
-
begin add_token(:T_RBRACK) end
|
1623
|
-
end
|
1624
1612
|
when 2 then
|
1625
|
-
# line
|
1613
|
+
# line 238 "lib/oga/xpath/lexer.rl"
|
1626
1614
|
begin
|
1627
1615
|
te = p+1
|
1628
1616
|
begin
|
@@ -1630,7 +1618,7 @@ te = p+1
|
|
1630
1618
|
end
|
1631
1619
|
end
|
1632
1620
|
when 7 then
|
1633
|
-
# line
|
1621
|
+
# line 260 "lib/oga/xpath/lexer.rl"
|
1634
1622
|
begin
|
1635
1623
|
te = p+1
|
1636
1624
|
begin
|
@@ -1638,7 +1626,7 @@ te = p+1
|
|
1638
1626
|
end
|
1639
1627
|
end
|
1640
1628
|
when 19 then
|
1641
|
-
# line
|
1629
|
+
# line 272 "lib/oga/xpath/lexer.rl"
|
1642
1630
|
begin
|
1643
1631
|
te = p+1
|
1644
1632
|
begin
|
@@ -1659,12 +1647,12 @@ te = p+1
|
|
1659
1647
|
end
|
1660
1648
|
end
|
1661
1649
|
when 22 then
|
1662
|
-
# line
|
1650
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1663
1651
|
begin
|
1664
1652
|
te = p
|
1665
1653
|
p = p - 1; end
|
1666
1654
|
when 30 then
|
1667
|
-
# line
|
1655
|
+
# line 362 "lib/oga/xpath/lexer.rl"
|
1668
1656
|
begin
|
1669
1657
|
te = p
|
1670
1658
|
p = p - 1; begin
|
@@ -1672,7 +1660,7 @@ p = p - 1; begin
|
|
1672
1660
|
end
|
1673
1661
|
end
|
1674
1662
|
when 35 then
|
1675
|
-
# line
|
1663
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1676
1664
|
begin
|
1677
1665
|
te = p
|
1678
1666
|
p = p - 1; begin
|
@@ -1682,7 +1670,7 @@ p = p - 1; begin
|
|
1682
1670
|
end
|
1683
1671
|
end
|
1684
1672
|
when 36 then
|
1685
|
-
# line
|
1673
|
+
# line 219 "lib/oga/xpath/lexer.rl"
|
1686
1674
|
begin
|
1687
1675
|
te = p
|
1688
1676
|
p = p - 1; begin
|
@@ -1692,7 +1680,7 @@ p = p - 1; begin
|
|
1692
1680
|
end
|
1693
1681
|
end
|
1694
1682
|
when 37 then
|
1695
|
-
# line
|
1683
|
+
# line 272 "lib/oga/xpath/lexer.rl"
|
1696
1684
|
begin
|
1697
1685
|
te = p
|
1698
1686
|
p = p - 1; begin
|
@@ -1713,7 +1701,7 @@ p = p - 1; begin
|
|
1713
1701
|
end
|
1714
1702
|
end
|
1715
1703
|
when 31 then
|
1716
|
-
# line
|
1704
|
+
# line 199 "lib/oga/xpath/lexer.rl"
|
1717
1705
|
begin
|
1718
1706
|
te = p
|
1719
1707
|
p = p - 1; begin
|
@@ -1721,12 +1709,12 @@ p = p - 1; begin
|
|
1721
1709
|
end
|
1722
1710
|
end
|
1723
1711
|
when 1 then
|
1724
|
-
# line
|
1712
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1725
1713
|
begin
|
1726
1714
|
begin p = ((te))-1; end
|
1727
1715
|
end
|
1728
1716
|
when 6 then
|
1729
|
-
# line
|
1717
|
+
# line 199 "lib/oga/xpath/lexer.rl"
|
1730
1718
|
begin
|
1731
1719
|
begin p = ((te))-1; end
|
1732
1720
|
begin
|
@@ -1744,14 +1732,14 @@ p = p - 1; begin
|
|
1744
1732
|
next
|
1745
1733
|
end
|
1746
1734
|
end
|
1747
|
-
when
|
1735
|
+
when 6 then
|
1748
1736
|
begin begin p = ((te))-1; end
|
1749
1737
|
|
1750
1738
|
value = slice_input(ts, te).to_i
|
1751
1739
|
|
1752
1740
|
add_token(:T_INT, value)
|
1753
1741
|
end
|
1754
|
-
when
|
1742
|
+
when 7 then
|
1755
1743
|
begin begin p = ((te))-1; end
|
1756
1744
|
|
1757
1745
|
value = slice_input(ts, te).to_f
|
@@ -1767,7 +1755,7 @@ end
|
|
1767
1755
|
# line 185 "lib/oga/xpath/lexer.rl"
|
1768
1756
|
begin
|
1769
1757
|
add_token(:T_LPAREN) end
|
1770
|
-
# line
|
1758
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1771
1759
|
begin
|
1772
1760
|
te = p+1
|
1773
1761
|
end
|
@@ -1775,7 +1763,7 @@ te = p+1
|
|
1775
1763
|
# line 186 "lib/oga/xpath/lexer.rl"
|
1776
1764
|
begin
|
1777
1765
|
add_token(:T_RPAREN) end
|
1778
|
-
# line
|
1766
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1779
1767
|
begin
|
1780
1768
|
te = p+1
|
1781
1769
|
end
|
@@ -1783,7 +1771,7 @@ te = p+1
|
|
1783
1771
|
# line 187 "lib/oga/xpath/lexer.rl"
|
1784
1772
|
begin
|
1785
1773
|
add_token(:T_COMMA) end
|
1786
|
-
# line
|
1774
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1787
1775
|
begin
|
1788
1776
|
te = p+1
|
1789
1777
|
end
|
@@ -1791,119 +1779,135 @@ te = p+1
|
|
1791
1779
|
# line 188 "lib/oga/xpath/lexer.rl"
|
1792
1780
|
begin
|
1793
1781
|
add_token(:T_COLON) end
|
1794
|
-
# line
|
1782
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1783
|
+
begin
|
1784
|
+
te = p+1
|
1785
|
+
end
|
1786
|
+
when 20 then
|
1787
|
+
# line 189 "lib/oga/xpath/lexer.rl"
|
1788
|
+
begin
|
1789
|
+
add_token(:T_LBRACK) end
|
1790
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1791
|
+
begin
|
1792
|
+
te = p+1
|
1793
|
+
end
|
1794
|
+
when 21 then
|
1795
|
+
# line 190 "lib/oga/xpath/lexer.rl"
|
1796
|
+
begin
|
1797
|
+
add_token(:T_RBRACK) end
|
1798
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1795
1799
|
begin
|
1796
1800
|
te = p+1
|
1797
1801
|
end
|
1798
1802
|
when 43 then
|
1799
|
-
# line
|
1803
|
+
# line 295 "lib/oga/xpath/lexer.rl"
|
1800
1804
|
begin
|
1801
1805
|
add_token(:T_PIPE) end
|
1802
|
-
# line
|
1806
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1803
1807
|
begin
|
1804
1808
|
te = p
|
1805
1809
|
p = p - 1; end
|
1806
1810
|
when 32 then
|
1807
|
-
# line
|
1811
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1808
1812
|
begin
|
1809
1813
|
add_token(:T_ADD) end
|
1810
|
-
# line
|
1814
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1811
1815
|
begin
|
1812
1816
|
te = p
|
1813
1817
|
p = p - 1; end
|
1814
1818
|
when 40 then
|
1815
|
-
# line
|
1819
|
+
# line 297 "lib/oga/xpath/lexer.rl"
|
1816
1820
|
begin
|
1817
1821
|
add_token(:T_EQ) end
|
1818
|
-
# line
|
1822
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1819
1823
|
begin
|
1820
1824
|
te = p
|
1821
1825
|
p = p - 1; end
|
1822
1826
|
when 29 then
|
1823
|
-
# line
|
1827
|
+
# line 298 "lib/oga/xpath/lexer.rl"
|
1824
1828
|
begin
|
1825
1829
|
add_token(:T_NEQ) end
|
1826
|
-
# line
|
1830
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1827
1831
|
begin
|
1828
1832
|
te = p
|
1829
1833
|
p = p - 1; end
|
1830
1834
|
when 38 then
|
1831
|
-
# line
|
1835
|
+
# line 299 "lib/oga/xpath/lexer.rl"
|
1832
1836
|
begin
|
1833
1837
|
add_token(:T_LT) end
|
1834
|
-
# line
|
1838
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1835
1839
|
begin
|
1836
1840
|
te = p
|
1837
1841
|
p = p - 1; end
|
1838
1842
|
when 41 then
|
1839
|
-
# line
|
1843
|
+
# line 300 "lib/oga/xpath/lexer.rl"
|
1840
1844
|
begin
|
1841
1845
|
add_token(:T_GT) end
|
1842
|
-
# line
|
1846
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1843
1847
|
begin
|
1844
1848
|
te = p
|
1845
1849
|
p = p - 1; end
|
1846
1850
|
when 39 then
|
1847
|
-
# line
|
1851
|
+
# line 301 "lib/oga/xpath/lexer.rl"
|
1848
1852
|
begin
|
1849
1853
|
add_token(:T_LTE) end
|
1850
|
-
# line
|
1854
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1851
1855
|
begin
|
1852
1856
|
te = p
|
1853
1857
|
p = p - 1; end
|
1854
1858
|
when 42 then
|
1855
|
-
# line
|
1859
|
+
# line 302 "lib/oga/xpath/lexer.rl"
|
1856
1860
|
begin
|
1857
1861
|
add_token(:T_GTE) end
|
1858
|
-
# line
|
1862
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1859
1863
|
begin
|
1860
1864
|
te = p
|
1861
1865
|
p = p - 1; end
|
1862
1866
|
when 25 then
|
1863
|
-
# line
|
1867
|
+
# line 312 "lib/oga/xpath/lexer.rl"
|
1864
1868
|
begin
|
1865
1869
|
add_token(:T_AND) end
|
1866
|
-
# line
|
1870
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1867
1871
|
begin
|
1868
1872
|
te = p
|
1869
1873
|
p = p - 1; end
|
1870
1874
|
when 28 then
|
1871
|
-
# line
|
1875
|
+
# line 313 "lib/oga/xpath/lexer.rl"
|
1872
1876
|
begin
|
1873
1877
|
add_token(:T_OR) end
|
1874
|
-
# line
|
1878
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1875
1879
|
begin
|
1876
1880
|
te = p
|
1877
1881
|
p = p - 1; end
|
1878
1882
|
when 26 then
|
1879
|
-
# line
|
1883
|
+
# line 314 "lib/oga/xpath/lexer.rl"
|
1880
1884
|
begin
|
1881
1885
|
add_token(:T_DIV) end
|
1882
|
-
# line
|
1886
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1883
1887
|
begin
|
1884
1888
|
te = p
|
1885
1889
|
p = p - 1; end
|
1886
1890
|
when 27 then
|
1887
|
-
# line
|
1891
|
+
# line 315 "lib/oga/xpath/lexer.rl"
|
1888
1892
|
begin
|
1889
1893
|
add_token(:T_MOD) end
|
1890
|
-
# line
|
1894
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1891
1895
|
begin
|
1892
1896
|
te = p
|
1893
1897
|
p = p - 1; end
|
1894
1898
|
when 23 then
|
1895
|
-
# line
|
1899
|
+
# line 316 "lib/oga/xpath/lexer.rl"
|
1896
1900
|
begin
|
1897
1901
|
add_token(:T_MUL) end
|
1898
|
-
# line
|
1902
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1899
1903
|
begin
|
1900
1904
|
te = p
|
1901
1905
|
p = p - 1; end
|
1902
1906
|
when 24 then
|
1903
|
-
# line
|
1907
|
+
# line 317 "lib/oga/xpath/lexer.rl"
|
1904
1908
|
begin
|
1905
1909
|
add_token(:T_SUB) end
|
1906
|
-
# line
|
1910
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1907
1911
|
begin
|
1908
1912
|
te = p
|
1909
1913
|
p = p - 1; end
|
@@ -1912,7 +1916,7 @@ p = p - 1; end
|
|
1912
1916
|
begin
|
1913
1917
|
te = p+1
|
1914
1918
|
end
|
1915
|
-
# line
|
1919
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1916
1920
|
begin
|
1917
1921
|
act = 1; end
|
1918
1922
|
when 4 then
|
@@ -1920,29 +1924,29 @@ act = 1; end
|
|
1920
1924
|
begin
|
1921
1925
|
te = p+1
|
1922
1926
|
end
|
1923
|
-
# line
|
1927
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1924
1928
|
begin
|
1925
|
-
act =
|
1929
|
+
act = 6; end
|
1926
1930
|
when 5 then
|
1927
1931
|
# line 1 "NONE"
|
1928
1932
|
begin
|
1929
1933
|
te = p+1
|
1930
1934
|
end
|
1931
|
-
# line
|
1935
|
+
# line 219 "lib/oga/xpath/lexer.rl"
|
1932
1936
|
begin
|
1933
|
-
act =
|
1937
|
+
act = 7; end
|
1934
1938
|
when 34 then
|
1935
1939
|
# line 1 "NONE"
|
1936
1940
|
begin
|
1937
1941
|
te = p+1
|
1938
1942
|
end
|
1939
|
-
# line
|
1943
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1940
1944
|
begin
|
1941
1945
|
add_token(:T_ADD) end
|
1942
|
-
# line
|
1946
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1943
1947
|
begin
|
1944
|
-
act =
|
1945
|
-
# line
|
1948
|
+
act = 6; end
|
1949
|
+
# line 1949 "lib/oga/xpath/lexer.rb"
|
1946
1950
|
end
|
1947
1951
|
end
|
1948
1952
|
end
|
@@ -1956,7 +1960,7 @@ ts = nil; end
|
|
1956
1960
|
begin
|
1957
1961
|
act = 0
|
1958
1962
|
end
|
1959
|
-
# line
|
1963
|
+
# line 1963 "lib/oga/xpath/lexer.rb"
|
1960
1964
|
end
|
1961
1965
|
|
1962
1966
|
if cs == 0
|
@@ -2036,7 +2040,7 @@ end
|
|
2036
2040
|
end
|
2037
2041
|
|
2038
2042
|
|
2039
|
-
# line
|
2043
|
+
# line 379 "lib/oga/xpath/lexer.rl"
|
2040
2044
|
|
2041
2045
|
end # Lexer
|
2042
2046
|
end # XPath
|