oga 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -0
- data/doc/changelog.md +108 -0
- data/ext/c/lexer.c +63 -48
- data/ext/java/org/liboga/xml/Lexer.java +87 -101
- data/ext/ragel/base_lexer.rl +8 -0
- data/lib/oga.rb +7 -1
- data/lib/oga/html/sax_parser.rb +18 -0
- data/lib/oga/oga.rb +30 -0
- data/lib/oga/version.rb +1 -1
- data/lib/oga/xml/cdata.rb +0 -7
- data/lib/oga/xml/comment.rb +0 -7
- data/lib/oga/xml/doctype.rb +0 -7
- data/lib/oga/xml/document.rb +7 -1
- data/lib/oga/xml/element.rb +43 -18
- data/lib/oga/xml/html_void_elements.rb +28 -0
- data/lib/oga/xml/lexer.rb +1 -26
- data/lib/oga/xml/node.rb +0 -7
- data/lib/oga/xml/parser.rb +34 -2
- data/lib/oga/xml/pull_parser.rb +17 -3
- data/lib/oga/xml/sax_parser.rb +63 -0
- data/lib/oga/xml/text.rb +1 -6
- data/lib/oga/xml/xml_declaration.rb +0 -7
- data/lib/oga/xpath/evaluator.rb +3 -2
- data/lib/oga/xpath/lexer.rb +75 -71
- data/lib/oga/xpath/parser.rb +65 -60
- metadata +5 -2
@@ -0,0 +1,63 @@
|
|
1
|
+
module Oga
|
2
|
+
module XML
|
3
|
+
##
|
4
|
+
# The SaxParser class provides the basic interface for writing custom SAX
|
5
|
+
# parsers. All callback methods defined in {Oga::XML::Parser} are delegated
|
6
|
+
# to a dedicated handler class.
|
7
|
+
#
|
8
|
+
# To write a custom handler for the SAX parser, create a class that
|
9
|
+
# implements one (or many) of the following callback methods:
|
10
|
+
#
|
11
|
+
# * `on_document`
|
12
|
+
# * `on_doctype`
|
13
|
+
# * `on_cdata`
|
14
|
+
# * `on_comment`
|
15
|
+
# * `on_proc_ins`
|
16
|
+
# * `on_xml_decl`
|
17
|
+
# * `on_text`
|
18
|
+
# * `on_element`
|
19
|
+
# * `on_element_children`
|
20
|
+
# * `after_element`
|
21
|
+
#
|
22
|
+
# For example:
|
23
|
+
#
|
24
|
+
# class SaxHandler
|
25
|
+
# def on_element(namespace, name, attrs = {})
|
26
|
+
# puts name
|
27
|
+
# end
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# You can then use it as follows:
|
31
|
+
#
|
32
|
+
# handler = SaxHandler.new
|
33
|
+
# parser = Oga::XML::SaxParser.new(handler, '<foo />')
|
34
|
+
#
|
35
|
+
# parser.parse
|
36
|
+
#
|
37
|
+
# For information on the callback arguments see the documentation of the
|
38
|
+
# corresponding methods in {Oga::XML::Parser}.
|
39
|
+
#
|
40
|
+
class SaxParser < Parser
|
41
|
+
##
|
42
|
+
# @param [Object] handler The SAX handler to delegate callbacks to.
|
43
|
+
# @see Oga::XML::Parser#initialize
|
44
|
+
#
|
45
|
+
def initialize(handler, *args)
|
46
|
+
@handler = handler
|
47
|
+
|
48
|
+
super(*args)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Delegate all callbacks to the handler object.
|
52
|
+
instance_methods.grep(/^(on_|after_)/).each do |method|
|
53
|
+
eval <<-EOF, nil, __FILE__, __LINE__ + 1
|
54
|
+
def #{method}(*args)
|
55
|
+
@handler.#{method}(*args) if @handler.respond_to?(:#{method})
|
56
|
+
|
57
|
+
return
|
58
|
+
end
|
59
|
+
EOF
|
60
|
+
end
|
61
|
+
end # SaxParser
|
62
|
+
end # XML
|
63
|
+
end # Oga
|
data/lib/oga/xml/text.rb
CHANGED
data/lib/oga/xpath/evaluator.rb
CHANGED
@@ -131,7 +131,8 @@ module Oga
|
|
131
131
|
context = XML::NodeSet.new([@document])
|
132
132
|
end
|
133
133
|
|
134
|
-
return
|
134
|
+
# If the expression is just "/" we'll just return the current context.
|
135
|
+
return ast_node.children.empty? ? context : on_path(ast_node, context)
|
135
136
|
end
|
136
137
|
|
137
138
|
##
|
@@ -1188,7 +1189,7 @@ module Oga
|
|
1188
1189
|
# This function call returns the substring of the 1st argument that occurs
|
1189
1190
|
# after the string given in the 2nd argument. For example:
|
1190
1191
|
#
|
1191
|
-
# substring-
|
1192
|
+
# substring-after("2014-08-25", "-")
|
1192
1193
|
#
|
1193
1194
|
# This would return "08-25" as it occurs after the first "-".
|
1194
1195
|
#
|
data/lib/oga/xpath/lexer.rb
CHANGED
@@ -1588,7 +1588,7 @@ ts = p
|
|
1588
1588
|
begin
|
1589
1589
|
add_token(:T_SLASH) end
|
1590
1590
|
when 33 then
|
1591
|
-
# line
|
1591
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1592
1592
|
begin
|
1593
1593
|
add_token(:T_ADD) end
|
1594
1594
|
when 12 then
|
@@ -1597,32 +1597,20 @@ ts = p
|
|
1597
1597
|
te = p+1
|
1598
1598
|
end
|
1599
1599
|
when 11 then
|
1600
|
-
# line
|
1600
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1601
1601
|
begin
|
1602
1602
|
te = p+1
|
1603
1603
|
end
|
1604
1604
|
when 8 then
|
1605
|
-
# line
|
1605
|
+
# line 349 "lib/oga/xpath/lexer.rl"
|
1606
1606
|
begin
|
1607
1607
|
te = p+1
|
1608
1608
|
begin
|
1609
1609
|
emit(:T_TYPE_TEST, ts, te - 2)
|
1610
1610
|
end
|
1611
1611
|
end
|
1612
|
-
when 20 then
|
1613
|
-
# line 365 "lib/oga/xpath/lexer.rl"
|
1614
|
-
begin
|
1615
|
-
te = p+1
|
1616
|
-
begin add_token(:T_LBRACK) end
|
1617
|
-
end
|
1618
|
-
when 21 then
|
1619
|
-
# line 366 "lib/oga/xpath/lexer.rl"
|
1620
|
-
begin
|
1621
|
-
te = p+1
|
1622
|
-
begin add_token(:T_RBRACK) end
|
1623
|
-
end
|
1624
1612
|
when 2 then
|
1625
|
-
# line
|
1613
|
+
# line 238 "lib/oga/xpath/lexer.rl"
|
1626
1614
|
begin
|
1627
1615
|
te = p+1
|
1628
1616
|
begin
|
@@ -1630,7 +1618,7 @@ te = p+1
|
|
1630
1618
|
end
|
1631
1619
|
end
|
1632
1620
|
when 7 then
|
1633
|
-
# line
|
1621
|
+
# line 260 "lib/oga/xpath/lexer.rl"
|
1634
1622
|
begin
|
1635
1623
|
te = p+1
|
1636
1624
|
begin
|
@@ -1638,7 +1626,7 @@ te = p+1
|
|
1638
1626
|
end
|
1639
1627
|
end
|
1640
1628
|
when 19 then
|
1641
|
-
# line
|
1629
|
+
# line 272 "lib/oga/xpath/lexer.rl"
|
1642
1630
|
begin
|
1643
1631
|
te = p+1
|
1644
1632
|
begin
|
@@ -1659,12 +1647,12 @@ te = p+1
|
|
1659
1647
|
end
|
1660
1648
|
end
|
1661
1649
|
when 22 then
|
1662
|
-
# line
|
1650
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1663
1651
|
begin
|
1664
1652
|
te = p
|
1665
1653
|
p = p - 1; end
|
1666
1654
|
when 30 then
|
1667
|
-
# line
|
1655
|
+
# line 362 "lib/oga/xpath/lexer.rl"
|
1668
1656
|
begin
|
1669
1657
|
te = p
|
1670
1658
|
p = p - 1; begin
|
@@ -1672,7 +1660,7 @@ p = p - 1; begin
|
|
1672
1660
|
end
|
1673
1661
|
end
|
1674
1662
|
when 35 then
|
1675
|
-
# line
|
1663
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1676
1664
|
begin
|
1677
1665
|
te = p
|
1678
1666
|
p = p - 1; begin
|
@@ -1682,7 +1670,7 @@ p = p - 1; begin
|
|
1682
1670
|
end
|
1683
1671
|
end
|
1684
1672
|
when 36 then
|
1685
|
-
# line
|
1673
|
+
# line 219 "lib/oga/xpath/lexer.rl"
|
1686
1674
|
begin
|
1687
1675
|
te = p
|
1688
1676
|
p = p - 1; begin
|
@@ -1692,7 +1680,7 @@ p = p - 1; begin
|
|
1692
1680
|
end
|
1693
1681
|
end
|
1694
1682
|
when 37 then
|
1695
|
-
# line
|
1683
|
+
# line 272 "lib/oga/xpath/lexer.rl"
|
1696
1684
|
begin
|
1697
1685
|
te = p
|
1698
1686
|
p = p - 1; begin
|
@@ -1713,7 +1701,7 @@ p = p - 1; begin
|
|
1713
1701
|
end
|
1714
1702
|
end
|
1715
1703
|
when 31 then
|
1716
|
-
# line
|
1704
|
+
# line 199 "lib/oga/xpath/lexer.rl"
|
1717
1705
|
begin
|
1718
1706
|
te = p
|
1719
1707
|
p = p - 1; begin
|
@@ -1721,12 +1709,12 @@ p = p - 1; begin
|
|
1721
1709
|
end
|
1722
1710
|
end
|
1723
1711
|
when 1 then
|
1724
|
-
# line
|
1712
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1725
1713
|
begin
|
1726
1714
|
begin p = ((te))-1; end
|
1727
1715
|
end
|
1728
1716
|
when 6 then
|
1729
|
-
# line
|
1717
|
+
# line 199 "lib/oga/xpath/lexer.rl"
|
1730
1718
|
begin
|
1731
1719
|
begin p = ((te))-1; end
|
1732
1720
|
begin
|
@@ -1744,14 +1732,14 @@ p = p - 1; begin
|
|
1744
1732
|
next
|
1745
1733
|
end
|
1746
1734
|
end
|
1747
|
-
when
|
1735
|
+
when 6 then
|
1748
1736
|
begin begin p = ((te))-1; end
|
1749
1737
|
|
1750
1738
|
value = slice_input(ts, te).to_i
|
1751
1739
|
|
1752
1740
|
add_token(:T_INT, value)
|
1753
1741
|
end
|
1754
|
-
when
|
1742
|
+
when 7 then
|
1755
1743
|
begin begin p = ((te))-1; end
|
1756
1744
|
|
1757
1745
|
value = slice_input(ts, te).to_f
|
@@ -1767,7 +1755,7 @@ end
|
|
1767
1755
|
# line 185 "lib/oga/xpath/lexer.rl"
|
1768
1756
|
begin
|
1769
1757
|
add_token(:T_LPAREN) end
|
1770
|
-
# line
|
1758
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1771
1759
|
begin
|
1772
1760
|
te = p+1
|
1773
1761
|
end
|
@@ -1775,7 +1763,7 @@ te = p+1
|
|
1775
1763
|
# line 186 "lib/oga/xpath/lexer.rl"
|
1776
1764
|
begin
|
1777
1765
|
add_token(:T_RPAREN) end
|
1778
|
-
# line
|
1766
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1779
1767
|
begin
|
1780
1768
|
te = p+1
|
1781
1769
|
end
|
@@ -1783,7 +1771,7 @@ te = p+1
|
|
1783
1771
|
# line 187 "lib/oga/xpath/lexer.rl"
|
1784
1772
|
begin
|
1785
1773
|
add_token(:T_COMMA) end
|
1786
|
-
# line
|
1774
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1787
1775
|
begin
|
1788
1776
|
te = p+1
|
1789
1777
|
end
|
@@ -1791,119 +1779,135 @@ te = p+1
|
|
1791
1779
|
# line 188 "lib/oga/xpath/lexer.rl"
|
1792
1780
|
begin
|
1793
1781
|
add_token(:T_COLON) end
|
1794
|
-
# line
|
1782
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1783
|
+
begin
|
1784
|
+
te = p+1
|
1785
|
+
end
|
1786
|
+
when 20 then
|
1787
|
+
# line 189 "lib/oga/xpath/lexer.rl"
|
1788
|
+
begin
|
1789
|
+
add_token(:T_LBRACK) end
|
1790
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1791
|
+
begin
|
1792
|
+
te = p+1
|
1793
|
+
end
|
1794
|
+
when 21 then
|
1795
|
+
# line 190 "lib/oga/xpath/lexer.rl"
|
1796
|
+
begin
|
1797
|
+
add_token(:T_RBRACK) end
|
1798
|
+
# line 368 "lib/oga/xpath/lexer.rl"
|
1795
1799
|
begin
|
1796
1800
|
te = p+1
|
1797
1801
|
end
|
1798
1802
|
when 43 then
|
1799
|
-
# line
|
1803
|
+
# line 295 "lib/oga/xpath/lexer.rl"
|
1800
1804
|
begin
|
1801
1805
|
add_token(:T_PIPE) end
|
1802
|
-
# line
|
1806
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1803
1807
|
begin
|
1804
1808
|
te = p
|
1805
1809
|
p = p - 1; end
|
1806
1810
|
when 32 then
|
1807
|
-
# line
|
1811
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1808
1812
|
begin
|
1809
1813
|
add_token(:T_ADD) end
|
1810
|
-
# line
|
1814
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1811
1815
|
begin
|
1812
1816
|
te = p
|
1813
1817
|
p = p - 1; end
|
1814
1818
|
when 40 then
|
1815
|
-
# line
|
1819
|
+
# line 297 "lib/oga/xpath/lexer.rl"
|
1816
1820
|
begin
|
1817
1821
|
add_token(:T_EQ) end
|
1818
|
-
# line
|
1822
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1819
1823
|
begin
|
1820
1824
|
te = p
|
1821
1825
|
p = p - 1; end
|
1822
1826
|
when 29 then
|
1823
|
-
# line
|
1827
|
+
# line 298 "lib/oga/xpath/lexer.rl"
|
1824
1828
|
begin
|
1825
1829
|
add_token(:T_NEQ) end
|
1826
|
-
# line
|
1830
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1827
1831
|
begin
|
1828
1832
|
te = p
|
1829
1833
|
p = p - 1; end
|
1830
1834
|
when 38 then
|
1831
|
-
# line
|
1835
|
+
# line 299 "lib/oga/xpath/lexer.rl"
|
1832
1836
|
begin
|
1833
1837
|
add_token(:T_LT) end
|
1834
|
-
# line
|
1838
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1835
1839
|
begin
|
1836
1840
|
te = p
|
1837
1841
|
p = p - 1; end
|
1838
1842
|
when 41 then
|
1839
|
-
# line
|
1843
|
+
# line 300 "lib/oga/xpath/lexer.rl"
|
1840
1844
|
begin
|
1841
1845
|
add_token(:T_GT) end
|
1842
|
-
# line
|
1846
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1843
1847
|
begin
|
1844
1848
|
te = p
|
1845
1849
|
p = p - 1; end
|
1846
1850
|
when 39 then
|
1847
|
-
# line
|
1851
|
+
# line 301 "lib/oga/xpath/lexer.rl"
|
1848
1852
|
begin
|
1849
1853
|
add_token(:T_LTE) end
|
1850
|
-
# line
|
1854
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1851
1855
|
begin
|
1852
1856
|
te = p
|
1853
1857
|
p = p - 1; end
|
1854
1858
|
when 42 then
|
1855
|
-
# line
|
1859
|
+
# line 302 "lib/oga/xpath/lexer.rl"
|
1856
1860
|
begin
|
1857
1861
|
add_token(:T_GTE) end
|
1858
|
-
# line
|
1862
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1859
1863
|
begin
|
1860
1864
|
te = p
|
1861
1865
|
p = p - 1; end
|
1862
1866
|
when 25 then
|
1863
|
-
# line
|
1867
|
+
# line 312 "lib/oga/xpath/lexer.rl"
|
1864
1868
|
begin
|
1865
1869
|
add_token(:T_AND) end
|
1866
|
-
# line
|
1870
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1867
1871
|
begin
|
1868
1872
|
te = p
|
1869
1873
|
p = p - 1; end
|
1870
1874
|
when 28 then
|
1871
|
-
# line
|
1875
|
+
# line 313 "lib/oga/xpath/lexer.rl"
|
1872
1876
|
begin
|
1873
1877
|
add_token(:T_OR) end
|
1874
|
-
# line
|
1878
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1875
1879
|
begin
|
1876
1880
|
te = p
|
1877
1881
|
p = p - 1; end
|
1878
1882
|
when 26 then
|
1879
|
-
# line
|
1883
|
+
# line 314 "lib/oga/xpath/lexer.rl"
|
1880
1884
|
begin
|
1881
1885
|
add_token(:T_DIV) end
|
1882
|
-
# line
|
1886
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1883
1887
|
begin
|
1884
1888
|
te = p
|
1885
1889
|
p = p - 1; end
|
1886
1890
|
when 27 then
|
1887
|
-
# line
|
1891
|
+
# line 315 "lib/oga/xpath/lexer.rl"
|
1888
1892
|
begin
|
1889
1893
|
add_token(:T_MOD) end
|
1890
|
-
# line
|
1894
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1891
1895
|
begin
|
1892
1896
|
te = p
|
1893
1897
|
p = p - 1; end
|
1894
1898
|
when 23 then
|
1895
|
-
# line
|
1899
|
+
# line 316 "lib/oga/xpath/lexer.rl"
|
1896
1900
|
begin
|
1897
1901
|
add_token(:T_MUL) end
|
1898
|
-
# line
|
1902
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1899
1903
|
begin
|
1900
1904
|
te = p
|
1901
1905
|
p = p - 1; end
|
1902
1906
|
when 24 then
|
1903
|
-
# line
|
1907
|
+
# line 317 "lib/oga/xpath/lexer.rl"
|
1904
1908
|
begin
|
1905
1909
|
add_token(:T_SUB) end
|
1906
|
-
# line
|
1910
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1907
1911
|
begin
|
1908
1912
|
te = p
|
1909
1913
|
p = p - 1; end
|
@@ -1912,7 +1916,7 @@ p = p - 1; end
|
|
1912
1916
|
begin
|
1913
1917
|
te = p+1
|
1914
1918
|
end
|
1915
|
-
# line
|
1919
|
+
# line 367 "lib/oga/xpath/lexer.rl"
|
1916
1920
|
begin
|
1917
1921
|
act = 1; end
|
1918
1922
|
when 4 then
|
@@ -1920,29 +1924,29 @@ act = 1; end
|
|
1920
1924
|
begin
|
1921
1925
|
te = p+1
|
1922
1926
|
end
|
1923
|
-
# line
|
1927
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1924
1928
|
begin
|
1925
|
-
act =
|
1929
|
+
act = 6; end
|
1926
1930
|
when 5 then
|
1927
1931
|
# line 1 "NONE"
|
1928
1932
|
begin
|
1929
1933
|
te = p+1
|
1930
1934
|
end
|
1931
|
-
# line
|
1935
|
+
# line 219 "lib/oga/xpath/lexer.rl"
|
1932
1936
|
begin
|
1933
|
-
act =
|
1937
|
+
act = 7; end
|
1934
1938
|
when 34 then
|
1935
1939
|
# line 1 "NONE"
|
1936
1940
|
begin
|
1937
1941
|
te = p+1
|
1938
1942
|
end
|
1939
|
-
# line
|
1943
|
+
# line 296 "lib/oga/xpath/lexer.rl"
|
1940
1944
|
begin
|
1941
1945
|
add_token(:T_ADD) end
|
1942
|
-
# line
|
1946
|
+
# line 213 "lib/oga/xpath/lexer.rl"
|
1943
1947
|
begin
|
1944
|
-
act =
|
1945
|
-
# line
|
1948
|
+
act = 6; end
|
1949
|
+
# line 1949 "lib/oga/xpath/lexer.rb"
|
1946
1950
|
end
|
1947
1951
|
end
|
1948
1952
|
end
|
@@ -1956,7 +1960,7 @@ ts = nil; end
|
|
1956
1960
|
begin
|
1957
1961
|
act = 0
|
1958
1962
|
end
|
1959
|
-
# line
|
1963
|
+
# line 1963 "lib/oga/xpath/lexer.rb"
|
1960
1964
|
end
|
1961
1965
|
|
1962
1966
|
if cs == 0
|
@@ -2036,7 +2040,7 @@ end
|
|
2036
2040
|
end
|
2037
2041
|
|
2038
2042
|
|
2039
|
-
# line
|
2043
|
+
# line 379 "lib/oga/xpath/lexer.rl"
|
2040
2044
|
|
2041
2045
|
end # Lexer
|
2042
2046
|
end # XPath
|