webtranslateit-hpricot 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
|
File without changes
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
%%{
|
|
2
|
+
|
|
3
|
+
machine hpricot_common;
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
# HTML tokens
|
|
7
|
+
# (a blatant rip from HTree)
|
|
8
|
+
#
|
|
9
|
+
newline = '\n' @{curline += 1;} ;  # a line feed; the embedded action increments curline each time one is matched
|
|
10
|
+
NameChar = [\-A-Za-z0-9._:?] ;  # one character of a name: ASCII letter, digit, or one of - . _ : ?
|
|
11
|
+
Name = [A-Za-z_:] NameChar* ;  # a name: starts with a letter, '_' or ':', followed by any number of NameChar
|
|
12
|
+
StartComment = "<!--" ;  # literal that opens a comment
|
|
13
|
+
EndComment = "-->" ;  # literal that closes a comment
|
|
14
|
+
StartCdata = "<![CDATA[" ;  # literal that opens a CDATA section
|
|
15
|
+
EndCdata = "]]>" ;  # literal that closes a CDATA section
|
|
16
|
+
|
|
17
|
+
NameCap = Name >_tag %tag;  # a Name wrapped in capture actions: _tag on entering, tag on leaving (actions are not defined in this machine)
|
|
18
|
+
NameAttr = NameChar+ >_akey %akey ;  # attribute name: one or more NameChar, with _akey on entering and akey on leaving
|
|
19
|
+
Q1Char = [^'] ;  # any character except a single quote
|
|
20
|
+
Q1Attr = Q1Char* >_aval %aval ;  # contents of a single-quoted value (may be empty); _aval entering action, aval leaving action
|
|
21
|
+
Q2Char = [^"] ;  # any character except a double quote
|
|
22
|
+
Q2Attr = Q2Char* >_aval %aval ;  # contents of a double-quoted value (may be empty); _aval entering action, aval leaving action
|
|
23
|
+
UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ;  # unquoted value: either one whitespace char, or a run whose first char is not whitespace, an angle bracket or a quote; only the second form carries the aunq leaving action
|
|
24
|
+
Nmtoken = NameChar+ >_akey %akey ;  # same pattern and actions as NameAttr; AttrEnd and AttrSet use it for an attribute written without "=" and a value
|
|
25
|
+
|
|
26
|
+
Attr = NameAttr space* "=" space* ('"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ ) space* ;  # name=value pair with a double-quoted, single-quoted or unquoted value; the unquoted form must be followed by at least one whitespace char
|
|
27
|
+
AttrEnd = ( NameAttr space* "=" space* UnqAttr? | Nmtoken >new_attr %save_attr ) ;  # attribute form allowed directly before the tag closer: name= with an optional unquoted value and no trailing whitespace required, or a bare Nmtoken
|
|
28
|
+
AttrSet = ( Attr >new_attr %save_attr | Nmtoken >new_attr space+ %save_attr ) ;  # one attribute inside a tag: a full Attr, or a bare Nmtoken followed by whitespace; new_attr on entering, save_attr on leaving
|
|
29
|
+
StartTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">" | "<" NameCap ">";  # opening tag: "<name", whitespace, any number of AttrSet, an optional AttrEnd, then ">"; or just "<name>"
|
|
30
|
+
EmptyTag = "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>" | "<" NameCap "/>" ;  # same shape as StartTag but terminated by "/>" instead of ">"
|
|
31
|
+
|
|
32
|
+
EndTag = "</" NameCap space* ">" ;  # closing tag: "</name" with optional whitespace before ">"
|
|
33
|
+
XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ;  # version value: one or more letters, digits or _ . : - ; _aval on entering, xmlver on leaving
|
|
34
|
+
XmlVersionInfo = space+ "version" space* "=" space* ("'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ;  # whitespace, then version=<value> with the value in either quote style
|
|
35
|
+
XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ;  # encoding name: a letter, then letters, digits or . _ - ; _aval on the first char, xmlenc on leaving
|
|
36
|
+
XmlEncodingDecl = space+ "encoding" space* "=" space* ("'" XmlEncName "'" | '"' XmlEncName '"' ) ;  # whitespace, then encoding=<name> with the name in either quote style
|
|
37
|
+
XmlYesNo = ("yes" | "no") >_aval %xmlsd ;  # the literal yes or no; _aval on entering, xmlsd on leaving
|
|
38
|
+
XmlSDDecl = space+ "standalone" space* "=" space* ("'" XmlYesNo "'" | '"' XmlYesNo '"') ;  # whitespace, then standalone=<yes|no> with the value in either quote style
|
|
39
|
+
XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;  # XML declaration: version is required, encoding and standalone are optional, and the "?" before the final ">" is optional too
|
|
40
|
+
|
|
41
|
+
SystemLiteral = '"' [^"]* >_aval %sysid '"' | "'" [^']* >_aval %sysid "'" ;  # quoted system identifier in either quote style; the contents exclude the delimiting quote; _aval on entering, sysid on leaving
|
|
42
|
+
PubidLiteral = '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"' |  # double-quoted public id: an apostrophe is ordinary content here
|
|
43
|
+
"'" [\t a-zA-Z0-9\-()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ;  # single-quoted public id: the apostrophe is excluded from the contents so the first one closes the literal and pubid cannot fire again on a later quote
|
|
44
|
+
ExternalID = ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ;  # SYSTEM, or PUBLIC followed by a public id; in both cases a following system literal is optional
|
|
45
|
+
DocType = "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space* ("[" [^\]]* "]" space*)? ">" ;  # doctype declaration: name, optional ExternalID, optional bracketed section that may not contain "]", then ">"
|
|
46
|
+
StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;  # start of a processing instruction: "<?", a Name (TEXT_PASS() runs on entering it), then at least one whitespace char
|
|
47
|
+
EndXmlProcIns = "?"? ">" ;  # end of a processing instruction: ">" optionally preceded by "?"
|
|
48
|
+
|
|
49
|
+
html_comment := |*  # scanner that main jumps to after matching StartComment
|
|
50
|
+
EndComment @{ EBLK(comment, 3); fgoto main; };  # on "-->": call EBLK(comment, 3) and return to main (3 is presumably the delimiter length; confirm against EBLK)
|
|
51
|
+
any | newline { TEXT_PASS(); };  # every other character: TEXT_PASS(); newline is listed so its curline increment is part of the pattern
|
|
52
|
+
*|;
|
|
53
|
+
|
|
54
|
+
html_cdata := |*  # scanner that main jumps to after matching StartCdata
|
|
55
|
+
EndCdata @{ EBLK(cdata, 3); fgoto main; };  # on "]]>": call EBLK(cdata, 3) and return to main (3 is presumably the delimiter length; confirm against EBLK)
|
|
56
|
+
any | newline { TEXT_PASS(); };  # every other character: TEXT_PASS(); newline is listed so its curline increment is part of the pattern
|
|
57
|
+
*|;
|
|
58
|
+
|
|
59
|
+
html_procins := |*  # scanner that main jumps to after matching StartXmlProcIns
|
|
60
|
+
EndXmlProcIns @{ EBLK(procins, 2); fgoto main; };  # on ">" or "?>": call EBLK(procins, 2) and return to main (NOTE(review): the 2 is passed even when only ">" matched; confirm EBLK handles that)
|
|
61
|
+
any | newline { TEXT_PASS(); };  # every other character: TEXT_PASS(); newline is listed so its curline increment is part of the pattern
|
|
62
|
+
*|;
|
|
63
|
+
|
|
64
|
+
main := |*  # top-level scanner; each markup pattern runs newEle on entering
|
|
65
|
+
XmlDecl >newEle { ELE(xmldecl); };  # XML declaration: ELE(xmldecl)
|
|
66
|
+
DocType >newEle { ELE(doctype); };  # doctype declaration: ELE(doctype)
|
|
67
|
+
StartXmlProcIns >newEle { fgoto html_procins; };  # processing instruction start: continue in the html_procins scanner
|
|
68
|
+
StartTag >newEle { ELE(stag); };  # opening tag: ELE(stag)
|
|
69
|
+
EndTag >newEle { ELE(etag); };  # closing tag: ELE(etag)
|
|
70
|
+
EmptyTag >newEle { ELE(emptytag); };  # self-closing tag: ELE(emptytag)
|
|
71
|
+
StartComment >newEle { fgoto html_comment; };  # comment start: continue in the html_comment scanner
|
|
72
|
+
StartCdata >newEle { fgoto html_cdata; };  # CDATA start: continue in the html_cdata scanner
|
|
73
|
+
any | newline { TEXT_PASS(); };  # fallback for a single character of anything else: TEXT_PASS(); newline is listed so its curline increment is part of the pattern
|
|
74
|
+
*|;
|
|
75
|
+
|
|
76
|
+
}%%;
|