webtranslateit-hpricot 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
@@ -0,0 +1,155 @@
|
|
1
|
+
import java.io.IOException;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.RubyArray;
|
5
|
+
import org.jruby.RubyClass;
|
6
|
+
import org.jruby.RubyHash;
|
7
|
+
import org.jruby.RubyModule;
|
8
|
+
import org.jruby.RubyNumeric;
|
9
|
+
import org.jruby.RubyObject;
|
10
|
+
import org.jruby.RubyObjectAdapter;
|
11
|
+
import org.jruby.RubyRegexp;
|
12
|
+
import org.jruby.RubyString;
|
13
|
+
import org.jruby.anno.JRubyMethod;
|
14
|
+
import org.jruby.exceptions.RaiseException;
|
15
|
+
import org.jruby.javasupport.JavaEmbedUtils;
|
16
|
+
import org.jruby.runtime.Arity;
|
17
|
+
import org.jruby.runtime.Block;
|
18
|
+
import org.jruby.runtime.ObjectAllocator;
|
19
|
+
import org.jruby.runtime.ThreadContext;
|
20
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
21
|
+
import org.jruby.runtime.callback.Callback;
|
22
|
+
import org.jruby.exceptions.RaiseException;
|
23
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
24
|
+
import org.jruby.util.ByteList;
|
25
|
+
|
26
|
+
public class HpricotCss {
|
27
|
+
public void FILTER(String id) {
|
28
|
+
IRubyObject[] args = new IRubyObject[fargs];
|
29
|
+
System.arraycopy(fvals, 0, args, 0, fargs);
|
30
|
+
mod.callMethod(ctx, id, args);
|
31
|
+
tmpt.rb_clear();
|
32
|
+
fargs = 1;
|
33
|
+
}
|
34
|
+
|
35
|
+
public void FILTERAUTO() {
|
36
|
+
try {
|
37
|
+
FILTER(new String(data, ts, te - ts, "ISO-8859-1"));
|
38
|
+
} catch(java.io.UnsupportedEncodingException e) {}
|
39
|
+
}
|
40
|
+
|
41
|
+
public void PUSH(int aps, int ape) {
|
42
|
+
RubyString str = RubyString.newString(runtime, data, aps, ape-aps);
|
43
|
+
fvals[fargs++] = str;
|
44
|
+
tmpt.append(str);
|
45
|
+
}
|
46
|
+
|
47
|
+
private IRubyObject self, mod, str, node;
|
48
|
+
private int cs, act, eof, p, pe, ts, te, aps, ape, aps2, ape2;
|
49
|
+
private byte[] data;
|
50
|
+
|
51
|
+
private int fargs = 1;
|
52
|
+
private IRubyObject[] fvals = new IRubyObject[6];
|
53
|
+
private RubyArray focus;
|
54
|
+
private RubyArray tmpt;
|
55
|
+
private Ruby runtime;
|
56
|
+
private ThreadContext ctx;
|
57
|
+
|
58
|
+
public HpricotCss(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
|
59
|
+
this.self = self;
|
60
|
+
this.mod = mod;
|
61
|
+
this.str = str;
|
62
|
+
this.node = node;
|
63
|
+
this.runtime = self.getRuntime();
|
64
|
+
this.ctx = runtime.getCurrentContext();
|
65
|
+
this.focus = RubyArray.newArray(runtime, node);
|
66
|
+
this.tmpt = runtime.newArray();
|
67
|
+
|
68
|
+
fvals[0] = focus;
|
69
|
+
|
70
|
+
if(!(str instanceof RubyString)) {
|
71
|
+
throw runtime.newArgumentError("bad CSS selector, String only please.");
|
72
|
+
}
|
73
|
+
|
74
|
+
ByteList bl = ((RubyString)str).getByteList();
|
75
|
+
|
76
|
+
data = bl.bytes;
|
77
|
+
p = bl.begin;
|
78
|
+
pe = p + bl.realSize;
|
79
|
+
eof = pe;
|
80
|
+
}
|
81
|
+
|
82
|
+
%%{
|
83
|
+
machine hpricot_css;
|
84
|
+
|
85
|
+
action a {
|
86
|
+
aps = p;
|
87
|
+
}
|
88
|
+
|
89
|
+
action b {
|
90
|
+
ape = p;
|
91
|
+
PUSH(aps, ape);
|
92
|
+
}
|
93
|
+
|
94
|
+
action c {
|
95
|
+
ape = p;
|
96
|
+
aps2 = p;
|
97
|
+
}
|
98
|
+
|
99
|
+
action d {
|
100
|
+
ape2 = p;
|
101
|
+
PUSH(aps, ape);
|
102
|
+
PUSH(aps2, ape2);
|
103
|
+
}
|
104
|
+
|
105
|
+
commas = space* "," space*;
|
106
|
+
traverse = [>+~];
|
107
|
+
sdot = "\\.";
|
108
|
+
utfw = alnum | "_" | "-" |
|
109
|
+
(0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) |
|
110
|
+
(0xe0..0xef 0x80..0xbf 0x80..0xbf) |
|
111
|
+
(0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
112
|
+
utfword = utfw+;
|
113
|
+
utfname = (utfw | sdot)+;
|
114
|
+
quote1 = "'" [^']* "'";
|
115
|
+
quote2 = '"' [^"]* '"';
|
116
|
+
|
117
|
+
cssid = "#" %a utfname %b;
|
118
|
+
cssclass = "." %a utfname %b;
|
119
|
+
cssname = "[name=" %a utfname %b "]";
|
120
|
+
cssattr = "[" %a utfname %c space* [^ \n\t]? "=" %d space* (quote1 | quote2 | [^\]]+) "]";
|
121
|
+
csstag = utfname >a %b;
|
122
|
+
cssmod = ("even" | "odd" | (digit | "n" | "+" | "-")* );
|
123
|
+
csschild = ":" %a ("only" | "nth" | "last" | "first") "-child" %b ("(" %a cssmod %b ")")?;
|
124
|
+
csspos = ":" %a ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %b ("(" %a digit+ %b ")")?;
|
125
|
+
pseudop = "(" [^)]+ ")";
|
126
|
+
pseudoq = "'" (pseudop+ | [^'()]*) "'" |
|
127
|
+
'"' (pseudop+ | [^"()]*) '"' |
|
128
|
+
(pseudop+ | [^"()]*);
|
129
|
+
pseudo = ":" %a utfname %b ("(" %a pseudoq %b ")")?;
|
130
|
+
|
131
|
+
main := |*
|
132
|
+
cssid => { FILTER("ID"); };
|
133
|
+
cssclass => { FILTER("CLASS"); };
|
134
|
+
cssname => { FILTER("NAME"); };
|
135
|
+
cssattr => { FILTER("ATTR"); };
|
136
|
+
csstag => { FILTER("TAG"); };
|
137
|
+
cssmod => { FILTER("MOD"); };
|
138
|
+
csschild => { FILTER("CHILD"); };
|
139
|
+
csspos => { FILTER("POS"); };
|
140
|
+
pseudo => { FILTER("PSUEDO"); };
|
141
|
+
commas => { focus = RubyArray.newArray(runtime, node); };
|
142
|
+
traverse => { FILTERAUTO(); };
|
143
|
+
space;
|
144
|
+
*|;
|
145
|
+
|
146
|
+
write data nofinal;
|
147
|
+
}%%
|
148
|
+
|
149
|
+
public IRubyObject scan() {
|
150
|
+
%% write init;
|
151
|
+
%% write exec;
|
152
|
+
|
153
|
+
return focus;
|
154
|
+
}
|
155
|
+
}
|
@@ -0,0 +1,120 @@
|
|
1
|
+
/*
|
2
|
+
* hpricot_css.rl
|
3
|
+
* ragel -C hpricot_css.rl -o hpricot_css.c
|
4
|
+
*
|
5
|
+
* Copyright (C) 2008 why the lucky stiff
|
6
|
+
*/
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
/*
 * FILTER(id): call the method whose name is the token `id` on `mod`, passing
 * the first `fargs` entries of `fvals`; then empty `tmpt` and reset `fargs`
 * to 1. Expects `mod`, `fargs`, `fvals` and `tmpt` to be in scope at the
 * point of use.
 *
 * Wrapped in do { } while (0) so the three statements behave as a single
 * statement (safe under an unbraced if/else); use as `FILTER(ID);`.
 */
#define FILTER(id) \
  do { \
    rb_funcall2(mod, rb_intern("" # id), fargs, fvals); \
    rb_ary_clear(tmpt); \
    fargs = 1; \
  } while (0)
|
13
|
+
/*
 * FILTERAUTO(): like FILTER, but the method name is the text of the current
 * token, i.e. the bytes in [ts, te). Expects `ts`, `te`, `mod`, `fargs`,
 * `fvals` and `tmpt` to be in scope at the point of use.
 *
 * The token length is converted to int (the type "%.*s" requires for its
 * precision) and clamped to the capacity of `filt`, so an unexpectedly long
 * token cannot overrun the buffer. The do { } while (0) wrapper keeps `filt`
 * scoped to the expansion and makes the macro a single statement.
 */
#define FILTERAUTO() \
  do { \
    char filt[10]; \
    int filt_len = (int)(te - ts); \
    if (filt_len > (int)sizeof(filt) - 1) \
      filt_len = (int)sizeof(filt) - 1; \
    sprintf(filt, "%.*s", filt_len, ts); \
    rb_funcall2(mod, rb_intern(filt), fargs, fvals); \
    rb_ary_clear(tmpt); \
    fargs = 1; \
  } while (0)
|
19
|
+
/*
 * STRNEW(a, len): build a Ruby string from `len` bytes starting at `a`.
 * Expands to rb_external_str_new when HAVE_RUBY_ENCODING_H is defined and to
 * rb_str_new otherwise.
 */
#ifndef HAVE_RUBY_ENCODING_H
#define STRNEW(a, len) rb_str_new((a), (len))
#else
#define STRNEW(a, len) rb_external_str_new((a), (len))
#endif
|
24
|
+
/*
 * PUSH(aps, ape): turn the bytes in [aps, ape) into a Ruby string, store it in
 * the next free slot of `fvals` (advancing `fargs`) and append it to `tmpt`.
 */
#define PUSH(aps, ape) \
  rb_ary_push(tmpt, (fvals[fargs++] = STRNEW(aps, ape - aps)))
|
25
|
+
/*
 * P(id): debugging aid that prints the string literal `id` followed by the
 * text of the current token, the bytes in [ts, te). The length is cast to int
 * because the "*" precision of "%.*s" must be an int, not a pointer difference.
 * The trailing semicolon is part of the macro, as before.
 */
#define P(id) printf(id ": %.*s\n", (int)(te - ts), ts);
|
26
|
+
|
27
|
+
%%{
  machine hpricot_css;

  # Remember where a capture starts.
  action cap_begin {
    aps = p;
  }

  # Close the capture opened by cap_begin and push it.
  action cap_end {
    ape = p;
    PUSH(aps, ape);
  }

  # Close the first capture and open a second one at the same position.
  action cap_split {
    ape = p;
    aps2 = p;
  }

  # Close the second capture and push both captures in order.
  action cap_pair {
    ape2 = p;
    PUSH(aps, ape);
    PUSH(aps2, ape2);
  }

  # Building blocks.
  commas = space* "," space*;
  traverse = [>+~];
  sdot = "\\.";
  utfw = alnum | "_" | "-" |
    (0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) |
    (0xe0..0xef 0x80..0xbf 0x80..0xbf) |
    (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
  utfword = utfw+;
  utfname = (utfw | sdot)+;
  quote1 = "'" [^']* "'";
  quote2 = '"' [^"]* '"';

  # Selector tokens, with the captures each one pushes.
  cssid = "#" %cap_begin utfname %cap_end;
  cssclass = "." %cap_begin utfname %cap_end;
  cssname = "[name=" %cap_begin utfname %cap_end "]";
  cssattr = "[" %cap_begin utfname %cap_split space* [^ \n\t]? "=" %cap_pair space* (quote1 | quote2 | [^\]]+) "]";
  csstag = utfname >cap_begin %cap_end;
  cssmod = ("even" | "odd" | (digit | "n" | "+" | "-")* );
  csschild = ":" %cap_begin ("only" | "nth" | "last" | "first") "-child" %cap_end ("(" %cap_begin cssmod %cap_end ")")?;
  csspos = ":" %cap_begin ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %cap_end ("(" %cap_begin digit+ %cap_end ")")?;
  pseudop = "(" [^)]+ ")";
  pseudoq = "'" (pseudop+ | [^'()]*) "'" |
            '"' (pseudop+ | [^"()]*) '"' |
            (pseudop+ | [^"()]*);
  pseudo = ":" %cap_begin utfname %cap_end ("(" %cap_begin pseudoq %cap_end ")")?;

  # Scanner: each recognized token dispatches to the filter of the same name.
  main := |*
    cssid => { FILTER(ID); };
    cssclass => { FILTER(CLASS); };
    cssname => { FILTER(NAME); };
    cssattr => { FILTER(ATTR); };
    csstag => { FILTER(TAG); };
    cssmod => { FILTER(MOD); };
    csschild => { FILTER(CHILD); };
    csspos => { FILTER(POS); };
    pseudo => { FILTER(PSUEDO); };
    commas => { focus = rb_ary_new3(1, node); };
    traverse => { FILTERAUTO(); };
    space;
  *|;

  write data nofinal;
}%%
|
93
|
+
|
94
|
+
/*
 * Scan the CSS selector `str`, calling the filter methods on `mod` for each
 * token the hpricot_css machine recognizes.
 *
 *   self - receiver (not used in this function body)
 *   mod  - object the filter methods are called on
 *   str  - the selector; must be a String
 *   node - initial sole element of the focus array
 *
 * Returns the focus array. Raises ArgumentError when `str` is not a String.
 */
VALUE hpricot_css(VALUE self, VALUE mod, VALUE str, VALUE node)
{
  /* NOTE(review): eof is declared but never assigned in this function --
   * confirm whether the generated scanner code reads it. */
  int cs, act, eof;
  char *p, *pe, *ts, *te, *aps, *ape, *aps2, *ape2;

  int fargs = 1;
  VALUE fvals[6];
  VALUE focus, tmpt;

  /*
   * Validate the argument before anything is registered with the GC:
   * rb_raise does not return, so raising after rb_gc_register_address would
   * leave the addresses of these stack locals registered once the frame is
   * gone.
   */
  if (TYPE(str) != T_STRING)
    rb_raise(rb_eArgError, "bad CSS selector, String only please.");

  focus = rb_ary_new3(1, node);
  tmpt = rb_ary_new();
  rb_gc_register_address(&focus);
  rb_gc_register_address(&tmpt);
  /* Slot 0 of the argument buffer always carries the focus array. */
  fvals[0] = focus;

  StringValue(str);
  p = RSTRING_PTR(str);
  pe = p + RSTRING_LEN(str);

  %% write init;
  %% write exec;

  rb_gc_unregister_address(&focus);
  rb_gc_unregister_address(&tmpt);
  return focus;
}
|