webtranslateit-hpricot 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
@@ -0,0 +1,155 @@
1
+ import java.io.IOException;
2
+
3
+ import org.jruby.Ruby;
4
+ import org.jruby.RubyArray;
5
+ import org.jruby.RubyClass;
6
+ import org.jruby.RubyHash;
7
+ import org.jruby.RubyModule;
8
+ import org.jruby.RubyNumeric;
9
+ import org.jruby.RubyObject;
10
+ import org.jruby.RubyObjectAdapter;
11
+ import org.jruby.RubyRegexp;
12
+ import org.jruby.RubyString;
13
+ import org.jruby.anno.JRubyMethod;
14
+ import org.jruby.exceptions.RaiseException;
15
+ import org.jruby.javasupport.JavaEmbedUtils;
16
+ import org.jruby.runtime.Arity;
17
+ import org.jruby.runtime.Block;
18
+ import org.jruby.runtime.ObjectAllocator;
19
+ import org.jruby.runtime.ThreadContext;
20
+ import org.jruby.runtime.builtin.IRubyObject;
21
+ import org.jruby.runtime.callback.Callback;
22
+ import org.jruby.exceptions.RaiseException;
23
+ import org.jruby.runtime.load.BasicLibraryService;
24
+ import org.jruby.util.ByteList;
25
+
26
+ public class HpricotCss {
27
+ public void FILTER(String id) {
28
+ IRubyObject[] args = new IRubyObject[fargs];
29
+ System.arraycopy(fvals, 0, args, 0, fargs);
30
+ mod.callMethod(ctx, id, args);
31
+ tmpt.rb_clear();
32
+ fargs = 1;
33
+ }
34
+
35
+ public void FILTERAUTO() {
36
+ try {
37
+ FILTER(new String(data, ts, te - ts, "ISO-8859-1"));
38
+ } catch(java.io.UnsupportedEncodingException e) {}
39
+ }
40
+
41
+ public void PUSH(int aps, int ape) {
42
+ RubyString str = RubyString.newString(runtime, data, aps, ape-aps);
43
+ fvals[fargs++] = str;
44
+ tmpt.append(str);
45
+ }
46
+
47
+ private IRubyObject self, mod, str, node;
48
+ private int cs, act, eof, p, pe, ts, te, aps, ape, aps2, ape2;
49
+ private byte[] data;
50
+
51
+ private int fargs = 1;
52
+ private IRubyObject[] fvals = new IRubyObject[6];
53
+ private RubyArray focus;
54
+ private RubyArray tmpt;
55
+ private Ruby runtime;
56
+ private ThreadContext ctx;
57
+
58
+ public HpricotCss(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
59
+ this.self = self;
60
+ this.mod = mod;
61
+ this.str = str;
62
+ this.node = node;
63
+ this.runtime = self.getRuntime();
64
+ this.ctx = runtime.getCurrentContext();
65
+ this.focus = RubyArray.newArray(runtime, node);
66
+ this.tmpt = runtime.newArray();
67
+
68
+ fvals[0] = focus;
69
+
70
+ if(!(str instanceof RubyString)) {
71
+ throw runtime.newArgumentError("bad CSS selector, String only please.");
72
+ }
73
+
74
+ ByteList bl = ((RubyString)str).getByteList();
75
+
76
+ data = bl.bytes;
77
+ p = bl.begin;
78
+ pe = p + bl.realSize;
79
+ eof = pe;
80
+ }
81
+
82
+ %%{
83
+ machine hpricot_css;
84
+
85
+ action a {
86
+ aps = p;
87
+ }
88
+
89
+ action b {
90
+ ape = p;
91
+ PUSH(aps, ape);
92
+ }
93
+
94
+ action c {
95
+ ape = p;
96
+ aps2 = p;
97
+ }
98
+
99
+ action d {
100
+ ape2 = p;
101
+ PUSH(aps, ape);
102
+ PUSH(aps2, ape2);
103
+ }
104
+
105
+ commas = space* "," space*;
106
+ traverse = [>+~];
107
+ sdot = "\\.";
108
+ utfw = alnum | "_" | "-" |
109
+ (0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) |
110
+ (0xe0..0xef 0x80..0xbf 0x80..0xbf) |
111
+ (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
112
+ utfword = utfw+;
113
+ utfname = (utfw | sdot)+;
114
+ quote1 = "'" [^']* "'";
115
+ quote2 = '"' [^"]* '"';
116
+
117
+ cssid = "#" %a utfname %b;
118
+ cssclass = "." %a utfname %b;
119
+ cssname = "[name=" %a utfname %b "]";
120
+ cssattr = "[" %a utfname %c space* [^ \n\t]? "=" %d space* (quote1 | quote2 | [^\]]+) "]";
121
+ csstag = utfname >a %b;
122
+ cssmod = ("even" | "odd" | (digit | "n" | "+" | "-")* );
123
+ csschild = ":" %a ("only" | "nth" | "last" | "first") "-child" %b ("(" %a cssmod %b ")")?;
124
+ csspos = ":" %a ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %b ("(" %a digit+ %b ")")?;
125
+ pseudop = "(" [^)]+ ")";
126
+ pseudoq = "'" (pseudop+ | [^'()]*) "'" |
127
+ '"' (pseudop+ | [^"()]*) '"' |
128
+ (pseudop+ | [^"()]*);
129
+ pseudo = ":" %a utfname %b ("(" %a pseudoq %b ")")?;
130
+
131
+ main := |*
132
+ cssid => { FILTER("ID"); };
133
+ cssclass => { FILTER("CLASS"); };
134
+ cssname => { FILTER("NAME"); };
135
+ cssattr => { FILTER("ATTR"); };
136
+ csstag => { FILTER("TAG"); };
137
+ cssmod => { FILTER("MOD"); };
138
+ csschild => { FILTER("CHILD"); };
139
+ csspos => { FILTER("POS"); };
140
+ pseudo => { FILTER("PSUEDO"); };
141
+ commas => { focus = RubyArray.newArray(runtime, node); };
142
+ traverse => { FILTERAUTO(); };
143
+ space;
144
+ *|;
145
+
146
+ write data nofinal;
147
+ }%%
148
+
149
+ public IRubyObject scan() {
150
+ %% write init;
151
+ %% write exec;
152
+
153
+ return focus;
154
+ }
155
+ }
@@ -0,0 +1,120 @@
1
+ /*
2
+ * hpricot_css.rl
3
+ * ragel -C hpricot_css.rl -o hpricot_css.c
4
+ *
5
+ * Copyright (C) 2008 why the lucky stiff
6
+ */
7
+ #include <ruby.h>
8
+
9
/*
 * FILTER(id): call the method named by the bare token `id` on `mod` with the
 * queued arguments fvals[0..fargs), then clear `tmpt` and reset the argument
 * count to 1. Wrapped in do/while(0) so it behaves as a single statement.
 */
#define FILTER(id) \
  do { \
    rb_funcall2(mod, rb_intern("" # id), fargs, fvals); \
    rb_ary_clear(tmpt); \
    fargs = 1; \
  } while (0)

/*
 * FILTERAUTO(): like FILTER, but the method name is the text of the current
 * token [ts, te). The precision argument of "%.*s" must be an int, so the
 * pointer difference is cast explicitly, and snprintf bounds the copy to the
 * size of the local buffer.
 */
#define FILTERAUTO() \
  do { \
    char filt[10]; \
    snprintf(filt, sizeof(filt), "%.*s", (int)(te - ts), ts); \
    rb_funcall2(mod, rb_intern(filt), fargs, fvals); \
    rb_ary_clear(tmpt); \
    fargs = 1; \
  } while (0)

/* STRNEW(a, len): build a Ruby string from len bytes at a. */
#ifdef HAVE_RUBY_ENCODING_H
#define STRNEW(a, len) rb_external_str_new((a), (len))
#else
#define STRNEW(a, len) rb_str_new((a), (len))
#endif

/*
 * PUSH(aps, ape): build a Ruby string from [aps, ape), store it in the next
 * free slot of fvals and append it to tmpt.
 */
#define PUSH(aps, ape) rb_ary_push(tmpt, fvals[fargs++] = STRNEW(aps, ape - aps))

/* P(id): debugging aid that prints the current token prefixed by `id`. */
#define P(id) printf(id ": %.*s\n", (int)(te - ts), ts);
26
+
27
+ %%{
28
+ machine hpricot_css;
29
+
30
+ action a {
31
+ aps = p;
32
+ }
33
+
34
+ action b {
35
+ ape = p;
36
+ PUSH(aps, ape);
37
+ }
38
+
39
+ action c {
40
+ ape = p;
41
+ aps2 = p;
42
+ }
43
+
44
+ action d {
45
+ ape2 = p;
46
+ PUSH(aps, ape);
47
+ PUSH(aps2, ape2);
48
+ }
49
+
50
+ commas = space* "," space*;
51
+ traverse = [>+~];
52
+ sdot = "\\.";
53
+ utfw = alnum | "_" | "-" |
54
+ (0xc4 0xa8..0xbf) | (0xc5..0xdf 0x80..0xbf) |
55
+ (0xe0..0xef 0x80..0xbf 0x80..0xbf) |
56
+ (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
57
+ utfword = utfw+;
58
+ utfname = (utfw | sdot)+;
59
+ quote1 = "'" [^']* "'";
60
+ quote2 = '"' [^"]* '"';
61
+
62
+ cssid = "#" %a utfname %b;
63
+ cssclass = "." %a utfname %b;
64
+ cssname = "[name=" %a utfname %b "]";
65
+ cssattr = "[" %a utfname %c space* [^ \n\t]? "=" %d space* (quote1 | quote2 | [^\]]+) "]";
66
+ csstag = utfname >a %b;
67
+ cssmod = ("even" | "odd" | (digit | "n" | "+" | "-")* );
68
+ csschild = ":" %a ("only" | "nth" | "last" | "first") "-child" %b ("(" %a cssmod %b ")")?;
69
+ csspos = ":" %a ("nth" | "eq" | "gt" | "lt" | "first" | "last" | "even" | "odd") %b ("(" %a digit+ %b ")")?;
70
+ pseudop = "(" [^)]+ ")";
71
+ pseudoq = "'" (pseudop+ | [^'()]*) "'" |
72
+ '"' (pseudop+ | [^"()]*) '"' |
73
+ (pseudop+ | [^"()]*);
74
+ pseudo = ":" %a utfname %b ("(" %a pseudoq %b ")")?;
75
+
76
+ main := |*
77
+ cssid => { FILTER(ID); };
78
+ cssclass => { FILTER(CLASS); };
79
+ cssname => { FILTER(NAME); };
80
+ cssattr => { FILTER(ATTR); };
81
+ csstag => { FILTER(TAG); };
82
+ cssmod => { FILTER(MOD); };
83
+ csschild => { FILTER(CHILD); };
84
+ csspos => { FILTER(POS); };
85
+ pseudo => { FILTER(PSUEDO); };
86
+ commas => { focus = rb_ary_new3(1, node); };
87
+ traverse => { FILTERAUTO(); };
88
+ space;
89
+ *|;
90
+
91
+ write data nofinal;
92
+ }%%
93
+
94
+ VALUE hpricot_css(VALUE self, VALUE mod, VALUE str, VALUE node)
95
+ {
96
+ int cs, act, eof;
97
+ char *p, *pe, *ts, *te, *aps, *ape, *aps2, *ape2;
98
+
99
+ int fargs = 1;
100
+ VALUE fvals[6];
101
+ VALUE focus = rb_ary_new3(1, node);
102
+ VALUE tmpt = rb_ary_new();
103
+ rb_gc_register_address(&focus);
104
+ rb_gc_register_address(&tmpt);
105
+ fvals[0] = focus;
106
+
107
+ if (TYPE(str) != T_STRING)
108
+ rb_raise(rb_eArgError, "bad CSS selector, String only please.");
109
+
110
+ StringValue(str);
111
+ p = RSTRING_PTR(str);
112
+ pe = p + RSTRING_LEN(str);
113
+
114
+ %% write init;
115
+ %% write exec;
116
+
117
+ rb_gc_unregister_address(&focus);
118
+ rb_gc_unregister_address(&tmpt);
119
+ return focus;
120
+ }