ox 1.9.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

@@ -0,0 +1,198 @@
1
+ /* sax_buf.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef __OX_SAX_BUF_H__
32
+ #define __OX_SAX_BUF_H__
33
+
34
+ typedef struct _Buf {
35
+ char base[0x00001000];
36
+ char *head;
37
+ char *end;
38
+ char *tail;
39
+ char *read_end; /* one past last character read */
40
+ char *pro; /* protection start, buffer can not slide past this point */
41
+ char *str; /* start of current string being read */
42
+ int line;
43
+ int col;
44
+ int pro_line;
45
+ int pro_col;
46
+ int (*read_func)(struct _Buf *buf);
47
+ union {
48
+ int fd;
49
+ VALUE io;
50
+ const char *in_str;
51
+ };
52
+ struct _SaxDrive *dr;
53
+ } *Buf;
54
+
55
/* Snapshot of a read position. The position is kept as an offset from the
 * buffer's protection mark rather than as a raw pointer.
 */
struct _CheckPt {
    int		pro_dif;	/* tail - pro when the checkpoint was taken; negative means not set */
    int		line;		/* line counter at the checkpoint */
    int		col;		/* column counter at the checkpoint */
    char	c;		/* character immediately before tail at the checkpoint */
};

typedef struct _CheckPt	*CheckPt;

/* Initializer for a checkpoint that has not been set (pro_dif is negative). */
#define CHECK_PT_INIT { -1, 0, 0, '\0' }
63
+
64
+ extern void ox_sax_buf_init(Buf buf, VALUE io);
65
+ extern int ox_sax_buf_read(Buf buf);
66
+
67
+ static inline char
68
+ buf_get(Buf buf) {
69
+ //printf("*** drive get from '%s' from start: %ld buf: %p from read_end: %ld\n", buf->tail, buf->tail - buf->head, buf->head, buf->read_end - buf->tail);
70
+ if (buf->read_end <= buf->tail) {
71
+ if (0 != ox_sax_buf_read(buf)) {
72
+ return '\0';
73
+ }
74
+ }
75
+ if ('\n' == *buf->tail) {
76
+ buf->line++;
77
+ buf->col = 0;
78
+ }
79
+ buf->col++;
80
+
81
+ return *buf->tail++;
82
+ }
83
+
84
+ static inline void
85
+ buf_backup(Buf buf) {
86
+ buf->tail--;
87
+ buf->col--;
88
+ if (0 >= buf->col) {
89
+ buf->line--;
90
+ // allow col to be negative since we never backup twice in a row
91
+ }
92
+ }
93
+
94
+ static inline void
95
+ buf_protect(Buf buf) {
96
+ buf->pro = buf->tail;
97
+ buf->str = buf->tail; // can't have str before pro
98
+ buf->pro_line = buf->line;
99
+ buf->pro_col = buf->col;
100
+ }
101
+
102
+ static inline void
103
+ buf_reset(Buf buf) {
104
+ buf->tail = buf->pro;
105
+ buf->line = buf->pro_line;
106
+ buf->col = buf->pro_col;
107
+ }
108
+
109
+ /* Starts by reading a character so it is safe to use with an empty or
110
+ * compacted buffer.
111
+ */
112
+ static inline char
113
+ buf_next_non_white(Buf buf) {
114
+ char c;
115
+
116
+ while ('\0' != (c = buf_get(buf))) {
117
+ switch(c) {
118
+ case ' ':
119
+ case '\t':
120
+ case '\f':
121
+ case '\n':
122
+ case '\r':
123
+ break;
124
+ default:
125
+ return c;
126
+ }
127
+ }
128
+ return '\0';
129
+ }
130
+
131
+ /* Starts by reading a character so it is safe to use with an empty or
132
+ * compacted buffer.
133
+ */
134
+ static inline char
135
+ buf_next_white(Buf buf) {
136
+ char c;
137
+
138
+ while ('\0' != (c = buf_get(buf))) {
139
+ switch(c) {
140
+ case ' ':
141
+ case '\t':
142
+ case '\f':
143
+ case '\n':
144
+ case '\r':
145
+ case '\0':
146
+ return c;
147
+ default:
148
+ break;
149
+ }
150
+ }
151
+ return '\0';
152
+ }
153
+
154
+ static inline void
155
+ buf_cleanup(Buf buf) {
156
+ if (buf->base != buf->head) {
157
+ xfree(buf->head);
158
+ }
159
+ }
160
+
161
/* Returns 1 if c is a space, tab, form feed, newline, or carriage return and
 * 0 for every other character. Vertical tab is not treated as white space.
 */
static inline int
is_white(char c) {
    return (' ' == c || '\t' == c || '\f' == c || '\n' == c || '\r' == c);
}
175
+
176
+ static inline void
177
+ buf_checkpoint(Buf buf, CheckPt cp) {
178
+ cp->pro_dif = (int)(buf->tail - buf->pro);
179
+ cp->line = buf->line;
180
+ cp->col = buf->col;
181
+ cp->c = *(buf->tail - 1);
182
+ }
183
+
184
+ static inline int
185
+ buf_checkset(CheckPt cp) {
186
+ return (0 <= cp->pro_dif);
187
+ }
188
+
189
+ static inline char
190
+ buf_checkback(Buf buf, CheckPt cp) {
191
+ buf->tail = buf->pro + cp->pro_dif;
192
+ buf->line = cp->line;
193
+ buf->col = cp->col;
194
+ return cp->c;
195
+ }
196
+
197
+
198
+ #endif /* __OX_SAX_BUF_H__ */
@@ -0,0 +1,85 @@
1
+ /* sax_has.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef __OX_SAX_HAS_H__
32
+ #define __OX_SAX_HAS_H__
33
+
34
/* Flags filled in by has_init(). Each is non-zero when the handler supports
 * the corresponding callback or, for line and column, has the matching
 * instance variable defined.
 */
struct _Has {
    int		instruct;
    int		end_instruct;
    int		attr;
    int		attr_value;
    int		doctype;
    int		comment;
    int		cdata;
    int		text;
    int		value;
    int		start_element;
    int		end_element;
    int		error;
    int		line;		/* instance variable check, not a method check */
    int		column;		/* instance variable check, not a method check */
};

typedef struct _Has	*Has;
50
+
51
+ inline static int
52
+ respond_to(VALUE obj, ID method) {
53
+ #ifdef JRUBY_RUBY
54
+ /* There is a bug in JRuby where rb_respond_to() returns true (1) even if
55
+ * a method is private. */
56
+ {
57
+ VALUE args[1];
58
+
59
+ *args = ID2SYM(method);
60
+ return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
61
+ }
62
+ #else
63
+ return rb_respond_to(obj, method);
64
+ #endif
65
+ }
66
+
67
+ inline static void
68
+ has_init(Has has, VALUE handler) {
69
+ has->instruct = respond_to(handler, ox_instruct_id);
70
+ has->end_instruct = respond_to(handler, ox_end_instruct_id);
71
+ has->attr = respond_to(handler, ox_attr_id);
72
+ has->attr_value = respond_to(handler, ox_attr_value_id);
73
+ has->doctype = respond_to(handler, ox_doctype_id);
74
+ has->comment = respond_to(handler, ox_comment_id);
75
+ has->cdata = respond_to(handler, ox_cdata_id);
76
+ has->text = respond_to(handler, ox_text_id);
77
+ has->value = respond_to(handler, ox_value_id);
78
+ has->start_element = respond_to(handler, ox_start_element_id);
79
+ has->end_element = respond_to(handler, ox_end_element_id);
80
+ has->error = respond_to(handler, ox_error_id);
81
+ has->line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id));
82
+ has->column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id));
83
+ }
84
+
85
+ #endif /* __OX_SAX_HAS_H__ */
@@ -0,0 +1,217 @@
1
+ /* hint.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
#include <stdio.h>
#include <string.h>
#include <strings.h>

#include "sax_hint.h"
35
+
36
/* NULL terminated lists of element names, referenced from the last column of
 * html_hint_array below.
 * NOTE(review): presumably the elements an entry is allowed to appear under -
 * confirm against the struct _Hint declaration.
 */
static const char	*audio_video_0[]		= { "audio", "video", NULL };
static const char	*colgroup_0[]			= { "colgroup", NULL };
static const char	*details_0[]			= { "details", NULL };
static const char	*dl_0[]				= { "dl", NULL };
static const char	*dt_th_0[]			= { "dt", "th", NULL };
static const char	*fieldset_0[]			= { "fieldset", NULL };
static const char	*figure_0[]			= { "figure", NULL };
static const char	*frameset_0[]			= { "frameset", NULL };
static const char	*head_0[]			= { "head", NULL };
static const char	*html_0[]			= { "html", NULL };
static const char	*map_0[]			= { "map", NULL };
static const char	*ol_ul_menu_0[]			= { "ol", "ul", "menu", NULL };
static const char	*optgroup_select_datalist_0[]	= { "optgroup", "select", "datalist", NULL };
static const char	*ruby_0[]			= { "ruby", NULL };
static const char	*table_0[]			= { "table", NULL };
static const char	*tr_0[]				= { "tr", NULL };
52
+
53
+ static struct _Hint html_hint_array[] = {
54
+ { "a", 0, 0, 0 },
55
+ { "abbr", 0, 0, 0 },
56
+ { "acronym", 0, 0, 0 },
57
+ { "address", 0, 0, 0 },
58
+ { "applet", 0, 0, 0 },
59
+ { "area", 1, 0, map_0 },
60
+ { "article", 0, 0, 0 },
61
+ { "aside", 0, 0, 0 },
62
+ { "audio", 0, 0, 0 },
63
+ { "b", 0, 0, 0 },
64
+ { "base", 1, 0, head_0 },
65
+ { "basefont", 1, 0, head_0 },
66
+ { "bdi", 0, 0, 0 },
67
+ { "bdo", 0, 1, 0 },
68
+ { "big", 0, 0, 0 },
69
+ { "blockquote", 0, 0, 0 },
70
+ { "body", 0, 0, html_0 },
71
+ { "br", 1, 0, 0 },
72
+ { "button", 0, 0, 0 },
73
+ { "canvas", 0, 0, 0 },
74
+ { "caption", 0, 0, table_0 },
75
+ { "center", 0, 0, 0 },
76
+ { "cite", 0, 0, 0 },
77
+ { "code", 0, 0, 0 },
78
+ { "col", 1, 0, colgroup_0 },
79
+ { "colgroup", 0, 0, 0 },
80
+ { "command", 1, 0, 0 },
81
+ { "datalist", 0, 0, 0 },
82
+ { "dd", 0, 0, dl_0 },
83
+ { "del", 0, 0, 0 },
84
+ { "details", 0, 0, 0 },
85
+ { "dfn", 0, 0, 0 },
86
+ { "dialog", 0, 0, dt_th_0 },
87
+ { "dir", 0, 0, 0 },
88
+ { "div", 0, 1, 0 },
89
+ { "dl", 0, 0, 0 },
90
+ { "dt", 0, 1, dl_0 },
91
+ { "em", 0, 0, 0 },
92
+ { "embed", 1, 0, 0 },
93
+ { "fieldset", 0, 0, 0 },
94
+ { "figcaption", 0, 0, figure_0 },
95
+ { "figure", 0, 0, 0 },
96
+ { "font", 0, 1, 0 },
97
+ { "footer", 0, 0, 0 },
98
+ { "form", 0, 0, 0 },
99
+ { "frame", 1, 0, frameset_0 },
100
+ { "frameset", 0, 0, 0 },
101
+ { "h1", 0, 0, 0 },
102
+ { "h2", 0, 0, 0 },
103
+ { "h3", 0, 0, 0 },
104
+ { "h4", 0, 0, 0 },
105
+ { "h5", 0, 0, 0 },
106
+ { "h6", 0, 0, 0 },
107
+ { "head", 0, 0, html_0 },
108
+ { "header", 0, 0, 0 },
109
+ { "hgroup", 0, 0, 0 },
110
+ { "hr", 1, 0, 0 },
111
+ { "html", 0, 0, 0 },
112
+ { "i", 0, 0, 0 },
113
+ { "iframe", 1, 0, 0 },
114
+ { "img", 1, 0, 0 },
115
+ { "input", 1, 0, 0 }, // somewhere under a form_0
116
+ { "ins", 0, 0, 0 },
117
+ { "kbd", 0, 0, 0 },
118
+ { "keygen", 1, 0, 0 },
119
+ { "label", 0, 0, 0 }, // somewhere under a form_0
120
+ { "legend", 0, 0, fieldset_0 },
121
+ { "li", 0, 0, ol_ul_menu_0 },
122
+ { "link", 1, 0, head_0 },
123
+ { "map", 0, 0, 0 },
124
+ { "mark", 0, 0, 0 },
125
+ { "menu", 0, 0, 0 },
126
+ { "meta", 1, 0, head_0 },
127
+ { "meter", 0, 0, 0 },
128
+ { "nav", 0, 0, 0 },
129
+ { "noframes", 0, 0, 0 },
130
+ { "noscript", 0, 0, 0 },
131
+ { "object", 0, 0, 0 },
132
+ { "ol", 0, 1, 0 },
133
+ { "optgroup", 0, 0, 0 },
134
+ { "option", 0, 0, optgroup_select_datalist_0 },
135
+ { "output", 0, 0, 0 },
136
+ { "p", 0, 0, 0 },
137
+ { "param", 1, 0, 0 },
138
+ { "pre", 0, 0, 0 },
139
+ { "progress", 0, 0, 0 },
140
+ { "q", 0, 0, 0 },
141
+ { "rp", 0, 0, ruby_0 },
142
+ { "rt", 0, 0, ruby_0 },
143
+ { "ruby", 0, 0, 0 },
144
+ { "s", 0, 0, 0 },
145
+ { "samp", 0, 0, 0 },
146
+ { "script", 0, 0, 0 },
147
+ { "section", 0, 1, 0 },
148
+ { "select", 0, 0, 0 },
149
+ { "small", 0, 0, 0 },
150
+ { "source", 0, 0, audio_video_0 },
151
+ { "span", 0, 1, 0 },
152
+ { "strike", 0, 0, 0 },
153
+ { "strong", 0, 0, 0 },
154
+ { "style", 0, 0, 0 },
155
+ { "sub", 0, 0, 0 },
156
+ { "summary", 0, 0, details_0 },
157
+ { "sup", 0, 0, 0 },
158
+ { "table", 0, 0, 0 },
159
+ { "tbody", 0, 0, table_0 },
160
+ { "td", 0, 0, tr_0 },
161
+ { "textarea", 0, 0, 0 },
162
+ { "tfoot", 0, 0, table_0 },
163
+ { "th", 0, 0, tr_0 },
164
+ { "thead", 0, 0, table_0 },
165
+ { "time", 0, 0, 0 },
166
+ { "title", 0, 0, head_0 },
167
+ { "tr", 0, 0, table_0 },
168
+ { "track", 1, 0, audio_video_0 },
169
+ { "tt", 0, 0, 0 },
170
+ { "u", 0, 0, 0 },
171
+ { "ul", 0, 0, 0 },
172
+ { "var", 0, 0, 0 },
173
+ { "video", 0, 0, 0 },
174
+ { "wbr", 1, 0, 0 },
175
+ };
176
+ static struct _Hints html_hints = {
177
+ "HTML",
178
+ html_hint_array,
179
+ sizeof(html_hint_array) / sizeof(*html_hint_array)
180
+ };
181
+
182
+ Hints
183
+ ox_hints_html() {
184
+ return &html_hints;
185
+ }
186
+
187
+ Hint
188
+ ox_hint_find(Hints hints, const char *name) {
189
+ if (0 != hints) {
190
+ Hint lo = hints->hints;
191
+ Hint hi = hints->hints + hints->size - 1;
192
+ Hint mid;
193
+ int res;
194
+
195
+ if (0 == (res = strcasecmp(name, lo->name))) {
196
+ return lo;
197
+ } else if (0 > res) {
198
+ return 0;
199
+ }
200
+ if (0 == (res = strcasecmp(name, hi->name))) {
201
+ return hi;
202
+ } else if (0 < res) {
203
+ return 0;
204
+ }
205
+ while (1 < hi - lo) {
206
+ mid = lo + (hi - lo) / 2;
207
+ if (0 == (res = strcasecmp(name, mid->name))) {
208
+ return mid;
209
+ } else if (0 < res) {
210
+ lo = mid;
211
+ } else {
212
+ hi = mid;
213
+ }
214
+ }
215
+ }
216
+ return 0;
217
+ }