tidy-ext 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.gitignore +4 -0
  2. data/LICENSE +50 -0
  3. data/README +12 -0
  4. data/Rakefile +60 -0
  5. data/VERSION +1 -0
  6. data/ext/tidy/access.c +3310 -0
  7. data/ext/tidy/access.h +279 -0
  8. data/ext/tidy/alloc.c +107 -0
  9. data/ext/tidy/attrask.c +209 -0
  10. data/ext/tidy/attrdict.c +2398 -0
  11. data/ext/tidy/attrdict.h +122 -0
  12. data/ext/tidy/attrget.c +213 -0
  13. data/ext/tidy/attrs.c +1911 -0
  14. data/ext/tidy/attrs.h +374 -0
  15. data/ext/tidy/buffio.c +232 -0
  16. data/ext/tidy/buffio.h +118 -0
  17. data/ext/tidy/charsets.c +1032 -0
  18. data/ext/tidy/charsets.h +14 -0
  19. data/ext/tidy/clean.c +2674 -0
  20. data/ext/tidy/clean.h +87 -0
  21. data/ext/tidy/config.c +1746 -0
  22. data/ext/tidy/config.h +153 -0
  23. data/ext/tidy/entities.c +419 -0
  24. data/ext/tidy/entities.h +24 -0
  25. data/ext/tidy/extconf.rb +5 -0
  26. data/ext/tidy/fileio.c +106 -0
  27. data/ext/tidy/fileio.h +46 -0
  28. data/ext/tidy/forward.h +69 -0
  29. data/ext/tidy/iconvtc.c +105 -0
  30. data/ext/tidy/iconvtc.h +15 -0
  31. data/ext/tidy/istack.c +373 -0
  32. data/ext/tidy/lexer.c +3825 -0
  33. data/ext/tidy/lexer.h +617 -0
  34. data/ext/tidy/localize.c +1882 -0
  35. data/ext/tidy/mappedio.c +329 -0
  36. data/ext/tidy/mappedio.h +16 -0
  37. data/ext/tidy/message.h +207 -0
  38. data/ext/tidy/parser.c +4408 -0
  39. data/ext/tidy/parser.h +76 -0
  40. data/ext/tidy/platform.h +636 -0
  41. data/ext/tidy/pprint.c +2276 -0
  42. data/ext/tidy/pprint.h +93 -0
  43. data/ext/tidy/ruby-tidy.c +195 -0
  44. data/ext/tidy/streamio.c +1407 -0
  45. data/ext/tidy/streamio.h +222 -0
  46. data/ext/tidy/tagask.c +286 -0
  47. data/ext/tidy/tags.c +955 -0
  48. data/ext/tidy/tags.h +235 -0
  49. data/ext/tidy/tidy-int.h +129 -0
  50. data/ext/tidy/tidy.h +1097 -0
  51. data/ext/tidy/tidyenum.h +622 -0
  52. data/ext/tidy/tidylib.c +1751 -0
  53. data/ext/tidy/tmbstr.c +306 -0
  54. data/ext/tidy/tmbstr.h +92 -0
  55. data/ext/tidy/utf8.c +539 -0
  56. data/ext/tidy/utf8.h +52 -0
  57. data/ext/tidy/version.h +14 -0
  58. data/ext/tidy/win32tc.c +795 -0
  59. data/ext/tidy/win32tc.h +19 -0
  60. data/spec/spec_helper.rb +5 -0
  61. data/spec/tidy/compat_spec.rb +44 -0
  62. data/spec/tidy/remote_uri_spec.rb +14 -0
  63. data/spec/tidy/test1.html +5 -0
  64. data/spec/tidy/tidy_spec.rb +34 -0
  65. metadata +125 -0
data/ext/tidy/tmbstr.c ADDED
@@ -0,0 +1,306 @@
1
+ /* tmbstr.c -- Tidy string utility functions
2
+
3
+ (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
4
+ See tidy.h for the copyright notice.
5
+
6
+ CVS Info :
7
+
8
+ $Author: arnaud02 $
9
+ $Date: 2006/12/29 16:31:08 $
10
+ $Revision: 1.13 $
11
+
12
+ */
13
+
14
+ #include "forward.h"
15
+ #include "tmbstr.h"
16
+ #include "lexer.h"
17
+
18
+ /* like strdup but using an allocator */
19
+ tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
20
+ {
21
+ tmbstr s = NULL;
22
+ if ( str )
23
+ {
24
+ uint len = TY_(tmbstrlen)( str );
25
+ tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
26
+ while ( 0 != (*cp++ = *str++) )
27
+ /**/;
28
+ }
29
+ return s;
30
+ }
31
+
32
+ /* like strndup but using an allocator */
33
+ tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
34
+ {
35
+ tmbstr s = NULL;
36
+ if ( str && len > 0 )
37
+ {
38
+ tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
39
+ while ( len-- > 0 && (*cp++ = *str++) )
40
+ /**/;
41
+ *cp = 0;
42
+ }
43
+ return s;
44
+ }
45
+
46
+ /* exactly same as strncpy */
47
+ uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
48
+ {
49
+ if ( s1 != NULL && s2 != NULL )
50
+ {
51
+ tmbstr cp = s1;
52
+ while ( *s2 && --size ) /* Predecrement: reserve byte */
53
+ *cp++ = *s2++; /* for NULL terminator. */
54
+ *cp = 0;
55
+ }
56
+ return size;
57
+ }
58
+
59
+ /* Allows expressions like: cp += tmbstrcpy( cp, "joebob" );
60
+ */
61
+ uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
62
+ {
63
+ uint ncpy = 0;
64
+ while (0 != (*s1++ = *s2++) )
65
+ ++ncpy;
66
+ return ncpy;
67
+ }
68
+
69
+ /* Allows expressions like: cp += tmbstrcat( cp, "joebob" );
70
+ */
71
+ uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
72
+ {
73
+ uint ncpy = 0;
74
+ while ( *s1 )
75
+ ++s1;
76
+
77
+ while (0 != (*s1++ = *s2++) )
78
+ ++ncpy;
79
+ return ncpy;
80
+ }
81
+
82
+ /* exactly same as strcmp */
83
+ int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
84
+ {
85
+ int c;
86
+ while ((c = *s1) == *s2)
87
+ {
88
+ if (c == '\0')
89
+ return 0;
90
+
91
+ ++s1;
92
+ ++s2;
93
+ }
94
+
95
+ return (*s1 > *s2 ? 1 : -1);
96
+ }
97
+
98
+ /* returns byte count, not char count */
99
+ uint TY_(tmbstrlen)( ctmbstr str )
100
+ {
101
+ uint len = 0;
102
+ if ( str )
103
+ {
104
+ while ( *str++ )
105
+ ++len;
106
+ }
107
+ return len;
108
+ }
109
+
110
+ /*
111
+ MS C 4.2 doesn't include strcasecmp.
112
+ Note that tolower and toupper won't
113
+ work on chars > 127.
114
+
115
+ Neither does ToLower()!
116
+ */
117
+ int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
118
+ {
119
+ uint c;
120
+
121
+ while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
122
+ {
123
+ if (c == '\0')
124
+ return 0;
125
+
126
+ ++s1;
127
+ ++s2;
128
+ }
129
+
130
+ return (*s1 > *s2 ? 1 : -1);
131
+ }
132
+
133
+ int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
134
+ {
135
+ uint c;
136
+
137
+ while ((c = (byte)*s1) == (byte)*s2)
138
+ {
139
+ if (c == '\0')
140
+ return 0;
141
+
142
+ if (n == 0)
143
+ return 0;
144
+
145
+ ++s1;
146
+ ++s2;
147
+ --n;
148
+ }
149
+
150
+ if (n == 0)
151
+ return 0;
152
+
153
+ return (*s1 > *s2 ? 1 : -1);
154
+ }
155
+
156
+ int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
157
+ {
158
+ uint c;
159
+
160
+ while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
161
+ {
162
+ if (c == '\0')
163
+ return 0;
164
+
165
+ if (n == 0)
166
+ return 0;
167
+
168
+ ++s1;
169
+ ++s2;
170
+ --n;
171
+ }
172
+
173
+ if (n == 0)
174
+ return 0;
175
+
176
+ return (*s1 > *s2 ? 1 : -1);
177
+ }
178
+
179
#if 0
/* Search the first `maxlen` bytes of s1 for the byte `cc`.
** Returns the offset of the first match from the beginning of s1,
** or -1 if not found.  The scan does not stop at a NUL byte.
*/
int TY_(tmbstrnchr)( ctmbstr s1, uint maxlen, tmbchar cc )
{
    uint pos;

    for ( pos = 0; pos < maxlen; ++pos )
    {
        if ( s1[pos] == cc )
            return (int) pos;
    }

    return -1;
}
#endif
197
+
198
+ ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
199
+ {
200
+ uint len2 = TY_(tmbstrlen)(s2);
201
+ int ix, diff = len1 - len2;
202
+
203
+ for ( ix = 0; ix <= diff; ++ix )
204
+ {
205
+ if ( TY_(tmbstrncmp)(s1+ix, s2, len2) == 0 )
206
+ return (ctmbstr) s1+ix;
207
+ }
208
+ return NULL;
209
+ }
210
+
211
#if 0
/* Case-insensitive search for the NUL-terminated string s2 within
** the first `len1` bytes of s1.  Returns a pointer to the first
** occurrence inside s1, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;
    int ix = 0;

    while ( ix <= last )
    {
        if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
        ++ix;
    }
    return NULL;
}
#endif
225
+
226
+ ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
227
+ {
228
+ uint len1 = TY_(tmbstrlen)(s1), len2 = TY_(tmbstrlen)(s2);
229
+ int ix, diff = len1 - len2;
230
+
231
+ for ( ix = 0; ix <= diff; ++ix )
232
+ {
233
+ if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
234
+ return (ctmbstr) s1+ix;
235
+ }
236
+ return NULL;
237
+ }
238
+
239
+ /* Transform ASCII chars in string to lower case */
240
+ tmbstr TY_(tmbstrtolower)( tmbstr s )
241
+ {
242
+ tmbstr cp;
243
+ for ( cp=s; *cp; ++cp )
244
+ *cp = (tmbchar) TY_(ToLower)( *cp );
245
+ return s;
246
+ }
247
+
248
+ /* Transform ASCII chars in string to upper case */
249
+ tmbstr TY_(tmbstrtoupper)(tmbstr s)
250
+ {
251
+ tmbstr cp;
252
+
253
+ for (cp = s; *cp; ++cp)
254
+ *cp = (tmbchar)TY_(ToUpper)(*cp);
255
+
256
+ return s;
257
+ }
258
+
259
#if 0
/* Report whether two file names are textually the same, comparing
** with or without regard to case depending on whether
** FILENAMES_CASE_SENSITIVE is set.
*/
Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
{
    int diff;

#if FILENAMES_CASE_SENSITIVE
    diff = TY_(tmbstrcmp)( filename1, filename2 );
#else
    diff = TY_(tmbstrcasecmp)( filename1, filename2 );
#endif

    return ( diff == 0 );
}
#endif
269
+
270
+ int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
271
+ {
272
+ int retval;
273
+ #if HAS_VSNPRINTF
274
+ retval = vsnprintf(buffer, count - 1, format, args);
275
+ /* todo: conditionally null-terminate the string? */
276
+ buffer[count - 1] = 0;
277
+ #else
278
+ retval = vsprintf(buffer, format, args);
279
+ #endif /* HAS_VSNPRINTF */
280
+ return retval;
281
+ }
282
+
283
+ int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
284
+ {
285
+ int retval;
286
+ va_list args;
287
+ va_start(args, format);
288
+ #if HAS_VSNPRINTF
289
+ retval = vsnprintf(buffer, count - 1, format, args);
290
+ /* todo: conditionally null-terminate the string? */
291
+ buffer[count - 1] = 0;
292
+ #else
293
+ retval = vsprintf(buffer, format, args);
294
+ #endif /* HAS_VSNPRINTF */
295
+ va_end(args);
296
+ return retval;
297
+ }
298
+
299
+ /*
300
+ * local variables:
301
+ * mode: c
302
+ * indent-tabs-mode: nil
303
+ * c-basic-offset: 4
304
+ * eval: (c-set-offset 'substatement-open 0)
305
+ * end:
306
+ */
data/ext/tidy/tmbstr.h ADDED
@@ -0,0 +1,92 @@
1
+ #ifndef __TMBSTR_H__
2
+ #define __TMBSTR_H__
3
+
4
+ /* tmbstr.h - Tidy string utility functions
5
+
6
+ (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7
+ See tidy.h for the copyright notice.
8
+
9
+ CVS Info :
10
+
11
+ $Author: arnaud02 $
12
+ $Date: 2006/12/29 16:31:09 $
13
+ $Revision: 1.11 $
14
+
15
+ */
16
+
17
+ #include "platform.h"
18
+
19
+ #ifdef __cplusplus
20
+ extern "C"
21
+ {
22
+ #endif
23
+
24
+ /* like strdup but using an allocator */
25
+ tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
26
+
27
+ /* like strndup but using an allocator */
28
+ tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
29
+
30
+ /* like strncpy, but always NUL-terminates s1, never zero-pads it, and returns the capacity left over (0 if s2 was truncated) */
31
+ uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
32
+
33
+ uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
34
+
35
+ uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
36
+
37
+ /* like strcmp, except that the result is always exactly -1, 0 or 1 */
38
+ int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
39
+
40
+ /* returns byte count, not char count */
41
+ uint TY_(tmbstrlen)( ctmbstr str );
42
+
43
+ /*
44
+ MS C 4.2 doesn't include strcasecmp.
45
+ Note that tolower and toupper won't
46
+ work on chars > 127.
47
+
48
+ Neither do Lexer.ToLower() or Lexer.ToUpper()!
49
+
50
+ We get away with this because, except for XML tags,
51
+ we are always comparing to ascii element and
52
+ attribute names defined by HTML specs.
53
+ */
54
+ int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
55
+
56
+ int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
57
+
58
+ int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
59
+
60
+ /* return offset of cc from beginning of s1,
61
+ ** -1 if not found.
62
+ */
63
+ /* int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
64
+
65
+ ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
66
+ /* ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
67
+ ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
68
+
69
+ /* Transform ASCII chars in string to lower case */
70
+ tmbstr TY_(tmbstrtolower)( tmbstr s );
71
+
72
+ /* Transform ASCII chars in string to upper case */
73
+ tmbstr TY_(tmbstrtoupper)( tmbstr s );
74
+
75
+ /* Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
76
+
77
+ int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
78
+ #ifdef __GNUC__
79
+ __attribute__((format(printf, 3, 0)))
80
+ #endif
81
+ ;
82
+ int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
83
+ #ifdef __GNUC__
84
+ __attribute__((format(printf, 3, 4)))
85
+ #endif
86
+ ;
87
+
88
+ #ifdef __cplusplus
89
+ } /* extern "C" */
90
+ #endif
91
+
92
+ #endif /* __TMBSTR_H__ */