tidy-ext 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/.gitignore +4 -0
  2. data/LICENSE +50 -0
  3. data/README +12 -0
  4. data/Rakefile +60 -0
  5. data/VERSION +1 -0
  6. data/ext/tidy/access.c +3310 -0
  7. data/ext/tidy/access.h +279 -0
  8. data/ext/tidy/alloc.c +107 -0
  9. data/ext/tidy/attrask.c +209 -0
  10. data/ext/tidy/attrdict.c +2398 -0
  11. data/ext/tidy/attrdict.h +122 -0
  12. data/ext/tidy/attrget.c +213 -0
  13. data/ext/tidy/attrs.c +1911 -0
  14. data/ext/tidy/attrs.h +374 -0
  15. data/ext/tidy/buffio.c +232 -0
  16. data/ext/tidy/buffio.h +118 -0
  17. data/ext/tidy/charsets.c +1032 -0
  18. data/ext/tidy/charsets.h +14 -0
  19. data/ext/tidy/clean.c +2674 -0
  20. data/ext/tidy/clean.h +87 -0
  21. data/ext/tidy/config.c +1746 -0
  22. data/ext/tidy/config.h +153 -0
  23. data/ext/tidy/entities.c +419 -0
  24. data/ext/tidy/entities.h +24 -0
  25. data/ext/tidy/extconf.rb +5 -0
  26. data/ext/tidy/fileio.c +106 -0
  27. data/ext/tidy/fileio.h +46 -0
  28. data/ext/tidy/forward.h +69 -0
  29. data/ext/tidy/iconvtc.c +105 -0
  30. data/ext/tidy/iconvtc.h +15 -0
  31. data/ext/tidy/istack.c +373 -0
  32. data/ext/tidy/lexer.c +3825 -0
  33. data/ext/tidy/lexer.h +617 -0
  34. data/ext/tidy/localize.c +1882 -0
  35. data/ext/tidy/mappedio.c +329 -0
  36. data/ext/tidy/mappedio.h +16 -0
  37. data/ext/tidy/message.h +207 -0
  38. data/ext/tidy/parser.c +4408 -0
  39. data/ext/tidy/parser.h +76 -0
  40. data/ext/tidy/platform.h +636 -0
  41. data/ext/tidy/pprint.c +2276 -0
  42. data/ext/tidy/pprint.h +93 -0
  43. data/ext/tidy/ruby-tidy.c +195 -0
  44. data/ext/tidy/streamio.c +1407 -0
  45. data/ext/tidy/streamio.h +222 -0
  46. data/ext/tidy/tagask.c +286 -0
  47. data/ext/tidy/tags.c +955 -0
  48. data/ext/tidy/tags.h +235 -0
  49. data/ext/tidy/tidy-int.h +129 -0
  50. data/ext/tidy/tidy.h +1097 -0
  51. data/ext/tidy/tidyenum.h +622 -0
  52. data/ext/tidy/tidylib.c +1751 -0
  53. data/ext/tidy/tmbstr.c +306 -0
  54. data/ext/tidy/tmbstr.h +92 -0
  55. data/ext/tidy/utf8.c +539 -0
  56. data/ext/tidy/utf8.h +52 -0
  57. data/ext/tidy/version.h +14 -0
  58. data/ext/tidy/win32tc.c +795 -0
  59. data/ext/tidy/win32tc.h +19 -0
  60. data/spec/spec_helper.rb +5 -0
  61. data/spec/tidy/compat_spec.rb +44 -0
  62. data/spec/tidy/remote_uri_spec.rb +14 -0
  63. data/spec/tidy/test1.html +5 -0
  64. data/spec/tidy/tidy_spec.rb +34 -0
  65. metadata +125 -0
data/ext/tidy/tmbstr.c ADDED
@@ -0,0 +1,306 @@
1
+ /* tmbstr.c -- Tidy string utility functions
2
+
3
+ (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
4
+ See tidy.h for the copyright notice.
5
+
6
+ CVS Info :
7
+
8
+ $Author: arnaud02 $
9
+ $Date: 2006/12/29 16:31:08 $
10
+ $Revision: 1.13 $
11
+
12
+ */
13
+
14
+ #include "forward.h"
15
+ #include "tmbstr.h"
16
+ #include "lexer.h"
17
+
18
+ /* like strdup but using an allocator */
19
+ tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
20
+ {
21
+ tmbstr s = NULL;
22
+ if ( str )
23
+ {
24
+ uint len = TY_(tmbstrlen)( str );
25
+ tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
26
+ while ( 0 != (*cp++ = *str++) )
27
+ /**/;
28
+ }
29
+ return s;
30
+ }
31
+
32
+ /* like strndup but using an allocator */
33
+ tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
34
+ {
35
+ tmbstr s = NULL;
36
+ if ( str && len > 0 )
37
+ {
38
+ tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
39
+ while ( len-- > 0 && (*cp++ = *str++) )
40
+ /**/;
41
+ *cp = 0;
42
+ }
43
+ return s;
44
+ }
45
+
46
+ /* exactly same as strncpy */
47
+ uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
48
+ {
49
+ if ( s1 != NULL && s2 != NULL )
50
+ {
51
+ tmbstr cp = s1;
52
+ while ( *s2 && --size ) /* Predecrement: reserve byte */
53
+ *cp++ = *s2++; /* for NULL terminator. */
54
+ *cp = 0;
55
+ }
56
+ return size;
57
+ }
58
+
59
+ /* Allows expressions like: cp += tmbstrcpy( cp, "joebob" );
60
+ */
61
+ uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
62
+ {
63
+ uint ncpy = 0;
64
+ while (0 != (*s1++ = *s2++) )
65
+ ++ncpy;
66
+ return ncpy;
67
+ }
68
+
69
+ /* Allows expressions like: cp += tmbstrcat( cp, "joebob" );
70
+ */
71
+ uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
72
+ {
73
+ uint ncpy = 0;
74
+ while ( *s1 )
75
+ ++s1;
76
+
77
+ while (0 != (*s1++ = *s2++) )
78
+ ++ncpy;
79
+ return ncpy;
80
+ }
81
+
82
+ /* exactly same as strcmp */
83
+ int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
84
+ {
85
+ int c;
86
+ while ((c = *s1) == *s2)
87
+ {
88
+ if (c == '\0')
89
+ return 0;
90
+
91
+ ++s1;
92
+ ++s2;
93
+ }
94
+
95
+ return (*s1 > *s2 ? 1 : -1);
96
+ }
97
+
98
+ /* returns byte count, not char count */
99
+ uint TY_(tmbstrlen)( ctmbstr str )
100
+ {
101
+ uint len = 0;
102
+ if ( str )
103
+ {
104
+ while ( *str++ )
105
+ ++len;
106
+ }
107
+ return len;
108
+ }
109
+
110
+ /*
111
+ MS C 4.2 doesn't include strcasecmp.
112
+ Note that tolower and toupper won't
113
+ work on chars > 127.
114
+
115
+ Neither does ToLower()!
116
+ */
117
+ int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
118
+ {
119
+ uint c;
120
+
121
+ while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
122
+ {
123
+ if (c == '\0')
124
+ return 0;
125
+
126
+ ++s1;
127
+ ++s2;
128
+ }
129
+
130
+ return (*s1 > *s2 ? 1 : -1);
131
+ }
132
+
133
+ int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
134
+ {
135
+ uint c;
136
+
137
+ while ((c = (byte)*s1) == (byte)*s2)
138
+ {
139
+ if (c == '\0')
140
+ return 0;
141
+
142
+ if (n == 0)
143
+ return 0;
144
+
145
+ ++s1;
146
+ ++s2;
147
+ --n;
148
+ }
149
+
150
+ if (n == 0)
151
+ return 0;
152
+
153
+ return (*s1 > *s2 ? 1 : -1);
154
+ }
155
+
156
+ int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
157
+ {
158
+ uint c;
159
+
160
+ while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
161
+ {
162
+ if (c == '\0')
163
+ return 0;
164
+
165
+ if (n == 0)
166
+ return 0;
167
+
168
+ ++s1;
169
+ ++s2;
170
+ --n;
171
+ }
172
+
173
+ if (n == 0)
174
+ return 0;
175
+
176
+ return (*s1 > *s2 ? 1 : -1);
177
+ }
178
+
179
#if 0
/* Search the first `maxlen` bytes of s1 for the character cc.
** Returns the offset of cc from the beginning of s1, or -1 if not found.
** The scan is bounded by maxlen only; it does not stop at a NUL.
*/
int TY_(tmbstrnchr)( ctmbstr s1, uint maxlen, tmbchar cc )
{
    uint pos;

    for ( pos = 0; pos < maxlen; ++pos )
    {
        if ( s1[pos] == cc )
            return (int) pos;
    }

    return -1;
}
#endif
197
+
198
+ ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
199
+ {
200
+ uint len2 = TY_(tmbstrlen)(s2);
201
+ int ix, diff = len1 - len2;
202
+
203
+ for ( ix = 0; ix <= diff; ++ix )
204
+ {
205
+ if ( TY_(tmbstrncmp)(s1+ix, s2, len2) == 0 )
206
+ return (ctmbstr) s1+ix;
207
+ }
208
+ return NULL;
209
+ }
210
+
211
#if 0
/* Find the first case-insensitive occurrence of s2 within the first
** `len1` bytes of s1.
**
** Returns a pointer into s1 at the start of the match, or NULL when s2
** does not occur or is longer than len1.
*/
ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    uint ix, last;

    /* Compare in unsigned arithmetic: s2 cannot fit, so there is no match. */
    if ( len2 > len1 )
        return NULL;

    /* Last offset at which a match of len2 bytes still fits in len1. */
    last = len1 - len2;

    for ( ix = 0; ; ++ix )
    {
        if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;

        if ( ix == last )
            break;
    }
    return NULL;
}
#endif
225
+
226
+ ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
227
+ {
228
+ uint len1 = TY_(tmbstrlen)(s1), len2 = TY_(tmbstrlen)(s2);
229
+ int ix, diff = len1 - len2;
230
+
231
+ for ( ix = 0; ix <= diff; ++ix )
232
+ {
233
+ if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
234
+ return (ctmbstr) s1+ix;
235
+ }
236
+ return NULL;
237
+ }
238
+
239
+ /* Transform ASCII chars in string to lower case */
240
+ tmbstr TY_(tmbstrtolower)( tmbstr s )
241
+ {
242
+ tmbstr cp;
243
+ for ( cp=s; *cp; ++cp )
244
+ *cp = (tmbchar) TY_(ToLower)( *cp );
245
+ return s;
246
+ }
247
+
248
+ /* Transform ASCII chars in string to upper case */
249
+ tmbstr TY_(tmbstrtoupper)(tmbstr s)
250
+ {
251
+ tmbstr cp;
252
+
253
+ for (cp = s; *cp; ++cp)
254
+ *cp = (tmbchar)TY_(ToUpper)(*cp);
255
+
256
+ return s;
257
+ }
258
+
259
#if 0
/* Report whether two file names are the same string.  The comparison is
** case-sensitive when FILENAMES_CASE_SENSITIVE is set and
** case-insensitive otherwise.
*/
Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
{
    int diff;

#if FILENAMES_CASE_SENSITIVE
    diff = TY_(tmbstrcmp)( filename1, filename2 );
#else
    diff = TY_(tmbstrcasecmp)( filename1, filename2 );
#endif

    return ( diff == 0 );
}
#endif
269
+
270
+ int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
271
+ {
272
+ int retval;
273
+ #if HAS_VSNPRINTF
274
+ retval = vsnprintf(buffer, count - 1, format, args);
275
+ /* todo: conditionally null-terminate the string? */
276
+ buffer[count - 1] = 0;
277
+ #else
278
+ retval = vsprintf(buffer, format, args);
279
+ #endif /* HAS_VSNPRINTF */
280
+ return retval;
281
+ }
282
+
283
+ int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
284
+ {
285
+ int retval;
286
+ va_list args;
287
+ va_start(args, format);
288
+ #if HAS_VSNPRINTF
289
+ retval = vsnprintf(buffer, count - 1, format, args);
290
+ /* todo: conditionally null-terminate the string? */
291
+ buffer[count - 1] = 0;
292
+ #else
293
+ retval = vsprintf(buffer, format, args);
294
+ #endif /* HAS_VSNPRINTF */
295
+ va_end(args);
296
+ return retval;
297
+ }
298
+
299
+ /*
300
+ * local variables:
301
+ * mode: c
302
+ * indent-tabs-mode: nil
303
+ * c-basic-offset: 4
304
+ * eval: (c-set-offset 'substatement-open 0)
305
+ * end:
306
+ */
data/ext/tidy/tmbstr.h ADDED
@@ -0,0 +1,92 @@
1
+ #ifndef __TMBSTR_H__
2
+ #define __TMBSTR_H__
3
+
4
+ /* tmbstr.h - Tidy string utility functions
5
+
6
+ (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7
+ See tidy.h for the copyright notice.
8
+
9
+ CVS Info :
10
+
11
+ $Author: arnaud02 $
12
+ $Date: 2006/12/29 16:31:09 $
13
+ $Revision: 1.11 $
14
+
15
+ */
16
+
17
+ #include "platform.h"
18
+
19
+ #ifdef __cplusplus
20
+ extern "C"
21
+ {
22
+ #endif
23
+
24
+ /* like strdup but using an allocator */
25
+ tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
26
+
27
+ /* like strndup but using an allocator */
28
+ tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
29
+
30
+ /* exactly same as strncpy */
31
+ uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
32
+
33
+ uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
34
+
35
+ uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
36
+
37
+ /* exactly same as strcmp */
38
+ int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
39
+
40
+ /* returns byte count, not char count */
41
+ uint TY_(tmbstrlen)( ctmbstr str );
42
+
43
+ /*
44
+ MS C 4.2 doesn't include strcasecmp.
45
+ Note that tolower and toupper won't
46
+ work on chars > 127.
47
+
48
+ Neither do Lexer.ToLower() or Lexer.ToUpper()!
49
+
50
+ We get away with this because, except for XML tags,
51
+ we are always comparing to ascii element and
52
+ attribute names defined by HTML specs.
53
+ */
54
+ int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
55
+
56
+ int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
57
+
58
+ int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
59
+
60
+ /* return offset of cc from beginning of s1,
61
+ ** -1 if not found.
62
+ */
63
+ /* int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
64
+
65
+ ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
66
+ /* ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
67
+ ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
68
+
69
+ /* transform string to lower case */
70
+ tmbstr TY_(tmbstrtolower)( tmbstr s );
71
+
72
+ /* Transform ASCII chars in string to upper case */
73
+ tmbstr TY_(tmbstrtoupper)( tmbstr s );
74
+
75
+ /* Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
76
+
77
+ int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
78
+ #ifdef __GNUC__
79
+ __attribute__((format(printf, 3, 0)))
80
+ #endif
81
+ ;
82
+ int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
83
+ #ifdef __GNUC__
84
+ __attribute__((format(printf, 3, 4)))
85
+ #endif
86
+ ;
87
+
88
+ #ifdef __cplusplus
89
+ } /* extern "C" */
90
+ #endif
91
+
92
+ #endif /* __TMBSTR_H__ */