tidy-ext 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.gitignore +4 -0
  2. data/LICENSE +50 -0
  3. data/README +12 -0
  4. data/Rakefile +60 -0
  5. data/VERSION +1 -0
  6. data/ext/tidy/access.c +3310 -0
  7. data/ext/tidy/access.h +279 -0
  8. data/ext/tidy/alloc.c +107 -0
  9. data/ext/tidy/attrask.c +209 -0
  10. data/ext/tidy/attrdict.c +2398 -0
  11. data/ext/tidy/attrdict.h +122 -0
  12. data/ext/tidy/attrget.c +213 -0
  13. data/ext/tidy/attrs.c +1911 -0
  14. data/ext/tidy/attrs.h +374 -0
  15. data/ext/tidy/buffio.c +232 -0
  16. data/ext/tidy/buffio.h +118 -0
  17. data/ext/tidy/charsets.c +1032 -0
  18. data/ext/tidy/charsets.h +14 -0
  19. data/ext/tidy/clean.c +2674 -0
  20. data/ext/tidy/clean.h +87 -0
  21. data/ext/tidy/config.c +1746 -0
  22. data/ext/tidy/config.h +153 -0
  23. data/ext/tidy/entities.c +419 -0
  24. data/ext/tidy/entities.h +24 -0
  25. data/ext/tidy/extconf.rb +5 -0
  26. data/ext/tidy/fileio.c +106 -0
  27. data/ext/tidy/fileio.h +46 -0
  28. data/ext/tidy/forward.h +69 -0
  29. data/ext/tidy/iconvtc.c +105 -0
  30. data/ext/tidy/iconvtc.h +15 -0
  31. data/ext/tidy/istack.c +373 -0
  32. data/ext/tidy/lexer.c +3825 -0
  33. data/ext/tidy/lexer.h +617 -0
  34. data/ext/tidy/localize.c +1882 -0
  35. data/ext/tidy/mappedio.c +329 -0
  36. data/ext/tidy/mappedio.h +16 -0
  37. data/ext/tidy/message.h +207 -0
  38. data/ext/tidy/parser.c +4408 -0
  39. data/ext/tidy/parser.h +76 -0
  40. data/ext/tidy/platform.h +636 -0
  41. data/ext/tidy/pprint.c +2276 -0
  42. data/ext/tidy/pprint.h +93 -0
  43. data/ext/tidy/ruby-tidy.c +195 -0
  44. data/ext/tidy/streamio.c +1407 -0
  45. data/ext/tidy/streamio.h +222 -0
  46. data/ext/tidy/tagask.c +286 -0
  47. data/ext/tidy/tags.c +955 -0
  48. data/ext/tidy/tags.h +235 -0
  49. data/ext/tidy/tidy-int.h +129 -0
  50. data/ext/tidy/tidy.h +1097 -0
  51. data/ext/tidy/tidyenum.h +622 -0
  52. data/ext/tidy/tidylib.c +1751 -0
  53. data/ext/tidy/tmbstr.c +306 -0
  54. data/ext/tidy/tmbstr.h +92 -0
  55. data/ext/tidy/utf8.c +539 -0
  56. data/ext/tidy/utf8.h +52 -0
  57. data/ext/tidy/version.h +14 -0
  58. data/ext/tidy/win32tc.c +795 -0
  59. data/ext/tidy/win32tc.h +19 -0
  60. data/spec/spec_helper.rb +5 -0
  61. data/spec/tidy/compat_spec.rb +44 -0
  62. data/spec/tidy/remote_uri_spec.rb +14 -0
  63. data/spec/tidy/test1.html +5 -0
  64. data/spec/tidy/tidy_spec.rb +34 -0
  65. metadata +125 -0
data/ext/tidy/attrs.c ADDED
@@ -0,0 +1,1911 @@
1
+ /* attrs.c -- recognize HTML attributes
2
+
3
+ (c) 1998-2009 (W3C) MIT, ERCIM, Keio University
4
+ See tidy.h for the copyright notice.
5
+
6
+ CVS Info :
7
+
8
+ $Author: arnaud02 $
9
+ $Date: 2009/03/26 13:05:22 $
10
+ $Revision: 1.132 $
11
+
12
+ */
13
+
14
+ #include "tidy-int.h"
15
+ #include "attrs.h"
16
+ #include "message.h"
17
+ #include "tmbstr.h"
18
+ #include "utf8.h"
19
+
20
+ /*
21
+ Bind attribute types to procedures to check values.
22
+ You can add new procedures for better validation
23
+ and each procedure has access to the node in which
24
+ the attribute occurred as well as the attribute name
25
+ and its value.
26
+
27
+ By default, attributes are checked without regard
28
+ to the element they are found on. You have the choice
29
+ of making the procedure test which element is involved
30
+ or in writing methods for each element which controls
31
+ exactly how the attributes of that element are checked.
32
+ This latter approach is best for detecting the absence
33
+ of required attributes.
34
+ */
35
+
36
+ static AttrCheck CheckAction;
37
+ static AttrCheck CheckScript;
38
+ static AttrCheck CheckName;
39
+ static AttrCheck CheckId;
40
+ static AttrCheck CheckAlign;
41
+ static AttrCheck CheckValign;
42
+ static AttrCheck CheckBool;
43
+ static AttrCheck CheckLength;
44
+ static AttrCheck CheckTarget;
45
+ static AttrCheck CheckFsubmit;
46
+ static AttrCheck CheckClear;
47
+ static AttrCheck CheckShape;
48
+ static AttrCheck CheckNumber;
49
+ static AttrCheck CheckScope;
50
+ static AttrCheck CheckColor;
51
+ static AttrCheck CheckVType;
52
+ static AttrCheck CheckScroll;
53
+ static AttrCheck CheckTextDir;
54
+ static AttrCheck CheckLang;
55
+ static AttrCheck CheckType;
56
+
57
+ #define CH_PCDATA NULL
58
+ #define CH_CHARSET NULL
59
+ #define CH_TYPE CheckType
60
+ #define CH_XTYPE NULL
61
+ #define CH_CHARACTER NULL
62
+ #define CH_URLS NULL
63
+ #define CH_URL TY_(CheckUrl)
64
+ #define CH_SCRIPT CheckScript
65
+ #define CH_ALIGN CheckAlign
66
+ #define CH_VALIGN CheckValign
67
+ #define CH_COLOR CheckColor
68
+ #define CH_CLEAR CheckClear
69
+ #define CH_BORDER CheckBool /* kludge */
70
+ #define CH_LANG CheckLang
71
+ #define CH_BOOL CheckBool
72
+ #define CH_COLS NULL
73
+ #define CH_NUMBER CheckNumber
74
+ #define CH_LENGTH CheckLength
75
+ #define CH_COORDS NULL
76
+ #define CH_DATE NULL
77
+ #define CH_TEXTDIR CheckTextDir
78
+ #define CH_IDREFS NULL
79
+ #define CH_IDREF NULL
80
+ #define CH_IDDEF CheckId
81
+ #define CH_NAME CheckName
82
+ #define CH_TFRAME NULL
83
+ #define CH_FBORDER NULL
84
+ #define CH_MEDIA NULL
85
+ #define CH_FSUBMIT CheckFsubmit
86
+ #define CH_LINKTYPES NULL
87
+ #define CH_TRULES NULL
88
+ #define CH_SCOPE CheckScope
89
+ #define CH_SHAPE CheckShape
90
+ #define CH_SCROLL CheckScroll
91
+ #define CH_TARGET CheckTarget
92
+ #define CH_VTYPE CheckVType
93
+ #define CH_ACTION CheckAction
94
+
95
+ static const Attribute attribute_defs [] =
96
+ {
97
+ { TidyAttr_UNKNOWN, "unknown!", VERS_PROPRIETARY, NULL },
98
+ { TidyAttr_ABBR, "abbr", VERS_HTML40, CH_PCDATA },
99
+ { TidyAttr_ACCEPT, "accept", VERS_ALL, CH_XTYPE },
100
+ { TidyAttr_ACCEPT_CHARSET, "accept-charset", VERS_HTML40, CH_CHARSET },
101
+ { TidyAttr_ACCESSKEY, "accesskey", VERS_HTML40, CH_CHARACTER },
102
+ { TidyAttr_ACTION, "action", VERS_ALL, CH_ACTION },
103
+ { TidyAttr_ADD_DATE, "add_date", VERS_NETSCAPE, CH_PCDATA }, /* A */
104
+ { TidyAttr_ALIGN, "align", VERS_ALL, CH_ALIGN }, /* varies by element */
105
+ { TidyAttr_ALINK, "alink", VERS_LOOSE, CH_COLOR },
106
+ { TidyAttr_ALT, "alt", VERS_ALL, CH_PCDATA }, /* nowrap */
107
+ { TidyAttr_ARCHIVE, "archive", VERS_HTML40, CH_URLS }, /* space or comma separated list */
108
+ { TidyAttr_AXIS, "axis", VERS_HTML40, CH_PCDATA },
109
+ { TidyAttr_BACKGROUND, "background", VERS_LOOSE, CH_URL },
110
+ { TidyAttr_BGCOLOR, "bgcolor", VERS_LOOSE, CH_COLOR },
111
+ { TidyAttr_BGPROPERTIES, "bgproperties", VERS_PROPRIETARY, CH_PCDATA }, /* BODY "fixed" fixes background */
112
+ { TidyAttr_BORDER, "border", VERS_ALL, CH_BORDER }, /* like LENGTH + "border" */
113
+ { TidyAttr_BORDERCOLOR, "bordercolor", VERS_MICROSOFT, CH_COLOR }, /* used on TABLE */
114
+ { TidyAttr_BOTTOMMARGIN, "bottommargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
115
+ { TidyAttr_CELLPADDING, "cellpadding", VERS_FROM32, CH_LENGTH }, /* % or pixel values */
116
+ { TidyAttr_CELLSPACING, "cellspacing", VERS_FROM32, CH_LENGTH },
117
+ { TidyAttr_CHAR, "char", VERS_HTML40, CH_CHARACTER },
118
+ { TidyAttr_CHAROFF, "charoff", VERS_HTML40, CH_LENGTH },
119
+ { TidyAttr_CHARSET, "charset", VERS_HTML40, CH_CHARSET },
120
+ { TidyAttr_CHECKED, "checked", VERS_ALL, CH_BOOL }, /* i.e. "checked" or absent */
121
+ { TidyAttr_CITE, "cite", VERS_HTML40, CH_URL },
122
+ { TidyAttr_CLASS, "class", VERS_HTML40, CH_PCDATA },
123
+ { TidyAttr_CLASSID, "classid", VERS_HTML40, CH_URL },
124
+ { TidyAttr_CLEAR, "clear", VERS_LOOSE, CH_CLEAR }, /* BR: left, right, all */
125
+ { TidyAttr_CODE, "code", VERS_LOOSE, CH_PCDATA }, /* APPLET */
126
+ { TidyAttr_CODEBASE, "codebase", VERS_HTML40, CH_URL }, /* OBJECT */
127
+ { TidyAttr_CODETYPE, "codetype", VERS_HTML40, CH_XTYPE }, /* OBJECT */
128
+ { TidyAttr_COLOR, "color", VERS_LOOSE, CH_COLOR }, /* BASEFONT, FONT */
129
+ { TidyAttr_COLS, "cols", VERS_IFRAME, CH_COLS }, /* TABLE & FRAMESET */
130
+ { TidyAttr_COLSPAN, "colspan", VERS_FROM32, CH_NUMBER },
131
+ { TidyAttr_COMPACT, "compact", VERS_ALL, CH_BOOL }, /* lists */
132
+ { TidyAttr_CONTENT, "content", VERS_ALL, CH_PCDATA },
133
+ { TidyAttr_COORDS, "coords", VERS_FROM32, CH_COORDS }, /* AREA, A */
134
+ { TidyAttr_DATA, "data", VERS_HTML40, CH_URL }, /* OBJECT */
135
+ { TidyAttr_DATAFLD, "datafld", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */
136
+ { TidyAttr_DATAFORMATAS, "dataformatas", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */
137
+ { TidyAttr_DATAPAGESIZE, "datapagesize", VERS_MICROSOFT, CH_NUMBER }, /* used on DIV, IMG */
138
+ { TidyAttr_DATASRC, "datasrc", VERS_MICROSOFT, CH_URL }, /* used on TABLE */
139
+ { TidyAttr_DATETIME, "datetime", VERS_HTML40, CH_DATE }, /* INS, DEL */
140
+ { TidyAttr_DECLARE, "declare", VERS_HTML40, CH_BOOL }, /* OBJECT */
141
+ { TidyAttr_DEFER, "defer", VERS_HTML40, CH_BOOL }, /* SCRIPT */
142
+ { TidyAttr_DIR, "dir", VERS_HTML40, CH_TEXTDIR }, /* ltr or rtl */
143
+ { TidyAttr_DISABLED, "disabled", VERS_HTML40, CH_BOOL }, /* form fields */
144
+ { TidyAttr_ENCODING, "encoding", VERS_XML, CH_PCDATA }, /* <?xml?> */
145
+ { TidyAttr_ENCTYPE, "enctype", VERS_ALL, CH_XTYPE }, /* FORM */
146
+ { TidyAttr_FACE, "face", VERS_LOOSE, CH_PCDATA }, /* BASEFONT, FONT */
147
+ { TidyAttr_FOR, "for", VERS_HTML40, CH_IDREF }, /* LABEL */
148
+ { TidyAttr_FRAME, "frame", VERS_HTML40, CH_TFRAME }, /* TABLE */
149
+ { TidyAttr_FRAMEBORDER, "frameborder", VERS_FRAMESET, CH_FBORDER }, /* 0 or 1 */
150
+ { TidyAttr_FRAMESPACING, "framespacing", VERS_PROPRIETARY, CH_NUMBER },
151
+ { TidyAttr_GRIDX, "gridx", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive*/
152
+ { TidyAttr_GRIDY, "gridy", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive */
153
+ { TidyAttr_HEADERS, "headers", VERS_HTML40, CH_IDREFS }, /* table cells */
154
+ { TidyAttr_HEIGHT, "height", VERS_ALL, CH_LENGTH }, /* pixels only for TH/TD */
155
+ { TidyAttr_HREF, "href", VERS_ALL, CH_URL }, /* A, AREA, LINK and BASE */
156
+ { TidyAttr_HREFLANG, "hreflang", VERS_HTML40, CH_LANG }, /* A, LINK */
157
+ { TidyAttr_HSPACE, "hspace", VERS_ALL, CH_NUMBER }, /* APPLET, IMG, OBJECT */
158
+ { TidyAttr_HTTP_EQUIV, "http-equiv", VERS_ALL, CH_PCDATA }, /* META */
159
+ { TidyAttr_ID, "id", VERS_HTML40, CH_IDDEF },
160
+ { TidyAttr_ISMAP, "ismap", VERS_ALL, CH_BOOL }, /* IMG */
161
+ { TidyAttr_LABEL, "label", VERS_HTML40, CH_PCDATA }, /* OPT, OPTGROUP */
162
+ { TidyAttr_LANG, "lang", VERS_HTML40, CH_LANG },
163
+ { TidyAttr_LANGUAGE, "language", VERS_LOOSE, CH_PCDATA }, /* SCRIPT */
164
+ { TidyAttr_LAST_MODIFIED, "last_modified", VERS_NETSCAPE, CH_PCDATA }, /* A */
165
+ { TidyAttr_LAST_VISIT, "last_visit", VERS_NETSCAPE, CH_PCDATA }, /* A */
166
+ { TidyAttr_LEFTMARGIN, "leftmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
167
+ { TidyAttr_LINK, "link", VERS_LOOSE, CH_COLOR }, /* BODY */
168
+ { TidyAttr_LONGDESC, "longdesc", VERS_HTML40, CH_URL }, /* IMG */
169
+ { TidyAttr_LOWSRC, "lowsrc", VERS_PROPRIETARY, CH_URL }, /* IMG */
170
+ { TidyAttr_MARGINHEIGHT, "marginheight", VERS_IFRAME, CH_NUMBER }, /* FRAME, IFRAME, BODY */
171
+ { TidyAttr_MARGINWIDTH, "marginwidth", VERS_IFRAME, CH_NUMBER }, /* ditto */
172
+ { TidyAttr_MAXLENGTH, "maxlength", VERS_ALL, CH_NUMBER }, /* INPUT */
173
+ { TidyAttr_MEDIA, "media", VERS_HTML40, CH_MEDIA }, /* STYLE, LINK */
174
+ { TidyAttr_METHOD, "method", VERS_ALL, CH_FSUBMIT }, /* FORM: get or post */
175
+ { TidyAttr_MULTIPLE, "multiple", VERS_ALL, CH_BOOL }, /* SELECT */
176
+ { TidyAttr_NAME, "name", VERS_ALL, CH_NAME },
177
+ { TidyAttr_NOHREF, "nohref", VERS_FROM32, CH_BOOL }, /* AREA */
178
+ { TidyAttr_NORESIZE, "noresize", VERS_FRAMESET, CH_BOOL }, /* FRAME */
179
+ { TidyAttr_NOSHADE, "noshade", VERS_LOOSE, CH_BOOL }, /* HR */
180
+ { TidyAttr_NOWRAP, "nowrap", VERS_LOOSE, CH_BOOL }, /* table cells */
181
+ { TidyAttr_OBJECT, "object", VERS_HTML40_LOOSE, CH_PCDATA }, /* APPLET */
182
+ { TidyAttr_OnAFTERUPDATE, "onafterupdate", VERS_MICROSOFT, CH_SCRIPT },
183
+ { TidyAttr_OnBEFOREUNLOAD, "onbeforeunload", VERS_MICROSOFT, CH_SCRIPT },
184
+ { TidyAttr_OnBEFOREUPDATE, "onbeforeupdate", VERS_MICROSOFT, CH_SCRIPT },
185
+ { TidyAttr_OnBLUR, "onblur", VERS_EVENTS, CH_SCRIPT }, /* event */
186
+ { TidyAttr_OnCHANGE, "onchange", VERS_EVENTS, CH_SCRIPT }, /* event */
187
+ { TidyAttr_OnCLICK, "onclick", VERS_EVENTS, CH_SCRIPT }, /* event */
188
+ { TidyAttr_OnDATAAVAILABLE, "ondataavailable", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */
189
+ { TidyAttr_OnDATASETCHANGED, "ondatasetchanged", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */
190
+ { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", VERS_MICROSOFT, CH_SCRIPT },
191
+ { TidyAttr_OnDBLCLICK, "ondblclick", VERS_EVENTS, CH_SCRIPT }, /* event */
192
+ { TidyAttr_OnERRORUPDATE, "onerrorupdate", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
193
+ { TidyAttr_OnFOCUS, "onfocus", VERS_EVENTS, CH_SCRIPT }, /* event */
194
+ { TidyAttr_OnKEYDOWN, "onkeydown", VERS_EVENTS, CH_SCRIPT }, /* event */
195
+ { TidyAttr_OnKEYPRESS, "onkeypress", VERS_EVENTS, CH_SCRIPT }, /* event */
196
+ { TidyAttr_OnKEYUP, "onkeyup", VERS_EVENTS, CH_SCRIPT }, /* event */
197
+ { TidyAttr_OnLOAD, "onload", VERS_EVENTS, CH_SCRIPT }, /* event */
198
+ { TidyAttr_OnMOUSEDOWN, "onmousedown", VERS_EVENTS, CH_SCRIPT }, /* event */
199
+ { TidyAttr_OnMOUSEMOVE, "onmousemove", VERS_EVENTS, CH_SCRIPT }, /* event */
200
+ { TidyAttr_OnMOUSEOUT, "onmouseout", VERS_EVENTS, CH_SCRIPT }, /* event */
201
+ { TidyAttr_OnMOUSEOVER, "onmouseover", VERS_EVENTS, CH_SCRIPT }, /* event */
202
+ { TidyAttr_OnMOUSEUP, "onmouseup", VERS_EVENTS, CH_SCRIPT }, /* event */
203
+ { TidyAttr_OnRESET, "onreset", VERS_EVENTS, CH_SCRIPT }, /* event */
204
+ { TidyAttr_OnROWENTER, "onrowenter", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
205
+ { TidyAttr_OnROWEXIT, "onrowexit", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
206
+ { TidyAttr_OnSELECT, "onselect", VERS_EVENTS, CH_SCRIPT }, /* event */
207
+ { TidyAttr_OnSUBMIT, "onsubmit", VERS_EVENTS, CH_SCRIPT }, /* event */
208
+ { TidyAttr_OnUNLOAD, "onunload", VERS_EVENTS, CH_SCRIPT }, /* event */
209
+ { TidyAttr_PROFILE, "profile", VERS_HTML40, CH_URL }, /* HEAD */
210
+ { TidyAttr_PROMPT, "prompt", VERS_LOOSE, CH_PCDATA }, /* ISINDEX */
211
+ { TidyAttr_RBSPAN, "rbspan", VERS_XHTML11, CH_NUMBER }, /* ruby markup */
212
+ { TidyAttr_READONLY, "readonly", VERS_HTML40, CH_BOOL }, /* form fields */
213
+ { TidyAttr_REL, "rel", VERS_ALL, CH_LINKTYPES },
214
+ { TidyAttr_REV, "rev", VERS_ALL, CH_LINKTYPES },
215
+ { TidyAttr_RIGHTMARGIN, "rightmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
216
+ { TidyAttr_ROWS, "rows", VERS_ALL, CH_NUMBER }, /* TEXTAREA */
217
+ { TidyAttr_ROWSPAN, "rowspan", VERS_ALL, CH_NUMBER }, /* table cells */
218
+ { TidyAttr_RULES, "rules", VERS_HTML40, CH_TRULES }, /* TABLE */
219
+ { TidyAttr_SCHEME, "scheme", VERS_HTML40, CH_PCDATA }, /* META */
220
+ { TidyAttr_SCOPE, "scope", VERS_HTML40, CH_SCOPE }, /* table cells */
221
+ { TidyAttr_SCROLLING, "scrolling", VERS_IFRAME, CH_SCROLL }, /* yes, no or auto */
222
+ { TidyAttr_SELECTED, "selected", VERS_ALL, CH_BOOL }, /* OPTION */
223
+ { TidyAttr_SHAPE, "shape", VERS_FROM32, CH_SHAPE }, /* AREA, A */
224
+ { TidyAttr_SHOWGRID, "showgrid", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive */
225
+ { TidyAttr_SHOWGRIDX, "showgridx", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/
226
+ { TidyAttr_SHOWGRIDY, "showgridy", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/
227
+ { TidyAttr_SIZE, "size", VERS_LOOSE, CH_NUMBER }, /* HR, FONT, BASEFONT, SELECT */
228
+ { TidyAttr_SPAN, "span", VERS_HTML40, CH_NUMBER }, /* COL, COLGROUP */
229
+ { TidyAttr_SRC, "src", VERS_ALL, CH_URL }, /* IMG, FRAME, IFRAME */
230
+ { TidyAttr_STANDBY, "standby", VERS_HTML40, CH_PCDATA }, /* OBJECT */
231
+ { TidyAttr_START, "start", VERS_ALL, CH_NUMBER }, /* OL */
232
+ { TidyAttr_STYLE, "style", VERS_HTML40, CH_PCDATA },
233
+ { TidyAttr_SUMMARY, "summary", VERS_HTML40, CH_PCDATA }, /* TABLE */
234
+ { TidyAttr_TABINDEX, "tabindex", VERS_HTML40, CH_NUMBER }, /* fields, OBJECT and A */
235
+ { TidyAttr_TARGET, "target", VERS_HTML40, CH_TARGET }, /* names a frame/window */
236
+ { TidyAttr_TEXT, "text", VERS_LOOSE, CH_COLOR }, /* BODY */
237
+ { TidyAttr_TITLE, "title", VERS_HTML40, CH_PCDATA }, /* text tool tip */
238
+ { TidyAttr_TOPMARGIN, "topmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
239
+ { TidyAttr_TYPE, "type", VERS_FROM32, CH_TYPE }, /* also used by SPACER */
240
+ { TidyAttr_USEMAP, "usemap", VERS_ALL, CH_URL }, /* things with images */
241
+ { TidyAttr_VALIGN, "valign", VERS_FROM32, CH_VALIGN },
242
+ { TidyAttr_VALUE, "value", VERS_ALL, CH_PCDATA },
243
+ { TidyAttr_VALUETYPE, "valuetype", VERS_HTML40, CH_VTYPE }, /* PARAM: data, ref, object */
244
+ { TidyAttr_VERSION, "version", VERS_ALL|VERS_XML, CH_PCDATA }, /* HTML <?xml?> */
245
+ { TidyAttr_VLINK, "vlink", VERS_LOOSE, CH_COLOR }, /* BODY */
246
+ { TidyAttr_VSPACE, "vspace", VERS_LOOSE, CH_NUMBER }, /* IMG, OBJECT, APPLET */
247
+ { TidyAttr_WIDTH, "width", VERS_ALL, CH_LENGTH }, /* pixels only for TD/TH */
248
+ { TidyAttr_WRAP, "wrap", VERS_NETSCAPE, CH_PCDATA }, /* textarea */
249
+ { TidyAttr_XML_LANG, "xml:lang", VERS_XML, CH_LANG }, /* XML language */
250
+ { TidyAttr_XML_SPACE, "xml:space", VERS_XML, CH_PCDATA }, /* XML white space */
251
+
252
+ /* todo: VERS_ALL is wrong! */
253
+ { TidyAttr_XMLNS, "xmlns", VERS_ALL, CH_PCDATA }, /* name space */
254
+ { TidyAttr_EVENT, "event", VERS_HTML40, CH_PCDATA }, /* reserved for <script> */
255
+ { TidyAttr_METHODS, "methods", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */
256
+ { TidyAttr_N, "n", VERS_HTML20, CH_PCDATA }, /* for <nextid> */
257
+ { TidyAttr_SDAFORM, "sdaform", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
258
+ { TidyAttr_SDAPREF, "sdapref", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
259
+ { TidyAttr_SDASUFF, "sdasuff", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
260
+ { TidyAttr_URN, "urn", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */
261
+
262
+ /* this must be the final entry */
263
+ { N_TIDY_ATTRIBS, NULL, VERS_UNKNOWN, NULL }
264
+ };
265
+
266
+ static uint AttributeVersions(Node* node, AttVal* attval)
267
+ {
268
+ uint i;
269
+
270
+ if (!attval || !attval->dict)
271
+ return VERS_UNKNOWN;
272
+
273
+ if (!node || !node->tag || !node->tag->attrvers)
274
+ return attval->dict->versions;
275
+
276
+ for (i = 0; node->tag->attrvers[i].attribute; ++i)
277
+ if (node->tag->attrvers[i].attribute == attval->dict->id)
278
+ return node->tag->attrvers[i].versions;
279
+
280
+ return attval->dict->versions & VERS_ALL
281
+ ? VERS_UNKNOWN
282
+ : attval->dict->versions;
283
+
284
+ }
285
+
286
+
287
+ /* return the version of the attribute "id" of element "node" */
288
+ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
289
+ {
290
+ uint i;
291
+
292
+ if (!node || !node->tag || !node->tag->attrvers)
293
+ return VERS_UNKNOWN;
294
+
295
+ for (i = 0; node->tag->attrvers[i].attribute; ++i)
296
+ if (node->tag->attrvers[i].attribute == id)
297
+ return node->tag->attrvers[i].versions;
298
+
299
+ return VERS_UNKNOWN;
300
+ }
301
+
302
+ /* returns true if the element is a W3C defined element */
303
+ /* but the element/attribute combination is not */
304
+ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
305
+ {
306
+ if (!node || !attval)
307
+ return no;
308
+
309
+ if (!node->tag)
310
+ return no;
311
+
312
+ if (!(node->tag->versions & VERS_ALL))
313
+ return no;
314
+
315
+ if (AttributeVersions(node, attval) & VERS_ALL)
316
+ return no;
317
+
318
+ return yes;
319
+ }
320
+
321
+ /* used by CheckColor() */
322
+ struct _colors
323
+ {
324
+ ctmbstr name;
325
+ ctmbstr hex;
326
+ };
327
+
328
+ static const struct _colors colors[] =
329
+ {
330
+ { "black", "#000000" },
331
+ { "green", "#008000" },
332
+ { "silver", "#C0C0C0" },
333
+ { "lime", "#00FF00" },
334
+ { "gray", "#808080" },
335
+ { "olive", "#808000" },
336
+ { "white", "#FFFFFF" },
337
+ { "yellow", "#FFFF00" },
338
+ { "maroon", "#800000" },
339
+ { "navy", "#000080" },
340
+ { "red", "#FF0000" },
341
+ { "blue", "#0000FF" },
342
+ { "purple", "#800080" },
343
+ { "teal", "#008080" },
344
+ { "fuchsia", "#FF00FF" },
345
+ { "aqua", "#00FFFF" },
346
+ { NULL, NULL }
347
+ };
348
+
349
+ static ctmbstr GetColorCode(ctmbstr name)
350
+ {
351
+ uint i;
352
+
353
+ for (i = 0; colors[i].name; ++i)
354
+ if (TY_(tmbstrcasecmp)(name, colors[i].name) == 0)
355
+ return colors[i].hex;
356
+
357
+ return NULL;
358
+ }
359
+
360
+ static ctmbstr GetColorName(ctmbstr code)
361
+ {
362
+ uint i;
363
+
364
+ for (i = 0; colors[i].name; ++i)
365
+ if (TY_(tmbstrcasecmp)(code, colors[i].hex) == 0)
366
+ return colors[i].name;
367
+
368
+ return NULL;
369
+ }
370
+
371
+ #if 0
372
+ static const struct _colors fancy_colors[] =
373
+ {
374
+ { "darkgreen", "#006400" },
375
+ { "antiquewhite", "#FAEBD7" },
376
+ { "aqua", "#00FFFF" },
377
+ { "aquamarine", "#7FFFD4" },
378
+ { "azure", "#F0FFFF" },
379
+ { "beige", "#F5F5DC" },
380
+ { "bisque", "#FFE4C4" },
381
+ { "black", "#000000" },
382
+ { "blanchedalmond", "#FFEBCD" },
383
+ { "blue", "#0000FF" },
384
+ { "blueviolet", "#8A2BE2" },
385
+ { "brown", "#A52A2A" },
386
+ { "burlywood", "#DEB887" },
387
+ { "cadetblue", "#5F9EA0" },
388
+ { "chartreuse", "#7FFF00" },
389
+ { "chocolate", "#D2691E" },
390
+ { "coral", "#FF7F50" },
391
+ { "cornflowerblue", "#6495ED" },
392
+ { "cornsilk", "#FFF8DC" },
393
+ { "crimson", "#DC143C" },
394
+ { "cyan", "#00FFFF" },
395
+ { "darkblue", "#00008B" },
396
+ { "darkcyan", "#008B8B" },
397
+ { "darkgoldenrod", "#B8860B" },
398
+ { "darkgray", "#A9A9A9" },
399
+ { "darkgreen", "#006400" },
400
+ { "darkkhaki", "#BDB76B" },
401
+ { "darkmagenta", "#8B008B" },
402
+ { "darkolivegreen", "#556B2F" },
403
+ { "darkorange", "#FF8C00" },
404
+ { "darkorchid", "#9932CC" },
405
+ { "darkred", "#8B0000" },
406
+ { "darksalmon", "#E9967A" },
407
+ { "darkseagreen", "#8FBC8F" },
408
+ { "darkslateblue", "#483D8B" },
409
+ { "darkslategray", "#2F4F4F" },
410
+ { "darkturquoise", "#00CED1" },
411
+ { "darkviolet", "#9400D3" },
412
+ { "deeppink", "#FF1493" },
413
+ { "deepskyblue", "#00BFFF" },
414
+ { "dimgray", "#696969" },
415
+ { "dodgerblue", "#1E90FF" },
416
+ { "firebrick", "#B22222" },
417
+ { "floralwhite", "#FFFAF0" },
418
+ { "forestgreen", "#228B22" },
419
+ { "fuchsia", "#FF00FF" },
420
+ { "gainsboro", "#DCDCDC" },
421
+ { "ghostwhite", "#F8F8FF" },
422
+ { "gold", "#FFD700" },
423
+ { "goldenrod", "#DAA520" },
424
+ { "gray", "#808080" },
425
+ { "green", "#008000" },
426
+ { "greenyellow", "#ADFF2F" },
427
+ { "honeydew", "#F0FFF0" },
428
+ { "hotpink", "#FF69B4" },
429
+ { "indianred", "#CD5C5C" },
430
+ { "indigo", "#4B0082" },
431
+ { "ivory", "#FFFFF0" },
432
+ { "khaki", "#F0E68C" },
433
+ { "lavender", "#E6E6FA" },
434
+ { "lavenderblush", "#FFF0F5" },
435
+ { "lawngreen", "#7CFC00" },
436
+ { "lemonchiffon", "#FFFACD" },
437
+ { "lightblue", "#ADD8E6" },
438
+ { "lightcoral", "#F08080" },
439
+ { "lightcyan", "#E0FFFF" },
440
+ { "lightgoldenrodyellow", "#FAFAD2" },
441
+ { "lightgreen", "#90EE90" },
442
+ { "lightgrey", "#D3D3D3" },
443
+ { "lightpink", "#FFB6C1" },
444
+ { "lightsalmon", "#FFA07A" },
445
+ { "lightseagreen", "#20B2AA" },
446
+ { "lightskyblue", "#87CEFA" },
447
+ { "lightslategray", "#778899" },
448
+ { "lightsteelblue", "#B0C4DE" },
449
+ { "lightyellow", "#FFFFE0" },
450
+ { "lime", "#00FF00" },
451
+ { "limegreen", "#32CD32" },
452
+ { "linen", "#FAF0E6" },
453
+ { "magenta", "#FF00FF" },
454
+ { "maroon", "#800000" },
455
+ { "mediumaquamarine", "#66CDAA" },
456
+ { "mediumblue", "#0000CD" },
457
+ { "mediumorchid", "#BA55D3" },
458
+ { "mediumpurple", "#9370DB" },
459
+ { "mediumseagreen", "#3CB371" },
460
+ { "mediumslateblue", "#7B68EE" },
461
+ { "mediumspringgreen", "#00FA9A" },
462
+ { "mediumturquoise", "#48D1CC" },
463
+ { "mediumvioletred", "#C71585" },
464
+ { "midnightblue", "#191970" },
465
+ { "mintcream", "#F5FFFA" },
466
+ { "mistyrose", "#FFE4E1" },
467
+ { "moccasin", "#FFE4B5" },
468
+ { "navajowhite", "#FFDEAD" },
469
+ { "navy", "#000080" },
470
+ { "oldlace", "#FDF5E6" },
471
+ { "olive", "#808000" },
472
+ { "olivedrab", "#6B8E23" },
473
+ { "orange", "#FFA500" },
474
+ { "orangered", "#FF4500" },
475
+ { "orchid", "#DA70D6" },
476
+ { "palegoldenrod", "#EEE8AA" },
477
+ { "palegreen", "#98FB98" },
478
+ { "paleturquoise", "#AFEEEE" },
479
+ { "palevioletred", "#DB7093" },
480
+ { "papayawhip", "#FFEFD5" },
481
+ { "peachpuff", "#FFDAB9" },
482
+ { "peru", "#CD853F" },
483
+ { "pink", "#FFC0CB" },
484
+ { "plum", "#DDA0DD" },
485
+ { "powderblue", "#B0E0E6" },
486
+ { "purple", "#800080" },
487
+ { "red", "#FF0000" },
488
+ { "rosybrown", "#BC8F8F" },
489
+ { "royalblue", "#4169E1" },
490
+ { "saddlebrown", "#8B4513" },
491
+ { "salmon", "#FA8072" },
492
+ { "sandybrown", "#F4A460" },
493
+ { "seagreen", "#2E8B57" },
494
+ { "seashell", "#FFF5EE" },
495
+ { "sienna", "#A0522D" },
496
+ { "silver", "#C0C0C0" },
497
+ { "skyblue", "#87CEEB" },
498
+ { "slateblue", "#6A5ACD" },
499
+ { "slategray", "#708090" },
500
+ { "snow", "#FFFAFA" },
501
+ { "springgreen", "#00FF7F" },
502
+ { "steelblue", "#4682B4" },
503
+ { "tan", "#D2B48C" },
504
+ { "teal", "#008080" },
505
+ { "thistle", "#D8BFD8" },
506
+ { "tomato", "#FF6347" },
507
+ { "turquoise", "#40E0D0" },
508
+ { "violet", "#EE82EE" },
509
+ { "wheat", "#F5DEB3" },
510
+ { "white", "#FFFFFF" },
511
+ { "whitesmoke", "#F5F5F5" },
512
+ { "yellow", "#FFFF00" },
513
+ { "yellowgreen", "#9ACD32" },
514
+ { NULL, NULL }
515
+ };
516
+ #endif
517
+
518
+ #if ATTRIBUTE_HASH_LOOKUP
519
+ static uint attrsHash(ctmbstr s)
520
+ {
521
+ uint hashval;
522
+
523
+ for (hashval = 0; *s != '\0'; s++)
524
+ hashval = *s + 31*hashval;
525
+
526
+ return hashval % ATTRIBUTE_HASH_SIZE;
527
+ }
528
+
529
+ static const Attribute *attrsInstall(TidyDocImpl* doc, TidyAttribImpl * attribs,
530
+ const Attribute* old)
531
+ {
532
+ AttrHash *np;
533
+ uint hashval;
534
+
535
+ if (old)
536
+ {
537
+ np = (AttrHash *)TidyDocAlloc(doc, sizeof(*np));
538
+ np->attr = old;
539
+
540
+ hashval = attrsHash(old->name);
541
+ np->next = attribs->hashtab[hashval];
542
+ attribs->hashtab[hashval] = np;
543
+ }
544
+
545
+ return old;
546
+ }
547
+
548
+ static void attrsRemoveFromHash( TidyDocImpl* doc, TidyAttribImpl *attribs,
549
+ ctmbstr s )
550
+ {
551
+ uint h = attrsHash(s);
552
+ AttrHash *p, *prev = NULL;
553
+ for (p = attribs->hashtab[h]; p && p->attr; p = p->next)
554
+ {
555
+ if (TY_(tmbstrcmp)(s, p->attr->name) == 0)
556
+ {
557
+ AttrHash* next = p->next;
558
+ if ( prev )
559
+ prev->next = next;
560
+ else
561
+ attribs->hashtab[h] = next;
562
+ TidyDocFree(doc, p);
563
+ return;
564
+ }
565
+ prev = p;
566
+ }
567
+ }
568
+
569
+ static void attrsEmptyHash( TidyDocImpl* doc, TidyAttribImpl * attribs )
570
+ {
571
+ AttrHash *dict, *next;
572
+ uint i;
573
+
574
+ for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i)
575
+ {
576
+ dict = attribs->hashtab[i];
577
+
578
+ while(dict)
579
+ {
580
+ next = dict->next;
581
+ TidyDocFree(doc, dict);
582
+ dict = next;
583
+ }
584
+
585
+ attribs->hashtab[i] = NULL;
586
+ }
587
+ }
588
+ #endif
589
+
590
+ static const Attribute* attrsLookup(TidyDocImpl* doc,
591
+ TidyAttribImpl* ARG_UNUSED(attribs),
592
+ ctmbstr atnam)
593
+ {
594
+ const Attribute *np;
595
+ #if ATTRIBUTE_HASH_LOOKUP
596
+ const AttrHash *p;
597
+ #endif
598
+
599
+ if (!atnam)
600
+ return NULL;
601
+
602
+ #if ATTRIBUTE_HASH_LOOKUP
603
+ for (p = attribs->hashtab[attrsHash(atnam)]; p && p->attr; p = p->next)
604
+ if (TY_(tmbstrcmp)(atnam, p->attr->name) == 0)
605
+ return p->attr;
606
+
607
+ for (np = attribute_defs; np && np->name; ++np)
608
+ if (TY_(tmbstrcmp)(atnam, np->name) == 0)
609
+ return attrsInstall(doc, attribs, np);
610
+ #else
611
+ for (np = attribute_defs; np && np->name; ++np)
612
+ if (TY_(tmbstrcmp)(atnam, np->name) == 0)
613
+ return np;
614
+ #endif
615
+
616
+ return NULL;
617
+ }
618
+
619
+
620
+ /* Locate attributes by type */
621
+ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id )
622
+ {
623
+ AttVal* av;
624
+ for ( av = node->attributes; av; av = av->next )
625
+ {
626
+ if ( AttrIsId(av, id) )
627
+ return av;
628
+ }
629
+ return NULL;
630
+ }
631
+
632
+ /* public method for finding attribute definition by name */
633
+ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval )
634
+ {
635
+ if ( attval )
636
+ return attrsLookup( doc, &doc->attribs, attval->attribute );
637
+ return NULL;
638
+ }
639
+
640
+ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
641
+ {
642
+ AttVal *attr;
643
+ for (attr = node->attributes; attr != NULL; attr = attr->next)
644
+ {
645
+ if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0)
646
+ break;
647
+ }
648
+ return attr;
649
+ }
650
+
651
+ AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
652
+ Node *node, ctmbstr name, ctmbstr value )
653
+ {
654
+ AttVal *av = TY_(NewAttribute)(doc);
655
+ av->delim = '"';
656
+ av->attribute = TY_(tmbstrdup)(doc->allocator, name);
657
+
658
+ if (value)
659
+ av->value = TY_(tmbstrdup)(doc->allocator, value);
660
+ else
661
+ av->value = NULL;
662
+
663
+ av->dict = attrsLookup(doc, &doc->attribs, name);
664
+
665
+ TY_(InsertAttributeAtEnd)(node, av);
666
+ return av;
667
+ }
668
+
669
+ AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value)
670
+ {
671
+ AttVal* old = TY_(GetAttrByName)(node, name);
672
+
673
+ if (old)
674
+ {
675
+ if (old->value)
676
+ TidyDocFree(doc, old->value);
677
+ if (value)
678
+ old->value = TY_(tmbstrdup)(doc->allocator, value);
679
+ else
680
+ old->value = NULL;
681
+
682
+ return old;
683
+ }
684
+ else
685
+ return TY_(AddAttribute)(doc, node, name, value);
686
+ }
687
+
688
+ static Bool CheckAttrType( TidyDocImpl* doc,
689
+ ctmbstr attrname, AttrCheck type )
690
+ {
691
+ const Attribute* np = attrsLookup( doc, &doc->attribs, attrname );
692
+ return (Bool)( np && np->attrchk == type );
693
+ }
694
+
695
+ Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname )
696
+ {
697
+ return CheckAttrType( doc, attrname, CH_URL );
698
+ }
699
+
700
+ /*
701
+ Bool IsBool( TidyDocImpl* doc, ctmbstr attrname )
702
+ {
703
+ return CheckAttrType( doc, attrname, CH_BOOL );
704
+ }
705
+ */
706
+
707
+ Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname )
708
+ {
709
+ return CheckAttrType( doc, attrname, CH_SCRIPT );
710
+ }
711
+
712
+ /* may id or name serve as anchor? */
713
+ Bool TY_(IsAnchorElement)( TidyDocImpl* ARG_UNUSED(doc), Node* node)
714
+ {
715
+ TidyTagId tid = TagId( node );
716
+ if ( tid == TidyTag_A ||
717
+ tid == TidyTag_APPLET ||
718
+ tid == TidyTag_FORM ||
719
+ tid == TidyTag_FRAME ||
720
+ tid == TidyTag_IFRAME ||
721
+ tid == TidyTag_IMG ||
722
+ tid == TidyTag_MAP )
723
+ return yes;
724
+
725
+ return no;
726
+ }
727
+
728
+ /*
729
+ In CSS1, selectors can contain only the characters A-Z, 0-9,
730
+ and Unicode characters 161-255, plus dash (-); they cannot start
731
+ with a dash or a digit; they can also contain escaped characters
732
+ and any Unicode character as a numeric code (see next item).
733
+
734
+ The backslash followed by at most four hexadecimal digits
735
+ (0..9A..F) stands for the Unicode character with that number.
736
+
737
+ Any character except a hexadecimal digit can be escaped to remove
738
+ its special meaning, by putting a backslash in front.
739
+
740
+ #508936 - CSS class naming for -clean option
741
+ */
742
+ Bool TY_(IsCSS1Selector)( ctmbstr buf )
743
+ {
744
+ Bool valid = yes;
745
+ int esclen = 0;
746
+ byte c;
747
+ int pos;
748
+
749
+ for ( pos=0; valid && (c = *buf++); ++pos )
750
+ {
751
+ if ( c == '\\' )
752
+ {
753
+ esclen = 1; /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */
754
+ }
755
+ else if ( isdigit( c ) )
756
+ {
757
+ /* Digit not 1st, unless escaped (Max length "\112F") */
758
+ if ( esclen > 0 )
759
+ valid = ( ++esclen < 6 );
760
+ if ( valid )
761
+ valid = ( pos>0 || esclen>0 );
762
+ }
763
+ else
764
+ {
765
+ valid = (
766
+ esclen > 0 /* Escaped? Anything goes. */
767
+ || ( pos>0 && c == '-' ) /* Dash cannot be 1st char */
768
+ || isalpha(c) /* a-z, A-Z anywhere */
769
+ || ( c >= 161 ) /* Unicode 161-255 anywhere */
770
+ );
771
+ esclen = 0;
772
+ }
773
+ }
774
+ return valid;
775
+ }
776
+
777
+ /* free single anchor */
778
+ static void FreeAnchor(TidyDocImpl* doc, Anchor *a)
779
+ {
780
+ if ( a )
781
+ TidyDocFree( doc, a->name );
782
+ TidyDocFree( doc, a );
783
+ }
784
+
785
+ /* removes anchor for specific node */
786
+ void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node )
787
+ {
788
+ TidyAttribImpl* attribs = &doc->attribs;
789
+ Anchor *delme = NULL, *curr, *prev = NULL;
790
+
791
+ for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next )
792
+ {
793
+ if ( curr->node == node )
794
+ {
795
+ if ( prev )
796
+ prev->next = curr->next;
797
+ else
798
+ attribs->anchor_list = curr->next;
799
+ delme = curr;
800
+ break;
801
+ }
802
+ prev = curr;
803
+ }
804
+ FreeAnchor( doc, delme );
805
+ }
806
+
807
+ /* initialize new anchor */
808
+ static Anchor* NewAnchor( TidyDocImpl* doc, ctmbstr name, Node* node )
809
+ {
810
+ Anchor *a = (Anchor*) TidyDocAlloc( doc, sizeof(Anchor) );
811
+
812
+ a->name = TY_(tmbstrdup)( doc->allocator, name );
813
+ a->name = TY_(tmbstrtolower)(a->name);
814
+ a->node = node;
815
+ a->next = NULL;
816
+
817
+ return a;
818
+ }
819
+
820
+ /* add new anchor to namespace */
821
+ static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node )
822
+ {
823
+ TidyAttribImpl* attribs = &doc->attribs;
824
+ Anchor *a = NewAnchor( doc, name, node );
825
+
826
+ if ( attribs->anchor_list == NULL)
827
+ attribs->anchor_list = a;
828
+ else
829
+ {
830
+ Anchor *here = attribs->anchor_list;
831
+ while (here->next)
832
+ here = here->next;
833
+ here->next = a;
834
+ }
835
+
836
+ return attribs->anchor_list;
837
+ }
838
+
839
+ /* return node associated with anchor */
840
+ static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name )
841
+ {
842
+ TidyAttribImpl* attribs = &doc->attribs;
843
+ Anchor *found;
844
+ tmbstr lname = TY_(tmbstrdup)(doc->allocator, name);
845
+ lname = TY_(tmbstrtolower)(lname);
846
+
847
+ for ( found = attribs->anchor_list; found != NULL; found = found->next )
848
+ {
849
+ if ( TY_(tmbstrcmp)(found->name, lname) == 0 )
850
+ break;
851
+ }
852
+
853
+ TidyDocFree(doc, lname);
854
+ if ( found )
855
+ return found->node;
856
+ return NULL;
857
+ }
858
+
859
+ /* free all anchors */
860
+ void TY_(FreeAnchors)( TidyDocImpl* doc )
861
+ {
862
+ TidyAttribImpl* attribs = &doc->attribs;
863
+ Anchor* a;
864
+ while (NULL != (a = attribs->anchor_list) )
865
+ {
866
+ attribs->anchor_list = a->next;
867
+ FreeAnchor(doc, a);
868
+ }
869
+ }
870
+
871
+ /* public method for inititializing attribute dictionary */
872
+ void TY_(InitAttrs)( TidyDocImpl* doc )
873
+ {
874
+ TidyClearMemory( &doc->attribs, sizeof(TidyAttribImpl) );
875
+ #ifdef _DEBUG
876
+ {
877
+ /* Attribute ID is index position in Attribute type lookup table */
878
+ uint ix;
879
+ for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix )
880
+ {
881
+ const Attribute* dict = &attribute_defs[ ix ];
882
+ assert( (uint) dict->id == ix );
883
+ }
884
+ }
885
+ #endif
886
+ }
887
+
888
+ /* free all declared attributes */
889
+ static void FreeDeclaredAttributes( TidyDocImpl* doc )
890
+ {
891
+ TidyAttribImpl* attribs = &doc->attribs;
892
+ Attribute* dict;
893
+ while ( NULL != (dict = attribs->declared_attr_list) )
894
+ {
895
+ attribs->declared_attr_list = dict->next;
896
+ #if ATTRIBUTE_HASH_LOOKUP
897
+ attrsRemoveFromHash( doc, &doc->attribs, dict->name );
898
+ #endif
899
+ TidyDocFree( doc, dict->name );
900
+ TidyDocFree( doc, dict );
901
+ }
902
+ }
903
+
904
+ void TY_(FreeAttrTable)( TidyDocImpl* doc )
905
+ {
906
+ #if ATTRIBUTE_HASH_LOOKUP
907
+ attrsEmptyHash( doc, &doc->attribs );
908
+ #endif
909
+ TY_(FreeAnchors)( doc );
910
+ FreeDeclaredAttributes( doc );
911
+ }
912
+
913
+ void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname )
914
+ {
915
+ uint len = TY_(tmbstrlen)(classattr->value) +
916
+ TY_(tmbstrlen)(classname) + 2;
917
+ tmbstr s = (tmbstr) TidyDocAlloc( doc, len );
918
+ s[0] = '\0';
919
+ if (classattr->value)
920
+ {
921
+ TY_(tmbstrcpy)( s, classattr->value );
922
+ TY_(tmbstrcat)( s, " " );
923
+ }
924
+ TY_(tmbstrcat)( s, classname );
925
+ if (classattr->value)
926
+ TidyDocFree( doc, classattr->value );
927
+ classattr->value = s;
928
+ }
929
+
930
+ /* concatenate styles */
931
+ static void AppendToStyleAttr( TidyDocImpl* doc, AttVal *styleattr, ctmbstr styleprop )
932
+ {
933
+ /*
934
+ this doesn't handle CSS comments and
935
+ leading/trailing white-space very well
936
+ see http://www.w3.org/TR/css-style-attr
937
+ */
938
+ uint end = TY_(tmbstrlen)(styleattr->value);
939
+
940
+ if (end >0 && styleattr->value[end - 1] == ';')
941
+ {
942
+ /* attribute ends with declaration seperator */
943
+
944
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
945
+ end + TY_(tmbstrlen)(styleprop) + 2);
946
+
947
+ TY_(tmbstrcat)(styleattr->value, " ");
948
+ TY_(tmbstrcat)(styleattr->value, styleprop);
949
+ }
950
+ else if (end >0 && styleattr->value[end - 1] == '}')
951
+ {
952
+ /* attribute ends with rule set */
953
+
954
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
955
+ end + TY_(tmbstrlen)(styleprop) + 6);
956
+
957
+ TY_(tmbstrcat)(styleattr->value, " { ");
958
+ TY_(tmbstrcat)(styleattr->value, styleprop);
959
+ TY_(tmbstrcat)(styleattr->value, " }");
960
+ }
961
+ else
962
+ {
963
+ /* attribute ends with property value */
964
+
965
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
966
+ end + TY_(tmbstrlen)(styleprop) + 3);
967
+
968
+ if (end > 0)
969
+ TY_(tmbstrcat)(styleattr->value, "; ");
970
+ TY_(tmbstrcat)(styleattr->value, styleprop);
971
+ }
972
+ }
973
+
974
+ /*
975
+ the same attribute name can't be used
976
+ more than once in each element
977
+ */
978
+ static Bool AttrsHaveSameName( AttVal* av1, AttVal* av2)
979
+ {
980
+ TidyAttrId id1, id2;
981
+
982
+ id1 = AttrId(av1);
983
+ id2 = AttrId(av2);
984
+ if (id1 != TidyAttr_UNKNOWN && id2 != TidyAttr_UNKNOWN)
985
+ return AttrsHaveSameId(av1, av2);
986
+ if (id1 != TidyAttr_UNKNOWN || id2 != TidyAttr_UNKNOWN)
987
+ return no;
988
+ if (av1->attribute && av2->attribute)
989
+ return TY_(tmbstrcmp)(av1->attribute, av2->attribute) == 0;
990
+ return no;
991
+ }
992
+
993
+ void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node *node, Bool isXml )
994
+ {
995
+ AttVal *first;
996
+
997
+ for (first = node->attributes; first != NULL;)
998
+ {
999
+ AttVal *second;
1000
+ Bool firstRedefined = no;
1001
+
1002
+ if (!(first->asp == NULL && first->php == NULL))
1003
+ {
1004
+ first = first->next;
1005
+ continue;
1006
+ }
1007
+
1008
+ for (second = first->next; second != NULL;)
1009
+ {
1010
+ AttVal *temp;
1011
+
1012
+ if (!(second->asp == NULL && second->php == NULL
1013
+ && AttrsHaveSameName(first, second)))
1014
+ {
1015
+ second = second->next;
1016
+ continue;
1017
+ }
1018
+
1019
+ /* first and second attribute have same local name */
1020
+ /* now determine what to do with this duplicate... */
1021
+
1022
+ if (!isXml
1023
+ && attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses)
1024
+ && AttrHasValue(first) && AttrHasValue(second))
1025
+ {
1026
+ /* concatenate classes */
1027
+
1028
+ TY_(AppendToClassAttr)(doc, first, second->value);
1029
+
1030
+ temp = second->next;
1031
+ TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
1032
+ TY_(RemoveAttribute)( doc, node, second );
1033
+ second = temp;
1034
+ }
1035
+ else if (!isXml
1036
+ && attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles)
1037
+ && AttrHasValue(first) && AttrHasValue(second))
1038
+ {
1039
+ AppendToStyleAttr( doc, first, second->value );
1040
+
1041
+ temp = second->next;
1042
+ TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
1043
+ TY_(RemoveAttribute)( doc, node, second );
1044
+ second = temp;
1045
+ }
1046
+ else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast )
1047
+ {
1048
+ temp = first->next;
1049
+ TY_(ReportAttrError)( doc, node, first, REPEATED_ATTRIBUTE);
1050
+ TY_(RemoveAttribute)( doc, node, first );
1051
+ firstRedefined = yes;
1052
+ first = temp;
1053
+ second = second->next;
1054
+ }
1055
+ else /* TidyDuplicateAttrs == TidyKeepFirst */
1056
+ {
1057
+ temp = second->next;
1058
+ TY_(ReportAttrError)( doc, node, second, REPEATED_ATTRIBUTE);
1059
+ TY_(RemoveAttribute)( doc, node, second );
1060
+ second = temp;
1061
+ }
1062
+ }
1063
+ if (!firstRedefined)
1064
+ first = first->next;
1065
+ }
1066
+ }
1067
+
1068
+ /* ignore unknown attributes for proprietary elements */
1069
+ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval )
1070
+ {
1071
+ const Attribute* attribute = attval->dict;
1072
+
1073
+ if ( attribute != NULL )
1074
+ {
1075
+ if (attribute->versions & VERS_XML)
1076
+ {
1077
+ doc->lexer->isvoyager = yes;
1078
+ if (!cfgBool(doc, TidyHtmlOut))
1079
+ {
1080
+ TY_(SetOptionBool)(doc, TidyXhtmlOut, yes);
1081
+ TY_(SetOptionBool)(doc, TidyXmlOut, yes);
1082
+ }
1083
+ }
1084
+
1085
+ TY_(ConstrainVersion)(doc, AttributeVersions(node, attval));
1086
+
1087
+ if (attribute->attrchk)
1088
+ attribute->attrchk( doc, node, attval );
1089
+ }
1090
+
1091
+ if (AttributeIsProprietary(node, attval))
1092
+ {
1093
+ TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
1094
+
1095
+ if (cfgBool(doc, TidyDropPropAttrs))
1096
+ TY_(RemoveAttribute)( doc, node, attval );
1097
+ }
1098
+
1099
+ return attribute;
1100
+ }
1101
+
1102
+ Bool TY_(IsBoolAttribute)(AttVal *attval)
1103
+ {
1104
+ const Attribute *attribute = ( attval ? attval->dict : NULL );
1105
+ if ( attribute && attribute->attrchk == CH_BOOL )
1106
+ return yes;
1107
+ return no;
1108
+ }
1109
+
1110
+ Bool TY_(attrIsEvent)( AttVal* attval )
1111
+ {
1112
+ TidyAttrId atid = AttrId( attval );
1113
+
1114
+ return (atid == TidyAttr_OnAFTERUPDATE ||
1115
+ atid == TidyAttr_OnBEFOREUNLOAD ||
1116
+ atid == TidyAttr_OnBEFOREUPDATE ||
1117
+ atid == TidyAttr_OnBLUR ||
1118
+ atid == TidyAttr_OnCHANGE ||
1119
+ atid == TidyAttr_OnCLICK ||
1120
+ atid == TidyAttr_OnDATAAVAILABLE ||
1121
+ atid == TidyAttr_OnDATASETCHANGED ||
1122
+ atid == TidyAttr_OnDATASETCOMPLETE ||
1123
+ atid == TidyAttr_OnDBLCLICK ||
1124
+ atid == TidyAttr_OnERRORUPDATE ||
1125
+ atid == TidyAttr_OnFOCUS ||
1126
+ atid == TidyAttr_OnKEYDOWN ||
1127
+ atid == TidyAttr_OnKEYPRESS ||
1128
+ atid == TidyAttr_OnKEYUP ||
1129
+ atid == TidyAttr_OnLOAD ||
1130
+ atid == TidyAttr_OnMOUSEDOWN ||
1131
+ atid == TidyAttr_OnMOUSEMOVE ||
1132
+ atid == TidyAttr_OnMOUSEOUT ||
1133
+ atid == TidyAttr_OnMOUSEOVER ||
1134
+ atid == TidyAttr_OnMOUSEUP ||
1135
+ atid == TidyAttr_OnRESET ||
1136
+ atid == TidyAttr_OnROWENTER ||
1137
+ atid == TidyAttr_OnROWEXIT ||
1138
+ atid == TidyAttr_OnSELECT ||
1139
+ atid == TidyAttr_OnSUBMIT ||
1140
+ atid == TidyAttr_OnUNLOAD);
1141
+ }
1142
+
1143
+ static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval)
1144
+ {
1145
+ tmbstr p;
1146
+ Bool hasUpper = no;
1147
+
1148
+ if (!AttrHasValue(attval))
1149
+ return;
1150
+
1151
+ p = attval->value;
1152
+
1153
+ while (*p)
1154
+ {
1155
+ if (TY_(IsUpper)(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */
1156
+ {
1157
+ hasUpper = yes;
1158
+ break;
1159
+ }
1160
+ p++;
1161
+ }
1162
+
1163
+ if (hasUpper)
1164
+ {
1165
+ Lexer* lexer = doc->lexer;
1166
+ if (lexer->isvoyager)
1167
+ TY_(ReportAttrError)( doc, node, attval, ATTR_VALUE_NOT_LCASE);
1168
+
1169
+ if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) )
1170
+ attval->value = TY_(tmbstrtolower)(attval->value);
1171
+ }
1172
+ }
1173
+
1174
+ /* methods for checking value of a specific attribute */
1175
+
1176
+ void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
1177
+ {
1178
+ tmbchar c;
1179
+ tmbstr dest, p;
1180
+ uint escape_count = 0, backslash_count = 0;
1181
+ uint i, pos = 0;
1182
+ uint len;
1183
+ Bool isJavascript = no;
1184
+
1185
+ if (!AttrHasValue(attval))
1186
+ {
1187
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1188
+ return;
1189
+ }
1190
+
1191
+ p = attval->value;
1192
+
1193
+ isJavascript =
1194
+ TY_(tmbstrncmp)(p,"javascript:",sizeof("javascript:")-1)==0;
1195
+
1196
+ for (i = 0; '\0' != (c = p[i]); ++i)
1197
+ {
1198
+ if (c == '\\')
1199
+ {
1200
+ ++backslash_count;
1201
+ if ( cfgBool(doc, TidyFixBackslash) && !isJavascript)
1202
+ p[i] = '/';
1203
+ }
1204
+ else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
1205
+ ++escape_count;
1206
+ }
1207
+
1208
+ if ( cfgBool(doc, TidyFixUri) && escape_count )
1209
+ {
1210
+ len = TY_(tmbstrlen)(p) + escape_count * 2 + 1;
1211
+ dest = (tmbstr) TidyDocAlloc(doc, len);
1212
+
1213
+ for (i = 0; 0 != (c = p[i]); ++i)
1214
+ {
1215
+ if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
1216
+ pos += sprintf( dest + pos, "%%%02X", (byte)c );
1217
+ else
1218
+ dest[pos++] = c;
1219
+ }
1220
+ dest[pos] = 0;
1221
+
1222
+ TidyDocFree(doc, attval->value);
1223
+ attval->value = dest;
1224
+ }
1225
+ if ( backslash_count )
1226
+ {
1227
+ if ( cfgBool(doc, TidyFixBackslash) && !isJavascript )
1228
+ TY_(ReportAttrError)( doc, node, attval, FIXED_BACKSLASH );
1229
+ else
1230
+ TY_(ReportAttrError)( doc, node, attval, BACKSLASH_IN_URI );
1231
+ }
1232
+ if ( escape_count )
1233
+ {
1234
+ if ( cfgBool(doc, TidyFixUri) )
1235
+ TY_(ReportAttrError)( doc, node, attval, ESCAPED_ILLEGAL_URI);
1236
+ else
1237
+ TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_REFERENCE);
1238
+
1239
+ doc->badChars |= BC_INVALID_URI;
1240
+ }
1241
+ }
1242
+
1243
+ /* RFC 2396, section 4.2 states:
1244
+ "[...] in the case of HTML's FORM element, [...] an
1245
+ empty URI reference represents the base URI of the
1246
+ current document and should be replaced by that URI
1247
+ when transformed into a request."
1248
+ */
1249
+ void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval)
1250
+ {
1251
+ if (AttrHasValue(attval))
1252
+ TY_(CheckUrl)( doc, node, attval );
1253
+ }
1254
+
1255
+ void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node),
1256
+ AttVal* ARG_UNUSED(attval))
1257
+ {
1258
+ }
1259
+
1260
+ Bool TY_(IsValidHTMLID)(ctmbstr id)
1261
+ {
1262
+ ctmbstr s = id;
1263
+
1264
+ if (!s)
1265
+ return no;
1266
+
1267
+ if (!TY_(IsLetter)(*s++))
1268
+ return no;
1269
+
1270
+ while (*s)
1271
+ if (!TY_(IsNamechar)(*s++))
1272
+ return no;
1273
+
1274
+ return yes;
1275
+
1276
+ }
1277
+
1278
+ Bool TY_(IsValidXMLID)(ctmbstr id)
1279
+ {
1280
+ ctmbstr s = id;
1281
+ tchar c;
1282
+
1283
+ if (!s)
1284
+ return no;
1285
+
1286
+ c = *s++;
1287
+ if (c > 0x7F)
1288
+ s += TY_(GetUTF8)(s, &c);
1289
+
1290
+ if (!(TY_(IsXMLLetter)(c) || c == '_' || c == ':'))
1291
+ return no;
1292
+
1293
+ while (*s)
1294
+ {
1295
+ c = (unsigned char)*s;
1296
+
1297
+ if (c > 0x7F)
1298
+ s += TY_(GetUTF8)(s, &c);
1299
+
1300
+ ++s;
1301
+
1302
+ if (!TY_(IsXMLNamechar)(c))
1303
+ return no;
1304
+ }
1305
+
1306
+ return yes;
1307
+ }
1308
+
1309
+ static Bool IsValidNMTOKEN(ctmbstr name)
1310
+ {
1311
+ ctmbstr s = name;
1312
+ tchar c;
1313
+
1314
+ if (!s)
1315
+ return no;
1316
+
1317
+ while (*s)
1318
+ {
1319
+ c = (unsigned char)*s;
1320
+
1321
+ if (c > 0x7F)
1322
+ s += TY_(GetUTF8)(s, &c);
1323
+
1324
+ ++s;
1325
+
1326
+ if (!TY_(IsXMLNamechar)(c))
1327
+ return no;
1328
+ }
1329
+
1330
+ return yes;
1331
+ }
1332
+
1333
+ static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[])
1334
+ {
1335
+ const ctmbstr *v;
1336
+ for (v = list; *v; ++v)
1337
+ if (AttrValueIs(attval, *v))
1338
+ return yes;
1339
+ return no;
1340
+ }
1341
+
1342
+ static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval,
1343
+ ctmbstr const list[])
1344
+ {
1345
+ if (!AttrHasValue(attval))
1346
+ {
1347
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1348
+ return;
1349
+ }
1350
+
1351
+ CheckLowerCaseAttrValue( doc, node, attval );
1352
+
1353
+ if (!AttrValueIsAmong(attval, list))
1354
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1355
+ }
1356
+
1357
+ void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval)
1358
+ {
1359
+ Node *old;
1360
+
1361
+ if (!AttrHasValue(attval))
1362
+ {
1363
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1364
+ return;
1365
+ }
1366
+
1367
+ if ( TY_(IsAnchorElement)(doc, node) )
1368
+ {
1369
+ if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value))
1370
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1371
+
1372
+ if ((old = GetNodeByAnchor(doc, attval->value)) && old != node)
1373
+ {
1374
+ TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1375
+ }
1376
+ else
1377
+ AddAnchor( doc, attval->value, node );
1378
+ }
1379
+ }
1380
+
1381
+ void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval )
1382
+ {
1383
+ Lexer* lexer = doc->lexer;
1384
+ Node *old;
1385
+
1386
+ if (!AttrHasValue(attval))
1387
+ {
1388
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1389
+ return;
1390
+ }
1391
+
1392
+ if (!TY_(IsValidHTMLID)(attval->value))
1393
+ {
1394
+ if (lexer->isvoyager && TY_(IsValidXMLID)(attval->value))
1395
+ TY_(ReportAttrError)( doc, node, attval, XML_ID_SYNTAX);
1396
+ else
1397
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1398
+ }
1399
+
1400
+ if ((old = GetNodeByAnchor(doc, attval->value)) && old != node)
1401
+ {
1402
+ TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1403
+ }
1404
+ else
1405
+ AddAnchor( doc, attval->value, node );
1406
+ }
1407
+
1408
+ void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval)
1409
+ {
1410
+ if (!AttrHasValue(attval))
1411
+ return;
1412
+
1413
+ CheckLowerCaseAttrValue( doc, node, attval );
1414
+ }
1415
+
1416
+ void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval)
1417
+ {
1418
+ ctmbstr const values[] = {"left", "right", "center", "justify", NULL};
1419
+
1420
+ /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
1421
+ if (node->tag && (node->tag->model & CM_IMG))
1422
+ {
1423
+ CheckValign( doc, node, attval );
1424
+ return;
1425
+ }
1426
+
1427
+ if (!AttrHasValue(attval))
1428
+ {
1429
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1430
+ return;
1431
+ }
1432
+
1433
+ CheckLowerCaseAttrValue( doc, node, attval);
1434
+
1435
+ /* currently CheckCaption(...) takes care of the remaining cases */
1436
+ if (nodeIsCAPTION(node))
1437
+ return;
1438
+
1439
+ if (!AttrValueIsAmong(attval, values))
1440
+ {
1441
+ /* align="char" is allowed for elements with CM_TABLE|CM_ROW
1442
+ except CAPTION which is excluded above, */
1443
+ if( !(AttrValueIs(attval, "char")
1444
+ && TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)) )
1445
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1446
+ }
1447
+ }
1448
+
1449
+ void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval)
1450
+ {
1451
+ ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL};
1452
+ ctmbstr const values2[] = {"left", "right", NULL};
1453
+ ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom",
1454
+ "textbottom", NULL};
1455
+
1456
+ if (!AttrHasValue(attval))
1457
+ {
1458
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1459
+ return;
1460
+ }
1461
+
1462
+ CheckLowerCaseAttrValue( doc, node, attval );
1463
+
1464
+ if (AttrValueIsAmong(attval, values))
1465
+ {
1466
+ /* all is fine */
1467
+ }
1468
+ else if (AttrValueIsAmong(attval, values2))
1469
+ {
1470
+ if (!(node->tag && (node->tag->model & CM_IMG)))
1471
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1472
+ }
1473
+ else if (AttrValueIsAmong(attval, valuesp))
1474
+ {
1475
+ TY_(ConstrainVersion)( doc, VERS_PROPRIETARY );
1476
+ TY_(ReportAttrError)( doc, node, attval, PROPRIETARY_ATTR_VALUE);
1477
+ }
1478
+ else
1479
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1480
+ }
1481
+
1482
+ void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval)
1483
+ {
1484
+ tmbstr p;
1485
+
1486
+ if (!AttrHasValue(attval))
1487
+ {
1488
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1489
+ return;
1490
+ }
1491
+
1492
+ /* don't check for <col width=...> and <colgroup width=...> */
1493
+ if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node)))
1494
+ return;
1495
+
1496
+ p = attval->value;
1497
+
1498
+ if (!TY_(IsDigit)(*p++))
1499
+ {
1500
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1501
+ }
1502
+ else
1503
+ {
1504
+ while (*p)
1505
+ {
1506
+ if (!TY_(IsDigit)(*p) && *p != '%')
1507
+ {
1508
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1509
+ break;
1510
+ }
1511
+ ++p;
1512
+ }
1513
+ }
1514
+ }
1515
+
1516
+ void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval)
1517
+ {
1518
+ ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL};
1519
+
1520
+ if (!AttrHasValue(attval))
1521
+ {
1522
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1523
+ return;
1524
+ }
1525
+
1526
+ /* target names must begin with A-Za-z ... */
1527
+ if (TY_(IsLetter)(attval->value[0]))
1528
+ return;
1529
+
1530
+ /* or be one of the allowed list */
1531
+ if (!AttrValueIsAmong(attval, values))
1532
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1533
+ }
1534
+
1535
+ void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval)
1536
+ {
1537
+ ctmbstr const values[] = {"get", "post", NULL};
1538
+ CheckAttrValidity( doc, node, attval, values );
1539
+ }
1540
+
1541
+ void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval)
1542
+ {
1543
+ ctmbstr const values[] = {"none", "left", "right", "all", NULL};
1544
+
1545
+ if (!AttrHasValue(attval))
1546
+ {
1547
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1548
+ if (attval->value == NULL)
1549
+ attval->value = TY_(tmbstrdup)( doc->allocator, "none" );
1550
+ return;
1551
+ }
1552
+
1553
+ CheckLowerCaseAttrValue( doc, node, attval );
1554
+
1555
+ if (!AttrValueIsAmong(attval, values))
1556
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1557
+ }
1558
+
1559
+ void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval)
1560
+ {
1561
+ ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL};
1562
+ CheckAttrValidity( doc, node, attval, values );
1563
+ }
1564
+
1565
+ void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval)
1566
+ {
1567
+ ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL};
1568
+ CheckAttrValidity( doc, node, attval, values );
1569
+ }
1570
+
1571
+ void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval)
1572
+ {
1573
+ tmbstr p;
1574
+
1575
+ if (!AttrHasValue(attval))
1576
+ {
1577
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1578
+ return;
1579
+ }
1580
+
1581
+ /* don't check <frameset cols=... rows=...> */
1582
+ if ( nodeIsFRAMESET(node) &&
1583
+ (attrIsCOLS(attval) || attrIsROWS(attval)))
1584
+ return;
1585
+
1586
+ p = attval->value;
1587
+
1588
+ /* font size may be preceded by + or - */
1589
+ if ( nodeIsFONT(node) && (*p == '+' || *p == '-') )
1590
+ ++p;
1591
+
1592
+ while (*p)
1593
+ {
1594
+ if (!TY_(IsDigit)(*p))
1595
+ {
1596
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1597
+ break;
1598
+ }
1599
+ ++p;
1600
+ }
1601
+ }
1602
+
1603
+ /* check hexadecimal color value */
1604
+ static Bool IsValidColorCode(ctmbstr color)
1605
+ {
1606
+ uint i;
1607
+
1608
+ if (TY_(tmbstrlen)(color) != 6)
1609
+ return no;
1610
+
1611
+ /* check if valid hex digits and letters */
1612
+ for (i = 0; i < 6; i++)
1613
+ if (!TY_(IsDigit)(color[i]) && !strchr("abcdef", TY_(ToLower)(color[i])))
1614
+ return no;
1615
+
1616
+ return yes;
1617
+ }
1618
+
1619
+ /* check color syntax and beautify value by option */
1620
+ void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval)
1621
+ {
1622
+ Bool valid = no;
1623
+ tmbstr given;
1624
+
1625
+ if (!AttrHasValue(attval))
1626
+ {
1627
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1628
+ return;
1629
+ }
1630
+
1631
+ given = attval->value;
1632
+
1633
+ /* 727851 - add hash to hash-less color values */
1634
+ if (given[0] != '#' && (valid = IsValidColorCode(given)))
1635
+ {
1636
+ tmbstr cp, s;
1637
+
1638
+ cp = s = (tmbstr) TidyDocAlloc(doc, 2 + TY_(tmbstrlen)(given));
1639
+ *cp++ = '#';
1640
+ while ('\0' != (*cp++ = *given++))
1641
+ continue;
1642
+
1643
+ TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED);
1644
+
1645
+ TidyDocFree(doc, attval->value);
1646
+ given = attval->value = s;
1647
+ }
1648
+
1649
+ if (!valid && given[0] == '#')
1650
+ valid = IsValidColorCode(given + 1);
1651
+
1652
+ if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor))
1653
+ {
1654
+ ctmbstr newName = GetColorName(given);
1655
+
1656
+ if (newName)
1657
+ {
1658
+ TidyDocFree(doc, attval->value);
1659
+ given = attval->value = TY_(tmbstrdup)(doc->allocator, newName);
1660
+ }
1661
+ }
1662
+
1663
+ /* if it is not a valid color code, it is a color name */
1664
+ if (!valid)
1665
+ valid = GetColorCode(given) != NULL;
1666
+
1667
+ if (valid && given[0] == '#')
1668
+ attval->value = TY_(tmbstrtoupper)(attval->value);
1669
+ else if (valid)
1670
+ attval->value = TY_(tmbstrtolower)(attval->value);
1671
+
1672
+ if (!valid)
1673
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1674
+ }
1675
+
1676
+ /* check valuetype attribute for element param */
1677
+ void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval)
1678
+ {
1679
+ ctmbstr const values[] = {"data", "object", "ref", NULL};
1680
+ CheckAttrValidity( doc, node, attval, values );
1681
+ }
1682
+
1683
+ /* checks scrolling attribute */
1684
+ void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval)
1685
+ {
1686
+ ctmbstr const values[] = {"no", "auto", "yes", NULL};
1687
+ CheckAttrValidity( doc, node, attval, values );
1688
+ }
1689
+
1690
+ /* checks dir attribute */
1691
+ void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval)
1692
+ {
1693
+ ctmbstr const values[] = {"rtl", "ltr", NULL};
1694
+ CheckAttrValidity( doc, node, attval, values );
1695
+ }
1696
+
1697
+ /* checks lang and xml:lang attributes */
1698
+ void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
1699
+ {
1700
+ /* empty xml:lang is allowed through XML 1.0 SE errata */
1701
+ if (!AttrHasValue(attval) && !attrIsXML_LANG(attval))
1702
+ {
1703
+ if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
1704
+ {
1705
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE );
1706
+ }
1707
+ return;
1708
+ }
1709
+ }
1710
+
1711
+ /* checks type attribute */
1712
+ void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
1713
+ {
1714
+ ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio",
1715
+ "submit", "reset", "file", "hidden",
1716
+ "image", "button", NULL};
1717
+ ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
1718
+ ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
1719
+ ctmbstr const valuesOL[] = {"1", "a", "i", NULL};
1720
+
1721
+ if (nodeIsINPUT(node))
1722
+ CheckAttrValidity( doc, node, attval, valuesINPUT );
1723
+ else if (nodeIsBUTTON(node))
1724
+ CheckAttrValidity( doc, node, attval, valuesBUTTON );
1725
+ else if (nodeIsUL(node))
1726
+ CheckAttrValidity( doc, node, attval, valuesUL );
1727
+ else if (nodeIsOL(node))
1728
+ {
1729
+ if (!AttrHasValue(attval))
1730
+ {
1731
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1732
+ return;
1733
+ }
1734
+ if (!AttrValueIsAmong(attval, valuesOL))
1735
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1736
+ }
1737
+ else if (nodeIsLI(node))
1738
+ {
1739
+ if (!AttrHasValue(attval))
1740
+ {
1741
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1742
+ return;
1743
+ }
1744
+ if (AttrValueIsAmong(attval, valuesUL))
1745
+ CheckLowerCaseAttrValue( doc, node, attval );
1746
+ else if (!AttrValueIsAmong(attval, valuesOL))
1747
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1748
+ }
1749
+ return;
1750
+ }
1751
+
1752
+ static
1753
+ AttVal *SortAttVal( AttVal* list, TidyAttrSortStrategy strat );
1754
+
1755
+ void TY_(SortAttributes)(Node* node, TidyAttrSortStrategy strat)
1756
+ {
1757
+ while (node)
1758
+ {
1759
+ node->attributes = SortAttVal( node->attributes, strat );
1760
+ if (node->content)
1761
+ TY_(SortAttributes)(node->content, strat);
1762
+ node = node->next;
1763
+ }
1764
+ }
1765
+
1766
+ /**
1767
+ * Attribute sorting contributed by Adrian Wilkins, 2007
1768
+ *
1769
+ * Portions copyright Simon Tatham 2001.
1770
+ *
1771
+ * Merge sort algorithm adapted from listsort.c linked from
1772
+ * http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html
1773
+ *
1774
+ * Original copyright notice proceeds below.
1775
+ *
1776
+ * Permission is hereby granted, free of charge, to any person
1777
+ * obtaining a copy of this software and associated documentation
1778
+ * files (the "Software"), to deal in the Software without
1779
+ * restriction, including without limitation the rights to use,
1780
+ * copy, modify, merge, publish, distribute, sublicense, and/or
1781
+ * sell copies of the Software, and to permit persons to whom the
1782
+ * Software is furnished to do so, subject to the following
1783
+ * conditions:
1784
+ *
1785
+ * The above copyright notice and this permission notice shall be
1786
+ * included in all copies or substantial portions of the Software.
1787
+ *
1788
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1789
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
1790
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1791
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
1792
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
1793
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1794
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1795
+ * SOFTWARE.
1796
+ */
1797
+
1798
+ typedef int(*ptAttValComparator)(AttVal *one, AttVal *two);
1799
+
1800
+ /* Comparison function for TidySortAttrAlpha */
1801
+ static
1802
+ int AlphaComparator(AttVal *one, AttVal *two)
1803
+ {
1804
+ return TY_(tmbstrcmp)(one->attribute, two->attribute);
1805
+ }
1806
+
1807
+
1808
+ /* The "factory method" that returns a pointer to the comparator function */
1809
+ static
1810
+ ptAttValComparator GetAttValComparator(TidyAttrSortStrategy strat)
1811
+ {
1812
+ switch (strat)
1813
+ {
1814
+ case TidySortAttrAlpha:
1815
+ return AlphaComparator;
1816
+ case TidySortAttrNone:
1817
+ break;
1818
+ }
1819
+ return 0;
1820
+ }
1821
+
1822
+ /* The sort routine */
1823
+ static
1824
+ AttVal *SortAttVal( AttVal *list, TidyAttrSortStrategy strat)
1825
+ {
1826
+ ptAttValComparator ptComparator = GetAttValComparator(strat);
1827
+ AttVal *p, *q, *e, *tail;
1828
+ int insize, nmerges, psize, qsize, i;
1829
+
1830
+ /*
1831
+ * Silly special case: if `list' was passed in as NULL, return
1832
+ * NULL immediately.
1833
+ */
1834
+ if (!list)
1835
+ return NULL;
1836
+
1837
+ insize = 1;
1838
+
1839
+ while (1) {
1840
+ p = list;
1841
+ list = NULL;
1842
+ tail = NULL;
1843
+
1844
+ nmerges = 0; /* count number of merges we do in this pass */
1845
+
1846
+ while (p) {
1847
+ nmerges++; /* there exists a merge to be done */
1848
+ /* step `insize' places along from p */
1849
+ q = p;
1850
+ psize = 0;
1851
+ for (i = 0; i < insize; i++) {
1852
+ psize++;
1853
+ q = q->next;
1854
+ if(!q) break;
1855
+ }
1856
+
1857
+ /* if q hasn't fallen off end, we have two lists to merge */
1858
+ qsize = insize;
1859
+
1860
+ /* now we have two lists; merge them */
1861
+ while (psize > 0 || (qsize > 0 && q)) {
1862
+
1863
+ /* decide whether next element of merge comes from p or q */
1864
+ if (psize == 0) {
1865
+ /* p is empty; e must come from q. */
1866
+ e = q; q = q->next; qsize--;
1867
+ } else if (qsize == 0 || !q) {
1868
+ /* q is empty; e must come from p. */
1869
+ e = p; p = p->next; psize--;
1870
+ } else if (ptComparator(p,q) <= 0) {
1871
+ /* First element of p is lower (or same);
1872
+ * e must come from p. */
1873
+ e = p; p = p->next; psize--;
1874
+ } else {
1875
+ /* First element of q is lower; e must come from q. */
1876
+ e = q; q = q->next; qsize--;
1877
+ }
1878
+
1879
+ /* add the next element to the merged list */
1880
+ if (tail) {
1881
+ tail->next = e;
1882
+ } else {
1883
+ list = e;
1884
+ }
1885
+
1886
+ tail = e;
1887
+ }
1888
+
1889
+ /* now p has stepped `insize' places along, and q has too */
1890
+ p = q;
1891
+ }
1892
+
1893
+ tail->next = NULL;
1894
+
1895
+ /* If we have done only one merge, we're finished. */
1896
+ if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
1897
+ return list;
1898
+
1899
+ /* Otherwise repeat, merging lists twice the size */
1900
+ insize *= 2;
1901
+ }
1902
+ }
1903
+
1904
+ /*
1905
+ * local variables:
1906
+ * mode: c
1907
+ * indent-tabs-mode: nil
1908
+ * c-basic-offset: 4
1909
+ * eval: (c-set-offset 'substatement-open 0)
1910
+ * end:
1911
+ */