tidy-ext 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/.gitignore +4 -0
  2. data/LICENSE +50 -0
  3. data/README +12 -0
  4. data/Rakefile +60 -0
  5. data/VERSION +1 -0
  6. data/ext/tidy/access.c +3310 -0
  7. data/ext/tidy/access.h +279 -0
  8. data/ext/tidy/alloc.c +107 -0
  9. data/ext/tidy/attrask.c +209 -0
  10. data/ext/tidy/attrdict.c +2398 -0
  11. data/ext/tidy/attrdict.h +122 -0
  12. data/ext/tidy/attrget.c +213 -0
  13. data/ext/tidy/attrs.c +1911 -0
  14. data/ext/tidy/attrs.h +374 -0
  15. data/ext/tidy/buffio.c +232 -0
  16. data/ext/tidy/buffio.h +118 -0
  17. data/ext/tidy/charsets.c +1032 -0
  18. data/ext/tidy/charsets.h +14 -0
  19. data/ext/tidy/clean.c +2674 -0
  20. data/ext/tidy/clean.h +87 -0
  21. data/ext/tidy/config.c +1746 -0
  22. data/ext/tidy/config.h +153 -0
  23. data/ext/tidy/entities.c +419 -0
  24. data/ext/tidy/entities.h +24 -0
  25. data/ext/tidy/extconf.rb +5 -0
  26. data/ext/tidy/fileio.c +106 -0
  27. data/ext/tidy/fileio.h +46 -0
  28. data/ext/tidy/forward.h +69 -0
  29. data/ext/tidy/iconvtc.c +105 -0
  30. data/ext/tidy/iconvtc.h +15 -0
  31. data/ext/tidy/istack.c +373 -0
  32. data/ext/tidy/lexer.c +3825 -0
  33. data/ext/tidy/lexer.h +617 -0
  34. data/ext/tidy/localize.c +1882 -0
  35. data/ext/tidy/mappedio.c +329 -0
  36. data/ext/tidy/mappedio.h +16 -0
  37. data/ext/tidy/message.h +207 -0
  38. data/ext/tidy/parser.c +4408 -0
  39. data/ext/tidy/parser.h +76 -0
  40. data/ext/tidy/platform.h +636 -0
  41. data/ext/tidy/pprint.c +2276 -0
  42. data/ext/tidy/pprint.h +93 -0
  43. data/ext/tidy/ruby-tidy.c +195 -0
  44. data/ext/tidy/streamio.c +1407 -0
  45. data/ext/tidy/streamio.h +222 -0
  46. data/ext/tidy/tagask.c +286 -0
  47. data/ext/tidy/tags.c +955 -0
  48. data/ext/tidy/tags.h +235 -0
  49. data/ext/tidy/tidy-int.h +129 -0
  50. data/ext/tidy/tidy.h +1097 -0
  51. data/ext/tidy/tidyenum.h +622 -0
  52. data/ext/tidy/tidylib.c +1751 -0
  53. data/ext/tidy/tmbstr.c +306 -0
  54. data/ext/tidy/tmbstr.h +92 -0
  55. data/ext/tidy/utf8.c +539 -0
  56. data/ext/tidy/utf8.h +52 -0
  57. data/ext/tidy/version.h +14 -0
  58. data/ext/tidy/win32tc.c +795 -0
  59. data/ext/tidy/win32tc.h +19 -0
  60. data/spec/spec_helper.rb +5 -0
  61. data/spec/tidy/compat_spec.rb +44 -0
  62. data/spec/tidy/remote_uri_spec.rb +14 -0
  63. data/spec/tidy/test1.html +5 -0
  64. data/spec/tidy/tidy_spec.rb +34 -0
  65. metadata +125 -0
data/ext/tidy/attrs.c ADDED
@@ -0,0 +1,1911 @@
1
+ /* attrs.c -- recognize HTML attributes
2
+
3
+ (c) 1998-2009 (W3C) MIT, ERCIM, Keio University
4
+ See tidy.h for the copyright notice.
5
+
6
+ CVS Info :
7
+
8
+ $Author: arnaud02 $
9
+ $Date: 2009/03/26 13:05:22 $
10
+ $Revision: 1.132 $
11
+
12
+ */
13
+
14
+ #include "tidy-int.h"
15
+ #include "attrs.h"
16
+ #include "message.h"
17
+ #include "tmbstr.h"
18
+ #include "utf8.h"
19
+
20
+ /*
21
+ Bind attribute types to procedures to check values.
22
+ You can add new procedures for better validation
23
+ and each procedure has access to the node in which
24
+ the attribute occurred as well as the attribute name
25
+ and its value.
26
+
27
+ By default, attributes are checked without regard
28
+ to the element they are found on. You have the choice
29
+ of making the procedure test which element is involved
30
+ or in writing methods for each element which controls
31
+ exactly how the attributes of that element are checked.
32
+ This latter approach is best for detecting the absence
33
+ of required attributes.
34
+ */
35
+
36
+ static AttrCheck CheckAction;
37
+ static AttrCheck CheckScript;
38
+ static AttrCheck CheckName;
39
+ static AttrCheck CheckId;
40
+ static AttrCheck CheckAlign;
41
+ static AttrCheck CheckValign;
42
+ static AttrCheck CheckBool;
43
+ static AttrCheck CheckLength;
44
+ static AttrCheck CheckTarget;
45
+ static AttrCheck CheckFsubmit;
46
+ static AttrCheck CheckClear;
47
+ static AttrCheck CheckShape;
48
+ static AttrCheck CheckNumber;
49
+ static AttrCheck CheckScope;
50
+ static AttrCheck CheckColor;
51
+ static AttrCheck CheckVType;
52
+ static AttrCheck CheckScroll;
53
+ static AttrCheck CheckTextDir;
54
+ static AttrCheck CheckLang;
55
+ static AttrCheck CheckType;
56
+
57
+ #define CH_PCDATA NULL
58
+ #define CH_CHARSET NULL
59
+ #define CH_TYPE CheckType
60
+ #define CH_XTYPE NULL
61
+ #define CH_CHARACTER NULL
62
+ #define CH_URLS NULL
63
+ #define CH_URL TY_(CheckUrl)
64
+ #define CH_SCRIPT CheckScript
65
+ #define CH_ALIGN CheckAlign
66
+ #define CH_VALIGN CheckValign
67
+ #define CH_COLOR CheckColor
68
+ #define CH_CLEAR CheckClear
69
+ #define CH_BORDER CheckBool /* kludge */
70
+ #define CH_LANG CheckLang
71
+ #define CH_BOOL CheckBool
72
+ #define CH_COLS NULL
73
+ #define CH_NUMBER CheckNumber
74
+ #define CH_LENGTH CheckLength
75
+ #define CH_COORDS NULL
76
+ #define CH_DATE NULL
77
+ #define CH_TEXTDIR CheckTextDir
78
+ #define CH_IDREFS NULL
79
+ #define CH_IDREF NULL
80
+ #define CH_IDDEF CheckId
81
+ #define CH_NAME CheckName
82
+ #define CH_TFRAME NULL
83
+ #define CH_FBORDER NULL
84
+ #define CH_MEDIA NULL
85
+ #define CH_FSUBMIT CheckFsubmit
86
+ #define CH_LINKTYPES NULL
87
+ #define CH_TRULES NULL
88
+ #define CH_SCOPE CheckScope
89
+ #define CH_SHAPE CheckShape
90
+ #define CH_SCROLL CheckScroll
91
+ #define CH_TARGET CheckTarget
92
+ #define CH_VTYPE CheckVType
93
+ #define CH_ACTION CheckAction
94
+
95
+ static const Attribute attribute_defs [] =
96
+ {
97
+ { TidyAttr_UNKNOWN, "unknown!", VERS_PROPRIETARY, NULL },
98
+ { TidyAttr_ABBR, "abbr", VERS_HTML40, CH_PCDATA },
99
+ { TidyAttr_ACCEPT, "accept", VERS_ALL, CH_XTYPE },
100
+ { TidyAttr_ACCEPT_CHARSET, "accept-charset", VERS_HTML40, CH_CHARSET },
101
+ { TidyAttr_ACCESSKEY, "accesskey", VERS_HTML40, CH_CHARACTER },
102
+ { TidyAttr_ACTION, "action", VERS_ALL, CH_ACTION },
103
+ { TidyAttr_ADD_DATE, "add_date", VERS_NETSCAPE, CH_PCDATA }, /* A */
104
+ { TidyAttr_ALIGN, "align", VERS_ALL, CH_ALIGN }, /* varies by element */
105
+ { TidyAttr_ALINK, "alink", VERS_LOOSE, CH_COLOR },
106
+ { TidyAttr_ALT, "alt", VERS_ALL, CH_PCDATA }, /* nowrap */
107
+ { TidyAttr_ARCHIVE, "archive", VERS_HTML40, CH_URLS }, /* space or comma separated list */
108
+ { TidyAttr_AXIS, "axis", VERS_HTML40, CH_PCDATA },
109
+ { TidyAttr_BACKGROUND, "background", VERS_LOOSE, CH_URL },
110
+ { TidyAttr_BGCOLOR, "bgcolor", VERS_LOOSE, CH_COLOR },
111
+ { TidyAttr_BGPROPERTIES, "bgproperties", VERS_PROPRIETARY, CH_PCDATA }, /* BODY "fixed" fixes background */
112
+ { TidyAttr_BORDER, "border", VERS_ALL, CH_BORDER }, /* like LENGTH + "border" */
113
+ { TidyAttr_BORDERCOLOR, "bordercolor", VERS_MICROSOFT, CH_COLOR }, /* used on TABLE */
114
+ { TidyAttr_BOTTOMMARGIN, "bottommargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
115
+ { TidyAttr_CELLPADDING, "cellpadding", VERS_FROM32, CH_LENGTH }, /* % or pixel values */
116
+ { TidyAttr_CELLSPACING, "cellspacing", VERS_FROM32, CH_LENGTH },
117
+ { TidyAttr_CHAR, "char", VERS_HTML40, CH_CHARACTER },
118
+ { TidyAttr_CHAROFF, "charoff", VERS_HTML40, CH_LENGTH },
119
+ { TidyAttr_CHARSET, "charset", VERS_HTML40, CH_CHARSET },
120
+ { TidyAttr_CHECKED, "checked", VERS_ALL, CH_BOOL }, /* i.e. "checked" or absent */
121
+ { TidyAttr_CITE, "cite", VERS_HTML40, CH_URL },
122
+ { TidyAttr_CLASS, "class", VERS_HTML40, CH_PCDATA },
123
+ { TidyAttr_CLASSID, "classid", VERS_HTML40, CH_URL },
124
+ { TidyAttr_CLEAR, "clear", VERS_LOOSE, CH_CLEAR }, /* BR: left, right, all */
125
+ { TidyAttr_CODE, "code", VERS_LOOSE, CH_PCDATA }, /* APPLET */
126
+ { TidyAttr_CODEBASE, "codebase", VERS_HTML40, CH_URL }, /* OBJECT */
127
+ { TidyAttr_CODETYPE, "codetype", VERS_HTML40, CH_XTYPE }, /* OBJECT */
128
+ { TidyAttr_COLOR, "color", VERS_LOOSE, CH_COLOR }, /* BASEFONT, FONT */
129
+ { TidyAttr_COLS, "cols", VERS_IFRAME, CH_COLS }, /* TABLE & FRAMESET */
130
+ { TidyAttr_COLSPAN, "colspan", VERS_FROM32, CH_NUMBER },
131
+ { TidyAttr_COMPACT, "compact", VERS_ALL, CH_BOOL }, /* lists */
132
+ { TidyAttr_CONTENT, "content", VERS_ALL, CH_PCDATA },
133
+ { TidyAttr_COORDS, "coords", VERS_FROM32, CH_COORDS }, /* AREA, A */
134
+ { TidyAttr_DATA, "data", VERS_HTML40, CH_URL }, /* OBJECT */
135
+ { TidyAttr_DATAFLD, "datafld", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */
136
+ { TidyAttr_DATAFORMATAS, "dataformatas", VERS_MICROSOFT, CH_PCDATA }, /* used on DIV, IMG */
137
+ { TidyAttr_DATAPAGESIZE, "datapagesize", VERS_MICROSOFT, CH_NUMBER }, /* used on DIV, IMG */
138
+ { TidyAttr_DATASRC, "datasrc", VERS_MICROSOFT, CH_URL }, /* used on TABLE */
139
+ { TidyAttr_DATETIME, "datetime", VERS_HTML40, CH_DATE }, /* INS, DEL */
140
+ { TidyAttr_DECLARE, "declare", VERS_HTML40, CH_BOOL }, /* OBJECT */
141
+ { TidyAttr_DEFER, "defer", VERS_HTML40, CH_BOOL }, /* SCRIPT */
142
+ { TidyAttr_DIR, "dir", VERS_HTML40, CH_TEXTDIR }, /* ltr or rtl */
143
+ { TidyAttr_DISABLED, "disabled", VERS_HTML40, CH_BOOL }, /* form fields */
144
+ { TidyAttr_ENCODING, "encoding", VERS_XML, CH_PCDATA }, /* <?xml?> */
145
+ { TidyAttr_ENCTYPE, "enctype", VERS_ALL, CH_XTYPE }, /* FORM */
146
+ { TidyAttr_FACE, "face", VERS_LOOSE, CH_PCDATA }, /* BASEFONT, FONT */
147
+ { TidyAttr_FOR, "for", VERS_HTML40, CH_IDREF }, /* LABEL */
148
+ { TidyAttr_FRAME, "frame", VERS_HTML40, CH_TFRAME }, /* TABLE */
149
+ { TidyAttr_FRAMEBORDER, "frameborder", VERS_FRAMESET, CH_FBORDER }, /* 0 or 1 */
150
+ { TidyAttr_FRAMESPACING, "framespacing", VERS_PROPRIETARY, CH_NUMBER },
151
+ { TidyAttr_GRIDX, "gridx", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive*/
152
+ { TidyAttr_GRIDY, "gridy", VERS_PROPRIETARY, CH_NUMBER }, /* TABLE Adobe golive */
153
+ { TidyAttr_HEADERS, "headers", VERS_HTML40, CH_IDREFS }, /* table cells */
154
+ { TidyAttr_HEIGHT, "height", VERS_ALL, CH_LENGTH }, /* pixels only for TH/TD */
155
+ { TidyAttr_HREF, "href", VERS_ALL, CH_URL }, /* A, AREA, LINK and BASE */
156
+ { TidyAttr_HREFLANG, "hreflang", VERS_HTML40, CH_LANG }, /* A, LINK */
157
+ { TidyAttr_HSPACE, "hspace", VERS_ALL, CH_NUMBER }, /* APPLET, IMG, OBJECT */
158
+ { TidyAttr_HTTP_EQUIV, "http-equiv", VERS_ALL, CH_PCDATA }, /* META */
159
+ { TidyAttr_ID, "id", VERS_HTML40, CH_IDDEF },
160
+ { TidyAttr_ISMAP, "ismap", VERS_ALL, CH_BOOL }, /* IMG */
161
+ { TidyAttr_LABEL, "label", VERS_HTML40, CH_PCDATA }, /* OPT, OPTGROUP */
162
+ { TidyAttr_LANG, "lang", VERS_HTML40, CH_LANG },
163
+ { TidyAttr_LANGUAGE, "language", VERS_LOOSE, CH_PCDATA }, /* SCRIPT */
164
+ { TidyAttr_LAST_MODIFIED, "last_modified", VERS_NETSCAPE, CH_PCDATA }, /* A */
165
+ { TidyAttr_LAST_VISIT, "last_visit", VERS_NETSCAPE, CH_PCDATA }, /* A */
166
+ { TidyAttr_LEFTMARGIN, "leftmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
167
+ { TidyAttr_LINK, "link", VERS_LOOSE, CH_COLOR }, /* BODY */
168
+ { TidyAttr_LONGDESC, "longdesc", VERS_HTML40, CH_URL }, /* IMG */
169
+ { TidyAttr_LOWSRC, "lowsrc", VERS_PROPRIETARY, CH_URL }, /* IMG */
170
+ { TidyAttr_MARGINHEIGHT, "marginheight", VERS_IFRAME, CH_NUMBER }, /* FRAME, IFRAME, BODY */
171
+ { TidyAttr_MARGINWIDTH, "marginwidth", VERS_IFRAME, CH_NUMBER }, /* ditto */
172
+ { TidyAttr_MAXLENGTH, "maxlength", VERS_ALL, CH_NUMBER }, /* INPUT */
173
+ { TidyAttr_MEDIA, "media", VERS_HTML40, CH_MEDIA }, /* STYLE, LINK */
174
+ { TidyAttr_METHOD, "method", VERS_ALL, CH_FSUBMIT }, /* FORM: get or post */
175
+ { TidyAttr_MULTIPLE, "multiple", VERS_ALL, CH_BOOL }, /* SELECT */
176
+ { TidyAttr_NAME, "name", VERS_ALL, CH_NAME },
177
+ { TidyAttr_NOHREF, "nohref", VERS_FROM32, CH_BOOL }, /* AREA */
178
+ { TidyAttr_NORESIZE, "noresize", VERS_FRAMESET, CH_BOOL }, /* FRAME */
179
+ { TidyAttr_NOSHADE, "noshade", VERS_LOOSE, CH_BOOL }, /* HR */
180
+ { TidyAttr_NOWRAP, "nowrap", VERS_LOOSE, CH_BOOL }, /* table cells */
181
+ { TidyAttr_OBJECT, "object", VERS_HTML40_LOOSE, CH_PCDATA }, /* APPLET */
182
+ { TidyAttr_OnAFTERUPDATE, "onafterupdate", VERS_MICROSOFT, CH_SCRIPT },
183
+ { TidyAttr_OnBEFOREUNLOAD, "onbeforeunload", VERS_MICROSOFT, CH_SCRIPT },
184
+ { TidyAttr_OnBEFOREUPDATE, "onbeforeupdate", VERS_MICROSOFT, CH_SCRIPT },
185
+ { TidyAttr_OnBLUR, "onblur", VERS_EVENTS, CH_SCRIPT }, /* event */
186
+ { TidyAttr_OnCHANGE, "onchange", VERS_EVENTS, CH_SCRIPT }, /* event */
187
+ { TidyAttr_OnCLICK, "onclick", VERS_EVENTS, CH_SCRIPT }, /* event */
188
+ { TidyAttr_OnDATAAVAILABLE, "ondataavailable", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */
189
+ { TidyAttr_OnDATASETCHANGED, "ondatasetchanged", VERS_MICROSOFT, CH_SCRIPT }, /* object, applet */
190
+ { TidyAttr_OnDATASETCOMPLETE, "ondatasetcomplete", VERS_MICROSOFT, CH_SCRIPT },
191
+ { TidyAttr_OnDBLCLICK, "ondblclick", VERS_EVENTS, CH_SCRIPT }, /* event */
192
+ { TidyAttr_OnERRORUPDATE, "onerrorupdate", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
193
+ { TidyAttr_OnFOCUS, "onfocus", VERS_EVENTS, CH_SCRIPT }, /* event */
194
+ { TidyAttr_OnKEYDOWN, "onkeydown", VERS_EVENTS, CH_SCRIPT }, /* event */
195
+ { TidyAttr_OnKEYPRESS, "onkeypress", VERS_EVENTS, CH_SCRIPT }, /* event */
196
+ { TidyAttr_OnKEYUP, "onkeyup", VERS_EVENTS, CH_SCRIPT }, /* event */
197
+ { TidyAttr_OnLOAD, "onload", VERS_EVENTS, CH_SCRIPT }, /* event */
198
+ { TidyAttr_OnMOUSEDOWN, "onmousedown", VERS_EVENTS, CH_SCRIPT }, /* event */
199
+ { TidyAttr_OnMOUSEMOVE, "onmousemove", VERS_EVENTS, CH_SCRIPT }, /* event */
200
+ { TidyAttr_OnMOUSEOUT, "onmouseout", VERS_EVENTS, CH_SCRIPT }, /* event */
201
+ { TidyAttr_OnMOUSEOVER, "onmouseover", VERS_EVENTS, CH_SCRIPT }, /* event */
202
+ { TidyAttr_OnMOUSEUP, "onmouseup", VERS_EVENTS, CH_SCRIPT }, /* event */
203
+ { TidyAttr_OnRESET, "onreset", VERS_EVENTS, CH_SCRIPT }, /* event */
204
+ { TidyAttr_OnROWENTER, "onrowenter", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
205
+ { TidyAttr_OnROWEXIT, "onrowexit", VERS_MICROSOFT, CH_SCRIPT }, /* form fields */
206
+ { TidyAttr_OnSELECT, "onselect", VERS_EVENTS, CH_SCRIPT }, /* event */
207
+ { TidyAttr_OnSUBMIT, "onsubmit", VERS_EVENTS, CH_SCRIPT }, /* event */
208
+ { TidyAttr_OnUNLOAD, "onunload", VERS_EVENTS, CH_SCRIPT }, /* event */
209
+ { TidyAttr_PROFILE, "profile", VERS_HTML40, CH_URL }, /* HEAD */
210
+ { TidyAttr_PROMPT, "prompt", VERS_LOOSE, CH_PCDATA }, /* ISINDEX */
211
+ { TidyAttr_RBSPAN, "rbspan", VERS_XHTML11, CH_NUMBER }, /* ruby markup */
212
+ { TidyAttr_READONLY, "readonly", VERS_HTML40, CH_BOOL }, /* form fields */
213
+ { TidyAttr_REL, "rel", VERS_ALL, CH_LINKTYPES },
214
+ { TidyAttr_REV, "rev", VERS_ALL, CH_LINKTYPES },
215
+ { TidyAttr_RIGHTMARGIN, "rightmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
216
+ { TidyAttr_ROWS, "rows", VERS_ALL, CH_NUMBER }, /* TEXTAREA */
217
+ { TidyAttr_ROWSPAN, "rowspan", VERS_ALL, CH_NUMBER }, /* table cells */
218
+ { TidyAttr_RULES, "rules", VERS_HTML40, CH_TRULES }, /* TABLE */
219
+ { TidyAttr_SCHEME, "scheme", VERS_HTML40, CH_PCDATA }, /* META */
220
+ { TidyAttr_SCOPE, "scope", VERS_HTML40, CH_SCOPE }, /* table cells */
221
+ { TidyAttr_SCROLLING, "scrolling", VERS_IFRAME, CH_SCROLL }, /* yes, no or auto */
222
+ { TidyAttr_SELECTED, "selected", VERS_ALL, CH_BOOL }, /* OPTION */
223
+ { TidyAttr_SHAPE, "shape", VERS_FROM32, CH_SHAPE }, /* AREA, A */
224
+ { TidyAttr_SHOWGRID, "showgrid", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive */
225
+ { TidyAttr_SHOWGRIDX, "showgridx", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/
226
+ { TidyAttr_SHOWGRIDY, "showgridy", VERS_PROPRIETARY, CH_BOOL }, /* TABLE Adobe golive*/
227
+ { TidyAttr_SIZE, "size", VERS_LOOSE, CH_NUMBER }, /* HR, FONT, BASEFONT, SELECT */
228
+ { TidyAttr_SPAN, "span", VERS_HTML40, CH_NUMBER }, /* COL, COLGROUP */
229
+ { TidyAttr_SRC, "src", VERS_ALL, CH_URL }, /* IMG, FRAME, IFRAME */
230
+ { TidyAttr_STANDBY, "standby", VERS_HTML40, CH_PCDATA }, /* OBJECT */
231
+ { TidyAttr_START, "start", VERS_ALL, CH_NUMBER }, /* OL */
232
+ { TidyAttr_STYLE, "style", VERS_HTML40, CH_PCDATA },
233
+ { TidyAttr_SUMMARY, "summary", VERS_HTML40, CH_PCDATA }, /* TABLE */
234
+ { TidyAttr_TABINDEX, "tabindex", VERS_HTML40, CH_NUMBER }, /* fields, OBJECT and A */
235
+ { TidyAttr_TARGET, "target", VERS_HTML40, CH_TARGET }, /* names a frame/window */
236
+ { TidyAttr_TEXT, "text", VERS_LOOSE, CH_COLOR }, /* BODY */
237
+ { TidyAttr_TITLE, "title", VERS_HTML40, CH_PCDATA }, /* text tool tip */
238
+ { TidyAttr_TOPMARGIN, "topmargin", VERS_MICROSOFT, CH_NUMBER }, /* used on BODY */
239
+ { TidyAttr_TYPE, "type", VERS_FROM32, CH_TYPE }, /* also used by SPACER */
240
+ { TidyAttr_USEMAP, "usemap", VERS_ALL, CH_URL }, /* things with images */
241
+ { TidyAttr_VALIGN, "valign", VERS_FROM32, CH_VALIGN },
242
+ { TidyAttr_VALUE, "value", VERS_ALL, CH_PCDATA },
243
+ { TidyAttr_VALUETYPE, "valuetype", VERS_HTML40, CH_VTYPE }, /* PARAM: data, ref, object */
244
+ { TidyAttr_VERSION, "version", VERS_ALL|VERS_XML, CH_PCDATA }, /* HTML <?xml?> */
245
+ { TidyAttr_VLINK, "vlink", VERS_LOOSE, CH_COLOR }, /* BODY */
246
+ { TidyAttr_VSPACE, "vspace", VERS_LOOSE, CH_NUMBER }, /* IMG, OBJECT, APPLET */
247
+ { TidyAttr_WIDTH, "width", VERS_ALL, CH_LENGTH }, /* pixels only for TD/TH */
248
+ { TidyAttr_WRAP, "wrap", VERS_NETSCAPE, CH_PCDATA }, /* textarea */
249
+ { TidyAttr_XML_LANG, "xml:lang", VERS_XML, CH_LANG }, /* XML language */
250
+ { TidyAttr_XML_SPACE, "xml:space", VERS_XML, CH_PCDATA }, /* XML white space */
251
+
252
+ /* todo: VERS_ALL is wrong! */
253
+ { TidyAttr_XMLNS, "xmlns", VERS_ALL, CH_PCDATA }, /* name space */
254
+ { TidyAttr_EVENT, "event", VERS_HTML40, CH_PCDATA }, /* reserved for <script> */
255
+ { TidyAttr_METHODS, "methods", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */
256
+ { TidyAttr_N, "n", VERS_HTML20, CH_PCDATA }, /* for <nextid> */
257
+ { TidyAttr_SDAFORM, "sdaform", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
258
+ { TidyAttr_SDAPREF, "sdapref", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
259
+ { TidyAttr_SDASUFF, "sdasuff", VERS_HTML20, CH_PCDATA }, /* SDATA attribute in HTML 2.0 */
260
+ { TidyAttr_URN, "urn", VERS_HTML20, CH_PCDATA }, /* for <a>, never implemented */
261
+
262
+ /* this must be the final entry */
263
+ { N_TIDY_ATTRIBS, NULL, VERS_UNKNOWN, NULL }
264
+ };
265
+
266
+ static uint AttributeVersions(Node* node, AttVal* attval)
267
+ {
268
+ uint i;
269
+
270
+ if (!attval || !attval->dict)
271
+ return VERS_UNKNOWN;
272
+
273
+ if (!node || !node->tag || !node->tag->attrvers)
274
+ return attval->dict->versions;
275
+
276
+ for (i = 0; node->tag->attrvers[i].attribute; ++i)
277
+ if (node->tag->attrvers[i].attribute == attval->dict->id)
278
+ return node->tag->attrvers[i].versions;
279
+
280
+ return attval->dict->versions & VERS_ALL
281
+ ? VERS_UNKNOWN
282
+ : attval->dict->versions;
283
+
284
+ }
285
+
286
+
287
+ /* return the version of the attribute "id" of element "node" */
288
+ uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id )
289
+ {
290
+ uint i;
291
+
292
+ if (!node || !node->tag || !node->tag->attrvers)
293
+ return VERS_UNKNOWN;
294
+
295
+ for (i = 0; node->tag->attrvers[i].attribute; ++i)
296
+ if (node->tag->attrvers[i].attribute == id)
297
+ return node->tag->attrvers[i].versions;
298
+
299
+ return VERS_UNKNOWN;
300
+ }
301
+
302
+ /* returns true if the element is a W3C defined element */
303
+ /* but the element/attribute combination is not */
304
+ static Bool AttributeIsProprietary(Node* node, AttVal* attval)
305
+ {
306
+ if (!node || !attval)
307
+ return no;
308
+
309
+ if (!node->tag)
310
+ return no;
311
+
312
+ if (!(node->tag->versions & VERS_ALL))
313
+ return no;
314
+
315
+ if (AttributeVersions(node, attval) & VERS_ALL)
316
+ return no;
317
+
318
+ return yes;
319
+ }
320
+
321
+ /* used by CheckColor() */
322
+ struct _colors
323
+ {
324
+ ctmbstr name;
325
+ ctmbstr hex;
326
+ };
327
+
328
+ static const struct _colors colors[] =
329
+ {
330
+ { "black", "#000000" },
331
+ { "green", "#008000" },
332
+ { "silver", "#C0C0C0" },
333
+ { "lime", "#00FF00" },
334
+ { "gray", "#808080" },
335
+ { "olive", "#808000" },
336
+ { "white", "#FFFFFF" },
337
+ { "yellow", "#FFFF00" },
338
+ { "maroon", "#800000" },
339
+ { "navy", "#000080" },
340
+ { "red", "#FF0000" },
341
+ { "blue", "#0000FF" },
342
+ { "purple", "#800080" },
343
+ { "teal", "#008080" },
344
+ { "fuchsia", "#FF00FF" },
345
+ { "aqua", "#00FFFF" },
346
+ { NULL, NULL }
347
+ };
348
+
349
+ static ctmbstr GetColorCode(ctmbstr name)
350
+ {
351
+ uint i;
352
+
353
+ for (i = 0; colors[i].name; ++i)
354
+ if (TY_(tmbstrcasecmp)(name, colors[i].name) == 0)
355
+ return colors[i].hex;
356
+
357
+ return NULL;
358
+ }
359
+
360
+ static ctmbstr GetColorName(ctmbstr code)
361
+ {
362
+ uint i;
363
+
364
+ for (i = 0; colors[i].name; ++i)
365
+ if (TY_(tmbstrcasecmp)(code, colors[i].hex) == 0)
366
+ return colors[i].name;
367
+
368
+ return NULL;
369
+ }
370
+
371
+ #if 0
372
+ static const struct _colors fancy_colors[] =
373
+ {
374
+ { "darkgreen", "#006400" },
375
+ { "antiquewhite", "#FAEBD7" },
376
+ { "aqua", "#00FFFF" },
377
+ { "aquamarine", "#7FFFD4" },
378
+ { "azure", "#F0FFFF" },
379
+ { "beige", "#F5F5DC" },
380
+ { "bisque", "#FFE4C4" },
381
+ { "black", "#000000" },
382
+ { "blanchedalmond", "#FFEBCD" },
383
+ { "blue", "#0000FF" },
384
+ { "blueviolet", "#8A2BE2" },
385
+ { "brown", "#A52A2A" },
386
+ { "burlywood", "#DEB887" },
387
+ { "cadetblue", "#5F9EA0" },
388
+ { "chartreuse", "#7FFF00" },
389
+ { "chocolate", "#D2691E" },
390
+ { "coral", "#FF7F50" },
391
+ { "cornflowerblue", "#6495ED" },
392
+ { "cornsilk", "#FFF8DC" },
393
+ { "crimson", "#DC143C" },
394
+ { "cyan", "#00FFFF" },
395
+ { "darkblue", "#00008B" },
396
+ { "darkcyan", "#008B8B" },
397
+ { "darkgoldenrod", "#B8860B" },
398
+ { "darkgray", "#A9A9A9" },
399
+ { "darkgreen", "#006400" },
400
+ { "darkkhaki", "#BDB76B" },
401
+ { "darkmagenta", "#8B008B" },
402
+ { "darkolivegreen", "#556B2F" },
403
+ { "darkorange", "#FF8C00" },
404
+ { "darkorchid", "#9932CC" },
405
+ { "darkred", "#8B0000" },
406
+ { "darksalmon", "#E9967A" },
407
+ { "darkseagreen", "#8FBC8F" },
408
+ { "darkslateblue", "#483D8B" },
409
+ { "darkslategray", "#2F4F4F" },
410
+ { "darkturquoise", "#00CED1" },
411
+ { "darkviolet", "#9400D3" },
412
+ { "deeppink", "#FF1493" },
413
+ { "deepskyblue", "#00BFFF" },
414
+ { "dimgray", "#696969" },
415
+ { "dodgerblue", "#1E90FF" },
416
+ { "firebrick", "#B22222" },
417
+ { "floralwhite", "#FFFAF0" },
418
+ { "forestgreen", "#228B22" },
419
+ { "fuchsia", "#FF00FF" },
420
+ { "gainsboro", "#DCDCDC" },
421
+ { "ghostwhite", "#F8F8FF" },
422
+ { "gold", "#FFD700" },
423
+ { "goldenrod", "#DAA520" },
424
+ { "gray", "#808080" },
425
+ { "green", "#008000" },
426
+ { "greenyellow", "#ADFF2F" },
427
+ { "honeydew", "#F0FFF0" },
428
+ { "hotpink", "#FF69B4" },
429
+ { "indianred", "#CD5C5C" },
430
+ { "indigo", "#4B0082" },
431
+ { "ivory", "#FFFFF0" },
432
+ { "khaki", "#F0E68C" },
433
+ { "lavender", "#E6E6FA" },
434
+ { "lavenderblush", "#FFF0F5" },
435
+ { "lawngreen", "#7CFC00" },
436
+ { "lemonchiffon", "#FFFACD" },
437
+ { "lightblue", "#ADD8E6" },
438
+ { "lightcoral", "#F08080" },
439
+ { "lightcyan", "#E0FFFF" },
440
+ { "lightgoldenrodyellow", "#FAFAD2" },
441
+ { "lightgreen", "#90EE90" },
442
+ { "lightgrey", "#D3D3D3" },
443
+ { "lightpink", "#FFB6C1" },
444
+ { "lightsalmon", "#FFA07A" },
445
+ { "lightseagreen", "#20B2AA" },
446
+ { "lightskyblue", "#87CEFA" },
447
+ { "lightslategray", "#778899" },
448
+ { "lightsteelblue", "#B0C4DE" },
449
+ { "lightyellow", "#FFFFE0" },
450
+ { "lime", "#00FF00" },
451
+ { "limegreen", "#32CD32" },
452
+ { "linen", "#FAF0E6" },
453
+ { "magenta", "#FF00FF" },
454
+ { "maroon", "#800000" },
455
+ { "mediumaquamarine", "#66CDAA" },
456
+ { "mediumblue", "#0000CD" },
457
+ { "mediumorchid", "#BA55D3" },
458
+ { "mediumpurple", "#9370DB" },
459
+ { "mediumseagreen", "#3CB371" },
460
+ { "mediumslateblue", "#7B68EE" },
461
+ { "mediumspringgreen", "#00FA9A" },
462
+ { "mediumturquoise", "#48D1CC" },
463
+ { "mediumvioletred", "#C71585" },
464
+ { "midnightblue", "#191970" },
465
+ { "mintcream", "#F5FFFA" },
466
+ { "mistyrose", "#FFE4E1" },
467
+ { "moccasin", "#FFE4B5" },
468
+ { "navajowhite", "#FFDEAD" },
469
+ { "navy", "#000080" },
470
+ { "oldlace", "#FDF5E6" },
471
+ { "olive", "#808000" },
472
+ { "olivedrab", "#6B8E23" },
473
+ { "orange", "#FFA500" },
474
+ { "orangered", "#FF4500" },
475
+ { "orchid", "#DA70D6" },
476
+ { "palegoldenrod", "#EEE8AA" },
477
+ { "palegreen", "#98FB98" },
478
+ { "paleturquoise", "#AFEEEE" },
479
+ { "palevioletred", "#DB7093" },
480
+ { "papayawhip", "#FFEFD5" },
481
+ { "peachpuff", "#FFDAB9" },
482
+ { "peru", "#CD853F" },
483
+ { "pink", "#FFC0CB" },
484
+ { "plum", "#DDA0DD" },
485
+ { "powderblue", "#B0E0E6" },
486
+ { "purple", "#800080" },
487
+ { "red", "#FF0000" },
488
+ { "rosybrown", "#BC8F8F" },
489
+ { "royalblue", "#4169E1" },
490
+ { "saddlebrown", "#8B4513" },
491
+ { "salmon", "#FA8072" },
492
+ { "sandybrown", "#F4A460" },
493
+ { "seagreen", "#2E8B57" },
494
+ { "seashell", "#FFF5EE" },
495
+ { "sienna", "#A0522D" },
496
+ { "silver", "#C0C0C0" },
497
+ { "skyblue", "#87CEEB" },
498
+ { "slateblue", "#6A5ACD" },
499
+ { "slategray", "#708090" },
500
+ { "snow", "#FFFAFA" },
501
+ { "springgreen", "#00FF7F" },
502
+ { "steelblue", "#4682B4" },
503
+ { "tan", "#D2B48C" },
504
+ { "teal", "#008080" },
505
+ { "thistle", "#D8BFD8" },
506
+ { "tomato", "#FF6347" },
507
+ { "turquoise", "#40E0D0" },
508
+ { "violet", "#EE82EE" },
509
+ { "wheat", "#F5DEB3" },
510
+ { "white", "#FFFFFF" },
511
+ { "whitesmoke", "#F5F5F5" },
512
+ { "yellow", "#FFFF00" },
513
+ { "yellowgreen", "#9ACD32" },
514
+ { NULL, NULL }
515
+ };
516
+ #endif
517
+
518
+ #if ATTRIBUTE_HASH_LOOKUP
519
+ static uint attrsHash(ctmbstr s)
520
+ {
521
+ uint hashval;
522
+
523
+ for (hashval = 0; *s != '\0'; s++)
524
+ hashval = *s + 31*hashval;
525
+
526
+ return hashval % ATTRIBUTE_HASH_SIZE;
527
+ }
528
+
529
+ static const Attribute *attrsInstall(TidyDocImpl* doc, TidyAttribImpl * attribs,
530
+ const Attribute* old)
531
+ {
532
+ AttrHash *np;
533
+ uint hashval;
534
+
535
+ if (old)
536
+ {
537
+ np = (AttrHash *)TidyDocAlloc(doc, sizeof(*np));
538
+ np->attr = old;
539
+
540
+ hashval = attrsHash(old->name);
541
+ np->next = attribs->hashtab[hashval];
542
+ attribs->hashtab[hashval] = np;
543
+ }
544
+
545
+ return old;
546
+ }
547
+
548
+ static void attrsRemoveFromHash( TidyDocImpl* doc, TidyAttribImpl *attribs,
549
+ ctmbstr s )
550
+ {
551
+ uint h = attrsHash(s);
552
+ AttrHash *p, *prev = NULL;
553
+ for (p = attribs->hashtab[h]; p && p->attr; p = p->next)
554
+ {
555
+ if (TY_(tmbstrcmp)(s, p->attr->name) == 0)
556
+ {
557
+ AttrHash* next = p->next;
558
+ if ( prev )
559
+ prev->next = next;
560
+ else
561
+ attribs->hashtab[h] = next;
562
+ TidyDocFree(doc, p);
563
+ return;
564
+ }
565
+ prev = p;
566
+ }
567
+ }
568
+
569
+ static void attrsEmptyHash( TidyDocImpl* doc, TidyAttribImpl * attribs )
570
+ {
571
+ AttrHash *dict, *next;
572
+ uint i;
573
+
574
+ for (i = 0; i < ATTRIBUTE_HASH_SIZE; ++i)
575
+ {
576
+ dict = attribs->hashtab[i];
577
+
578
+ while(dict)
579
+ {
580
+ next = dict->next;
581
+ TidyDocFree(doc, dict);
582
+ dict = next;
583
+ }
584
+
585
+ attribs->hashtab[i] = NULL;
586
+ }
587
+ }
588
+ #endif
589
+
590
+ static const Attribute* attrsLookup(TidyDocImpl* doc,
591
+ TidyAttribImpl* ARG_UNUSED(attribs),
592
+ ctmbstr atnam)
593
+ {
594
+ const Attribute *np;
595
+ #if ATTRIBUTE_HASH_LOOKUP
596
+ const AttrHash *p;
597
+ #endif
598
+
599
+ if (!atnam)
600
+ return NULL;
601
+
602
+ #if ATTRIBUTE_HASH_LOOKUP
603
+ for (p = attribs->hashtab[attrsHash(atnam)]; p && p->attr; p = p->next)
604
+ if (TY_(tmbstrcmp)(atnam, p->attr->name) == 0)
605
+ return p->attr;
606
+
607
+ for (np = attribute_defs; np && np->name; ++np)
608
+ if (TY_(tmbstrcmp)(atnam, np->name) == 0)
609
+ return attrsInstall(doc, attribs, np);
610
+ #else
611
+ for (np = attribute_defs; np && np->name; ++np)
612
+ if (TY_(tmbstrcmp)(atnam, np->name) == 0)
613
+ return np;
614
+ #endif
615
+
616
+ return NULL;
617
+ }
618
+
619
+
620
+ /* Locate attributes by type */
621
+ AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id )
622
+ {
623
+ AttVal* av;
624
+ for ( av = node->attributes; av; av = av->next )
625
+ {
626
+ if ( AttrIsId(av, id) )
627
+ return av;
628
+ }
629
+ return NULL;
630
+ }
631
+
632
+ /* public method for finding attribute definition by name */
633
+ const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval )
634
+ {
635
+ if ( attval )
636
+ return attrsLookup( doc, &doc->attribs, attval->attribute );
637
+ return NULL;
638
+ }
639
+
640
+ AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name )
641
+ {
642
+ AttVal *attr;
643
+ for (attr = node->attributes; attr != NULL; attr = attr->next)
644
+ {
645
+ if (attr->attribute && TY_(tmbstrcmp)(attr->attribute, name) == 0)
646
+ break;
647
+ }
648
+ return attr;
649
+ }
650
+
651
+ AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
652
+ Node *node, ctmbstr name, ctmbstr value )
653
+ {
654
+ AttVal *av = TY_(NewAttribute)(doc);
655
+ av->delim = '"';
656
+ av->attribute = TY_(tmbstrdup)(doc->allocator, name);
657
+
658
+ if (value)
659
+ av->value = TY_(tmbstrdup)(doc->allocator, value);
660
+ else
661
+ av->value = NULL;
662
+
663
+ av->dict = attrsLookup(doc, &doc->attribs, name);
664
+
665
+ TY_(InsertAttributeAtEnd)(node, av);
666
+ return av;
667
+ }
668
+
669
+ AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value)
670
+ {
671
+ AttVal* old = TY_(GetAttrByName)(node, name);
672
+
673
+ if (old)
674
+ {
675
+ if (old->value)
676
+ TidyDocFree(doc, old->value);
677
+ if (value)
678
+ old->value = TY_(tmbstrdup)(doc->allocator, value);
679
+ else
680
+ old->value = NULL;
681
+
682
+ return old;
683
+ }
684
+ else
685
+ return TY_(AddAttribute)(doc, node, name, value);
686
+ }
687
+
688
+ static Bool CheckAttrType( TidyDocImpl* doc,
689
+ ctmbstr attrname, AttrCheck type )
690
+ {
691
+ const Attribute* np = attrsLookup( doc, &doc->attribs, attrname );
692
+ return (Bool)( np && np->attrchk == type );
693
+ }
694
+
695
+ Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname )
696
+ {
697
+ return CheckAttrType( doc, attrname, CH_URL );
698
+ }
699
+
700
+ /*
701
+ Bool IsBool( TidyDocImpl* doc, ctmbstr attrname )
702
+ {
703
+ return CheckAttrType( doc, attrname, CH_BOOL );
704
+ }
705
+ */
706
+
707
+ Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname )
708
+ {
709
+ return CheckAttrType( doc, attrname, CH_SCRIPT );
710
+ }
711
+
712
+ /* may id or name serve as anchor? */
713
+ Bool TY_(IsAnchorElement)( TidyDocImpl* ARG_UNUSED(doc), Node* node)
714
+ {
715
+ TidyTagId tid = TagId( node );
716
+ if ( tid == TidyTag_A ||
717
+ tid == TidyTag_APPLET ||
718
+ tid == TidyTag_FORM ||
719
+ tid == TidyTag_FRAME ||
720
+ tid == TidyTag_IFRAME ||
721
+ tid == TidyTag_IMG ||
722
+ tid == TidyTag_MAP )
723
+ return yes;
724
+
725
+ return no;
726
+ }
727
+
728
+ /*
729
+ In CSS1, selectors can contain only the characters A-Z, 0-9,
730
+ and Unicode characters 161-255, plus dash (-); they cannot start
731
+ with a dash or a digit; they can also contain escaped characters
732
+ and any Unicode character as a numeric code (see next item).
733
+
734
+ The backslash followed by at most four hexadecimal digits
735
+ (0..9A..F) stands for the Unicode character with that number.
736
+
737
+ Any character except a hexadecimal digit can be escaped to remove
738
+ its special meaning, by putting a backslash in front.
739
+
740
+ #508936 - CSS class naming for -clean option
741
+ */
742
+ Bool TY_(IsCSS1Selector)( ctmbstr buf )
743
+ {
744
+ Bool valid = yes;
745
+ int esclen = 0;
746
+ byte c;
747
+ int pos;
748
+
749
+ for ( pos=0; valid && (c = *buf++); ++pos )
750
+ {
751
+ if ( c == '\\' )
752
+ {
753
+ esclen = 1; /* ab\555\444 is 4 chars {'a', 'b', \555, \444} */
754
+ }
755
+ else if ( isdigit( c ) )
756
+ {
757
+ /* Digit not 1st, unless escaped (Max length "\112F") */
758
+ if ( esclen > 0 )
759
+ valid = ( ++esclen < 6 );
760
+ if ( valid )
761
+ valid = ( pos>0 || esclen>0 );
762
+ }
763
+ else
764
+ {
765
+ valid = (
766
+ esclen > 0 /* Escaped? Anything goes. */
767
+ || ( pos>0 && c == '-' ) /* Dash cannot be 1st char */
768
+ || isalpha(c) /* a-z, A-Z anywhere */
769
+ || ( c >= 161 ) /* Unicode 161-255 anywhere */
770
+ );
771
+ esclen = 0;
772
+ }
773
+ }
774
+ return valid;
775
+ }
776
+
777
+ /* free single anchor */
778
+ static void FreeAnchor(TidyDocImpl* doc, Anchor *a)
779
+ {
780
+ if ( a )
781
+ TidyDocFree( doc, a->name );
782
+ TidyDocFree( doc, a );
783
+ }
784
+
785
+ /* removes anchor for specific node */
786
+ void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node )
787
+ {
788
+ TidyAttribImpl* attribs = &doc->attribs;
789
+ Anchor *delme = NULL, *curr, *prev = NULL;
790
+
791
+ for ( curr=attribs->anchor_list; curr!=NULL; curr=curr->next )
792
+ {
793
+ if ( curr->node == node )
794
+ {
795
+ if ( prev )
796
+ prev->next = curr->next;
797
+ else
798
+ attribs->anchor_list = curr->next;
799
+ delme = curr;
800
+ break;
801
+ }
802
+ prev = curr;
803
+ }
804
+ FreeAnchor( doc, delme );
805
+ }
806
+
807
+ /* initialize new anchor */
808
+ static Anchor* NewAnchor( TidyDocImpl* doc, ctmbstr name, Node* node )
809
+ {
810
+ Anchor *a = (Anchor*) TidyDocAlloc( doc, sizeof(Anchor) );
811
+
812
+ a->name = TY_(tmbstrdup)( doc->allocator, name );
813
+ a->name = TY_(tmbstrtolower)(a->name);
814
+ a->node = node;
815
+ a->next = NULL;
816
+
817
+ return a;
818
+ }
819
+
820
+ /* add new anchor to namespace */
821
+ static Anchor* AddAnchor( TidyDocImpl* doc, ctmbstr name, Node *node )
822
+ {
823
+ TidyAttribImpl* attribs = &doc->attribs;
824
+ Anchor *a = NewAnchor( doc, name, node );
825
+
826
+ if ( attribs->anchor_list == NULL)
827
+ attribs->anchor_list = a;
828
+ else
829
+ {
830
+ Anchor *here = attribs->anchor_list;
831
+ while (here->next)
832
+ here = here->next;
833
+ here->next = a;
834
+ }
835
+
836
+ return attribs->anchor_list;
837
+ }
838
+
839
+ /* return node associated with anchor */
840
+ static Node* GetNodeByAnchor( TidyDocImpl* doc, ctmbstr name )
841
+ {
842
+ TidyAttribImpl* attribs = &doc->attribs;
843
+ Anchor *found;
844
+ tmbstr lname = TY_(tmbstrdup)(doc->allocator, name);
845
+ lname = TY_(tmbstrtolower)(lname);
846
+
847
+ for ( found = attribs->anchor_list; found != NULL; found = found->next )
848
+ {
849
+ if ( TY_(tmbstrcmp)(found->name, lname) == 0 )
850
+ break;
851
+ }
852
+
853
+ TidyDocFree(doc, lname);
854
+ if ( found )
855
+ return found->node;
856
+ return NULL;
857
+ }
858
+
859
+ /* free all anchors */
860
+ void TY_(FreeAnchors)( TidyDocImpl* doc )
861
+ {
862
+ TidyAttribImpl* attribs = &doc->attribs;
863
+ Anchor* a;
864
+ while (NULL != (a = attribs->anchor_list) )
865
+ {
866
+ attribs->anchor_list = a->next;
867
+ FreeAnchor(doc, a);
868
+ }
869
+ }
870
+
871
+ /* public method for inititializing attribute dictionary */
872
+ void TY_(InitAttrs)( TidyDocImpl* doc )
873
+ {
874
+ TidyClearMemory( &doc->attribs, sizeof(TidyAttribImpl) );
875
+ #ifdef _DEBUG
876
+ {
877
+ /* Attribute ID is index position in Attribute type lookup table */
878
+ uint ix;
879
+ for ( ix=0; ix < N_TIDY_ATTRIBS; ++ix )
880
+ {
881
+ const Attribute* dict = &attribute_defs[ ix ];
882
+ assert( (uint) dict->id == ix );
883
+ }
884
+ }
885
+ #endif
886
+ }
887
+
888
+ /* free all declared attributes */
889
+ static void FreeDeclaredAttributes( TidyDocImpl* doc )
890
+ {
891
+ TidyAttribImpl* attribs = &doc->attribs;
892
+ Attribute* dict;
893
+ while ( NULL != (dict = attribs->declared_attr_list) )
894
+ {
895
+ attribs->declared_attr_list = dict->next;
896
+ #if ATTRIBUTE_HASH_LOOKUP
897
+ attrsRemoveFromHash( doc, &doc->attribs, dict->name );
898
+ #endif
899
+ TidyDocFree( doc, dict->name );
900
+ TidyDocFree( doc, dict );
901
+ }
902
+ }
903
+
904
+ void TY_(FreeAttrTable)( TidyDocImpl* doc )
905
+ {
906
+ #if ATTRIBUTE_HASH_LOOKUP
907
+ attrsEmptyHash( doc, &doc->attribs );
908
+ #endif
909
+ TY_(FreeAnchors)( doc );
910
+ FreeDeclaredAttributes( doc );
911
+ }
912
+
913
+ void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname )
914
+ {
915
+ uint len = TY_(tmbstrlen)(classattr->value) +
916
+ TY_(tmbstrlen)(classname) + 2;
917
+ tmbstr s = (tmbstr) TidyDocAlloc( doc, len );
918
+ s[0] = '\0';
919
+ if (classattr->value)
920
+ {
921
+ TY_(tmbstrcpy)( s, classattr->value );
922
+ TY_(tmbstrcat)( s, " " );
923
+ }
924
+ TY_(tmbstrcat)( s, classname );
925
+ if (classattr->value)
926
+ TidyDocFree( doc, classattr->value );
927
+ classattr->value = s;
928
+ }
929
+
930
+ /* concatenate styles */
931
+ static void AppendToStyleAttr( TidyDocImpl* doc, AttVal *styleattr, ctmbstr styleprop )
932
+ {
933
+ /*
934
+ this doesn't handle CSS comments and
935
+ leading/trailing white-space very well
936
+ see http://www.w3.org/TR/css-style-attr
937
+ */
938
+ uint end = TY_(tmbstrlen)(styleattr->value);
939
+
940
+ if (end >0 && styleattr->value[end - 1] == ';')
941
+ {
942
+ /* attribute ends with declaration seperator */
943
+
944
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
945
+ end + TY_(tmbstrlen)(styleprop) + 2);
946
+
947
+ TY_(tmbstrcat)(styleattr->value, " ");
948
+ TY_(tmbstrcat)(styleattr->value, styleprop);
949
+ }
950
+ else if (end >0 && styleattr->value[end - 1] == '}')
951
+ {
952
+ /* attribute ends with rule set */
953
+
954
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
955
+ end + TY_(tmbstrlen)(styleprop) + 6);
956
+
957
+ TY_(tmbstrcat)(styleattr->value, " { ");
958
+ TY_(tmbstrcat)(styleattr->value, styleprop);
959
+ TY_(tmbstrcat)(styleattr->value, " }");
960
+ }
961
+ else
962
+ {
963
+ /* attribute ends with property value */
964
+
965
+ styleattr->value = (tmbstr) TidyDocRealloc(doc, styleattr->value,
966
+ end + TY_(tmbstrlen)(styleprop) + 3);
967
+
968
+ if (end > 0)
969
+ TY_(tmbstrcat)(styleattr->value, "; ");
970
+ TY_(tmbstrcat)(styleattr->value, styleprop);
971
+ }
972
+ }
973
+
974
+ /*
975
+ the same attribute name can't be used
976
+ more than once in each element
977
+ */
978
+ static Bool AttrsHaveSameName( AttVal* av1, AttVal* av2)
979
+ {
980
+ TidyAttrId id1, id2;
981
+
982
+ id1 = AttrId(av1);
983
+ id2 = AttrId(av2);
984
+ if (id1 != TidyAttr_UNKNOWN && id2 != TidyAttr_UNKNOWN)
985
+ return AttrsHaveSameId(av1, av2);
986
+ if (id1 != TidyAttr_UNKNOWN || id2 != TidyAttr_UNKNOWN)
987
+ return no;
988
+ if (av1->attribute && av2->attribute)
989
+ return TY_(tmbstrcmp)(av1->attribute, av2->attribute) == 0;
990
+ return no;
991
+ }
992
+
993
+ void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node *node, Bool isXml )
994
+ {
995
+ AttVal *first;
996
+
997
+ for (first = node->attributes; first != NULL;)
998
+ {
999
+ AttVal *second;
1000
+ Bool firstRedefined = no;
1001
+
1002
+ if (!(first->asp == NULL && first->php == NULL))
1003
+ {
1004
+ first = first->next;
1005
+ continue;
1006
+ }
1007
+
1008
+ for (second = first->next; second != NULL;)
1009
+ {
1010
+ AttVal *temp;
1011
+
1012
+ if (!(second->asp == NULL && second->php == NULL
1013
+ && AttrsHaveSameName(first, second)))
1014
+ {
1015
+ second = second->next;
1016
+ continue;
1017
+ }
1018
+
1019
+ /* first and second attribute have same local name */
1020
+ /* now determine what to do with this duplicate... */
1021
+
1022
+ if (!isXml
1023
+ && attrIsCLASS(first) && cfgBool(doc, TidyJoinClasses)
1024
+ && AttrHasValue(first) && AttrHasValue(second))
1025
+ {
1026
+ /* concatenate classes */
1027
+
1028
+ TY_(AppendToClassAttr)(doc, first, second->value);
1029
+
1030
+ temp = second->next;
1031
+ TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
1032
+ TY_(RemoveAttribute)( doc, node, second );
1033
+ second = temp;
1034
+ }
1035
+ else if (!isXml
1036
+ && attrIsSTYLE(first) && cfgBool(doc, TidyJoinStyles)
1037
+ && AttrHasValue(first) && AttrHasValue(second))
1038
+ {
1039
+ AppendToStyleAttr( doc, first, second->value );
1040
+
1041
+ temp = second->next;
1042
+ TY_(ReportAttrError)( doc, node, second, JOINING_ATTRIBUTE);
1043
+ TY_(RemoveAttribute)( doc, node, second );
1044
+ second = temp;
1045
+ }
1046
+ else if ( cfg(doc, TidyDuplicateAttrs) == TidyKeepLast )
1047
+ {
1048
+ temp = first->next;
1049
+ TY_(ReportAttrError)( doc, node, first, REPEATED_ATTRIBUTE);
1050
+ TY_(RemoveAttribute)( doc, node, first );
1051
+ firstRedefined = yes;
1052
+ first = temp;
1053
+ second = second->next;
1054
+ }
1055
+ else /* TidyDuplicateAttrs == TidyKeepFirst */
1056
+ {
1057
+ temp = second->next;
1058
+ TY_(ReportAttrError)( doc, node, second, REPEATED_ATTRIBUTE);
1059
+ TY_(RemoveAttribute)( doc, node, second );
1060
+ second = temp;
1061
+ }
1062
+ }
1063
+ if (!firstRedefined)
1064
+ first = first->next;
1065
+ }
1066
+ }
1067
+
1068
+ /* ignore unknown attributes for proprietary elements */
1069
+ const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval )
1070
+ {
1071
+ const Attribute* attribute = attval->dict;
1072
+
1073
+ if ( attribute != NULL )
1074
+ {
1075
+ if (attribute->versions & VERS_XML)
1076
+ {
1077
+ doc->lexer->isvoyager = yes;
1078
+ if (!cfgBool(doc, TidyHtmlOut))
1079
+ {
1080
+ TY_(SetOptionBool)(doc, TidyXhtmlOut, yes);
1081
+ TY_(SetOptionBool)(doc, TidyXmlOut, yes);
1082
+ }
1083
+ }
1084
+
1085
+ TY_(ConstrainVersion)(doc, AttributeVersions(node, attval));
1086
+
1087
+ if (attribute->attrchk)
1088
+ attribute->attrchk( doc, node, attval );
1089
+ }
1090
+
1091
+ if (AttributeIsProprietary(node, attval))
1092
+ {
1093
+ TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
1094
+
1095
+ if (cfgBool(doc, TidyDropPropAttrs))
1096
+ TY_(RemoveAttribute)( doc, node, attval );
1097
+ }
1098
+
1099
+ return attribute;
1100
+ }
1101
+
1102
+ Bool TY_(IsBoolAttribute)(AttVal *attval)
1103
+ {
1104
+ const Attribute *attribute = ( attval ? attval->dict : NULL );
1105
+ if ( attribute && attribute->attrchk == CH_BOOL )
1106
+ return yes;
1107
+ return no;
1108
+ }
1109
+
1110
+ Bool TY_(attrIsEvent)( AttVal* attval )
1111
+ {
1112
+ TidyAttrId atid = AttrId( attval );
1113
+
1114
+ return (atid == TidyAttr_OnAFTERUPDATE ||
1115
+ atid == TidyAttr_OnBEFOREUNLOAD ||
1116
+ atid == TidyAttr_OnBEFOREUPDATE ||
1117
+ atid == TidyAttr_OnBLUR ||
1118
+ atid == TidyAttr_OnCHANGE ||
1119
+ atid == TidyAttr_OnCLICK ||
1120
+ atid == TidyAttr_OnDATAAVAILABLE ||
1121
+ atid == TidyAttr_OnDATASETCHANGED ||
1122
+ atid == TidyAttr_OnDATASETCOMPLETE ||
1123
+ atid == TidyAttr_OnDBLCLICK ||
1124
+ atid == TidyAttr_OnERRORUPDATE ||
1125
+ atid == TidyAttr_OnFOCUS ||
1126
+ atid == TidyAttr_OnKEYDOWN ||
1127
+ atid == TidyAttr_OnKEYPRESS ||
1128
+ atid == TidyAttr_OnKEYUP ||
1129
+ atid == TidyAttr_OnLOAD ||
1130
+ atid == TidyAttr_OnMOUSEDOWN ||
1131
+ atid == TidyAttr_OnMOUSEMOVE ||
1132
+ atid == TidyAttr_OnMOUSEOUT ||
1133
+ atid == TidyAttr_OnMOUSEOVER ||
1134
+ atid == TidyAttr_OnMOUSEUP ||
1135
+ atid == TidyAttr_OnRESET ||
1136
+ atid == TidyAttr_OnROWENTER ||
1137
+ atid == TidyAttr_OnROWEXIT ||
1138
+ atid == TidyAttr_OnSELECT ||
1139
+ atid == TidyAttr_OnSUBMIT ||
1140
+ atid == TidyAttr_OnUNLOAD);
1141
+ }
1142
+
1143
+ static void CheckLowerCaseAttrValue( TidyDocImpl* doc, Node *node, AttVal *attval)
1144
+ {
1145
+ tmbstr p;
1146
+ Bool hasUpper = no;
1147
+
1148
+ if (!AttrHasValue(attval))
1149
+ return;
1150
+
1151
+ p = attval->value;
1152
+
1153
+ while (*p)
1154
+ {
1155
+ if (TY_(IsUpper)(*p)) /* #501230 - fix by Terry Teague - 09 Jan 02 */
1156
+ {
1157
+ hasUpper = yes;
1158
+ break;
1159
+ }
1160
+ p++;
1161
+ }
1162
+
1163
+ if (hasUpper)
1164
+ {
1165
+ Lexer* lexer = doc->lexer;
1166
+ if (lexer->isvoyager)
1167
+ TY_(ReportAttrError)( doc, node, attval, ATTR_VALUE_NOT_LCASE);
1168
+
1169
+ if ( lexer->isvoyager || cfgBool(doc, TidyLowerLiterals) )
1170
+ attval->value = TY_(tmbstrtolower)(attval->value);
1171
+ }
1172
+ }
1173
+
1174
+ /* methods for checking value of a specific attribute */
1175
+
1176
+ void TY_(CheckUrl)( TidyDocImpl* doc, Node *node, AttVal *attval)
1177
+ {
1178
+ tmbchar c;
1179
+ tmbstr dest, p;
1180
+ uint escape_count = 0, backslash_count = 0;
1181
+ uint i, pos = 0;
1182
+ uint len;
1183
+ Bool isJavascript = no;
1184
+
1185
+ if (!AttrHasValue(attval))
1186
+ {
1187
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1188
+ return;
1189
+ }
1190
+
1191
+ p = attval->value;
1192
+
1193
+ isJavascript =
1194
+ TY_(tmbstrncmp)(p,"javascript:",sizeof("javascript:")-1)==0;
1195
+
1196
+ for (i = 0; '\0' != (c = p[i]); ++i)
1197
+ {
1198
+ if (c == '\\')
1199
+ {
1200
+ ++backslash_count;
1201
+ if ( cfgBool(doc, TidyFixBackslash) && !isJavascript)
1202
+ p[i] = '/';
1203
+ }
1204
+ else if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
1205
+ ++escape_count;
1206
+ }
1207
+
1208
+ if ( cfgBool(doc, TidyFixUri) && escape_count )
1209
+ {
1210
+ len = TY_(tmbstrlen)(p) + escape_count * 2 + 1;
1211
+ dest = (tmbstr) TidyDocAlloc(doc, len);
1212
+
1213
+ for (i = 0; 0 != (c = p[i]); ++i)
1214
+ {
1215
+ if ((c > 0x7e) || (c <= 0x20) || (strchr("<>", c)))
1216
+ pos += sprintf( dest + pos, "%%%02X", (byte)c );
1217
+ else
1218
+ dest[pos++] = c;
1219
+ }
1220
+ dest[pos] = 0;
1221
+
1222
+ TidyDocFree(doc, attval->value);
1223
+ attval->value = dest;
1224
+ }
1225
+ if ( backslash_count )
1226
+ {
1227
+ if ( cfgBool(doc, TidyFixBackslash) && !isJavascript )
1228
+ TY_(ReportAttrError)( doc, node, attval, FIXED_BACKSLASH );
1229
+ else
1230
+ TY_(ReportAttrError)( doc, node, attval, BACKSLASH_IN_URI );
1231
+ }
1232
+ if ( escape_count )
1233
+ {
1234
+ if ( cfgBool(doc, TidyFixUri) )
1235
+ TY_(ReportAttrError)( doc, node, attval, ESCAPED_ILLEGAL_URI);
1236
+ else
1237
+ TY_(ReportAttrError)( doc, node, attval, ILLEGAL_URI_REFERENCE);
1238
+
1239
+ doc->badChars |= BC_INVALID_URI;
1240
+ }
1241
+ }
1242
+
1243
+ /* RFC 2396, section 4.2 states:
1244
+ "[...] in the case of HTML's FORM element, [...] an
1245
+ empty URI reference represents the base URI of the
1246
+ current document and should be replaced by that URI
1247
+ when transformed into a request."
1248
+ */
1249
+ void CheckAction( TidyDocImpl* doc, Node *node, AttVal *attval)
1250
+ {
1251
+ if (AttrHasValue(attval))
1252
+ TY_(CheckUrl)( doc, node, attval );
1253
+ }
1254
+
1255
+ void CheckScript( TidyDocImpl* ARG_UNUSED(doc), Node* ARG_UNUSED(node),
1256
+ AttVal* ARG_UNUSED(attval))
1257
+ {
1258
+ }
1259
+
1260
+ Bool TY_(IsValidHTMLID)(ctmbstr id)
1261
+ {
1262
+ ctmbstr s = id;
1263
+
1264
+ if (!s)
1265
+ return no;
1266
+
1267
+ if (!TY_(IsLetter)(*s++))
1268
+ return no;
1269
+
1270
+ while (*s)
1271
+ if (!TY_(IsNamechar)(*s++))
1272
+ return no;
1273
+
1274
+ return yes;
1275
+
1276
+ }
1277
+
1278
+ Bool TY_(IsValidXMLID)(ctmbstr id)
1279
+ {
1280
+ ctmbstr s = id;
1281
+ tchar c;
1282
+
1283
+ if (!s)
1284
+ return no;
1285
+
1286
+ c = *s++;
1287
+ if (c > 0x7F)
1288
+ s += TY_(GetUTF8)(s, &c);
1289
+
1290
+ if (!(TY_(IsXMLLetter)(c) || c == '_' || c == ':'))
1291
+ return no;
1292
+
1293
+ while (*s)
1294
+ {
1295
+ c = (unsigned char)*s;
1296
+
1297
+ if (c > 0x7F)
1298
+ s += TY_(GetUTF8)(s, &c);
1299
+
1300
+ ++s;
1301
+
1302
+ if (!TY_(IsXMLNamechar)(c))
1303
+ return no;
1304
+ }
1305
+
1306
+ return yes;
1307
+ }
1308
+
1309
+ static Bool IsValidNMTOKEN(ctmbstr name)
1310
+ {
1311
+ ctmbstr s = name;
1312
+ tchar c;
1313
+
1314
+ if (!s)
1315
+ return no;
1316
+
1317
+ while (*s)
1318
+ {
1319
+ c = (unsigned char)*s;
1320
+
1321
+ if (c > 0x7F)
1322
+ s += TY_(GetUTF8)(s, &c);
1323
+
1324
+ ++s;
1325
+
1326
+ if (!TY_(IsXMLNamechar)(c))
1327
+ return no;
1328
+ }
1329
+
1330
+ return yes;
1331
+ }
1332
+
1333
+ static Bool AttrValueIsAmong(AttVal *attval, ctmbstr const list[])
1334
+ {
1335
+ const ctmbstr *v;
1336
+ for (v = list; *v; ++v)
1337
+ if (AttrValueIs(attval, *v))
1338
+ return yes;
1339
+ return no;
1340
+ }
1341
+
1342
+ static void CheckAttrValidity( TidyDocImpl* doc, Node *node, AttVal *attval,
1343
+ ctmbstr const list[])
1344
+ {
1345
+ if (!AttrHasValue(attval))
1346
+ {
1347
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1348
+ return;
1349
+ }
1350
+
1351
+ CheckLowerCaseAttrValue( doc, node, attval );
1352
+
1353
+ if (!AttrValueIsAmong(attval, list))
1354
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1355
+ }
1356
+
1357
+ void CheckName( TidyDocImpl* doc, Node *node, AttVal *attval)
1358
+ {
1359
+ Node *old;
1360
+
1361
+ if (!AttrHasValue(attval))
1362
+ {
1363
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1364
+ return;
1365
+ }
1366
+
1367
+ if ( TY_(IsAnchorElement)(doc, node) )
1368
+ {
1369
+ if (cfgBool(doc, TidyXmlOut) && !IsValidNMTOKEN(attval->value))
1370
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1371
+
1372
+ if ((old = GetNodeByAnchor(doc, attval->value)) && old != node)
1373
+ {
1374
+ TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1375
+ }
1376
+ else
1377
+ AddAnchor( doc, attval->value, node );
1378
+ }
1379
+ }
1380
+
1381
+ void CheckId( TidyDocImpl* doc, Node *node, AttVal *attval )
1382
+ {
1383
+ Lexer* lexer = doc->lexer;
1384
+ Node *old;
1385
+
1386
+ if (!AttrHasValue(attval))
1387
+ {
1388
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1389
+ return;
1390
+ }
1391
+
1392
+ if (!TY_(IsValidHTMLID)(attval->value))
1393
+ {
1394
+ if (lexer->isvoyager && TY_(IsValidXMLID)(attval->value))
1395
+ TY_(ReportAttrError)( doc, node, attval, XML_ID_SYNTAX);
1396
+ else
1397
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1398
+ }
1399
+
1400
+ if ((old = GetNodeByAnchor(doc, attval->value)) && old != node)
1401
+ {
1402
+ TY_(ReportAttrError)( doc, node, attval, ANCHOR_NOT_UNIQUE);
1403
+ }
1404
+ else
1405
+ AddAnchor( doc, attval->value, node );
1406
+ }
1407
+
1408
+ void CheckBool( TidyDocImpl* doc, Node *node, AttVal *attval)
1409
+ {
1410
+ if (!AttrHasValue(attval))
1411
+ return;
1412
+
1413
+ CheckLowerCaseAttrValue( doc, node, attval );
1414
+ }
1415
+
1416
+ void CheckAlign( TidyDocImpl* doc, Node *node, AttVal *attval)
1417
+ {
1418
+ ctmbstr const values[] = {"left", "right", "center", "justify", NULL};
1419
+
1420
+ /* IMG, OBJECT, APPLET and EMBED use align for vertical position */
1421
+ if (node->tag && (node->tag->model & CM_IMG))
1422
+ {
1423
+ CheckValign( doc, node, attval );
1424
+ return;
1425
+ }
1426
+
1427
+ if (!AttrHasValue(attval))
1428
+ {
1429
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1430
+ return;
1431
+ }
1432
+
1433
+ CheckLowerCaseAttrValue( doc, node, attval);
1434
+
1435
+ /* currently CheckCaption(...) takes care of the remaining cases */
1436
+ if (nodeIsCAPTION(node))
1437
+ return;
1438
+
1439
+ if (!AttrValueIsAmong(attval, values))
1440
+ {
1441
+ /* align="char" is allowed for elements with CM_TABLE|CM_ROW
1442
+ except CAPTION which is excluded above, */
1443
+ if( !(AttrValueIs(attval, "char")
1444
+ && TY_(nodeHasCM)(node, CM_TABLE|CM_ROW)) )
1445
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1446
+ }
1447
+ }
1448
+
1449
+ void CheckValign( TidyDocImpl* doc, Node *node, AttVal *attval)
1450
+ {
1451
+ ctmbstr const values[] = {"top", "middle", "bottom", "baseline", NULL};
1452
+ ctmbstr const values2[] = {"left", "right", NULL};
1453
+ ctmbstr const valuesp[] = {"texttop", "absmiddle", "absbottom",
1454
+ "textbottom", NULL};
1455
+
1456
+ if (!AttrHasValue(attval))
1457
+ {
1458
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1459
+ return;
1460
+ }
1461
+
1462
+ CheckLowerCaseAttrValue( doc, node, attval );
1463
+
1464
+ if (AttrValueIsAmong(attval, values))
1465
+ {
1466
+ /* all is fine */
1467
+ }
1468
+ else if (AttrValueIsAmong(attval, values2))
1469
+ {
1470
+ if (!(node->tag && (node->tag->model & CM_IMG)))
1471
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1472
+ }
1473
+ else if (AttrValueIsAmong(attval, valuesp))
1474
+ {
1475
+ TY_(ConstrainVersion)( doc, VERS_PROPRIETARY );
1476
+ TY_(ReportAttrError)( doc, node, attval, PROPRIETARY_ATTR_VALUE);
1477
+ }
1478
+ else
1479
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1480
+ }
1481
+
1482
+ void CheckLength( TidyDocImpl* doc, Node *node, AttVal *attval)
1483
+ {
1484
+ tmbstr p;
1485
+
1486
+ if (!AttrHasValue(attval))
1487
+ {
1488
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1489
+ return;
1490
+ }
1491
+
1492
+ /* don't check for <col width=...> and <colgroup width=...> */
1493
+ if (attrIsWIDTH(attval) && (nodeIsCOL(node) || nodeIsCOLGROUP(node)))
1494
+ return;
1495
+
1496
+ p = attval->value;
1497
+
1498
+ if (!TY_(IsDigit)(*p++))
1499
+ {
1500
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1501
+ }
1502
+ else
1503
+ {
1504
+ while (*p)
1505
+ {
1506
+ if (!TY_(IsDigit)(*p) && *p != '%')
1507
+ {
1508
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1509
+ break;
1510
+ }
1511
+ ++p;
1512
+ }
1513
+ }
1514
+ }
1515
+
1516
+ void CheckTarget( TidyDocImpl* doc, Node *node, AttVal *attval)
1517
+ {
1518
+ ctmbstr const values[] = {"_blank", "_self", "_parent", "_top", NULL};
1519
+
1520
+ if (!AttrHasValue(attval))
1521
+ {
1522
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1523
+ return;
1524
+ }
1525
+
1526
+ /* target names must begin with A-Za-z ... */
1527
+ if (TY_(IsLetter)(attval->value[0]))
1528
+ return;
1529
+
1530
+ /* or be one of the allowed list */
1531
+ if (!AttrValueIsAmong(attval, values))
1532
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1533
+ }
1534
+
1535
+ void CheckFsubmit( TidyDocImpl* doc, Node *node, AttVal *attval)
1536
+ {
1537
+ ctmbstr const values[] = {"get", "post", NULL};
1538
+ CheckAttrValidity( doc, node, attval, values );
1539
+ }
1540
+
1541
+ void CheckClear( TidyDocImpl* doc, Node *node, AttVal *attval)
1542
+ {
1543
+ ctmbstr const values[] = {"none", "left", "right", "all", NULL};
1544
+
1545
+ if (!AttrHasValue(attval))
1546
+ {
1547
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1548
+ if (attval->value == NULL)
1549
+ attval->value = TY_(tmbstrdup)( doc->allocator, "none" );
1550
+ return;
1551
+ }
1552
+
1553
+ CheckLowerCaseAttrValue( doc, node, attval );
1554
+
1555
+ if (!AttrValueIsAmong(attval, values))
1556
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1557
+ }
1558
+
1559
+ void CheckShape( TidyDocImpl* doc, Node *node, AttVal *attval)
1560
+ {
1561
+ ctmbstr const values[] = {"rect", "default", "circle", "poly", NULL};
1562
+ CheckAttrValidity( doc, node, attval, values );
1563
+ }
1564
+
1565
+ void CheckScope( TidyDocImpl* doc, Node *node, AttVal *attval)
1566
+ {
1567
+ ctmbstr const values[] = {"row", "rowgroup", "col", "colgroup", NULL};
1568
+ CheckAttrValidity( doc, node, attval, values );
1569
+ }
1570
+
1571
+ void CheckNumber( TidyDocImpl* doc, Node *node, AttVal *attval)
1572
+ {
1573
+ tmbstr p;
1574
+
1575
+ if (!AttrHasValue(attval))
1576
+ {
1577
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1578
+ return;
1579
+ }
1580
+
1581
+ /* don't check <frameset cols=... rows=...> */
1582
+ if ( nodeIsFRAMESET(node) &&
1583
+ (attrIsCOLS(attval) || attrIsROWS(attval)))
1584
+ return;
1585
+
1586
+ p = attval->value;
1587
+
1588
+ /* font size may be preceded by + or - */
1589
+ if ( nodeIsFONT(node) && (*p == '+' || *p == '-') )
1590
+ ++p;
1591
+
1592
+ while (*p)
1593
+ {
1594
+ if (!TY_(IsDigit)(*p))
1595
+ {
1596
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1597
+ break;
1598
+ }
1599
+ ++p;
1600
+ }
1601
+ }
1602
+
1603
+ /* check hexadecimal color value */
1604
+ static Bool IsValidColorCode(ctmbstr color)
1605
+ {
1606
+ uint i;
1607
+
1608
+ if (TY_(tmbstrlen)(color) != 6)
1609
+ return no;
1610
+
1611
+ /* check if valid hex digits and letters */
1612
+ for (i = 0; i < 6; i++)
1613
+ if (!TY_(IsDigit)(color[i]) && !strchr("abcdef", TY_(ToLower)(color[i])))
1614
+ return no;
1615
+
1616
+ return yes;
1617
+ }
1618
+
1619
+ /* check color syntax and beautify value by option */
1620
+ void CheckColor( TidyDocImpl* doc, Node *node, AttVal *attval)
1621
+ {
1622
+ Bool valid = no;
1623
+ tmbstr given;
1624
+
1625
+ if (!AttrHasValue(attval))
1626
+ {
1627
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1628
+ return;
1629
+ }
1630
+
1631
+ given = attval->value;
1632
+
1633
+ /* 727851 - add hash to hash-less color values */
1634
+ if (given[0] != '#' && (valid = IsValidColorCode(given)))
1635
+ {
1636
+ tmbstr cp, s;
1637
+
1638
+ cp = s = (tmbstr) TidyDocAlloc(doc, 2 + TY_(tmbstrlen)(given));
1639
+ *cp++ = '#';
1640
+ while ('\0' != (*cp++ = *given++))
1641
+ continue;
1642
+
1643
+ TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE_REPLACED);
1644
+
1645
+ TidyDocFree(doc, attval->value);
1646
+ given = attval->value = s;
1647
+ }
1648
+
1649
+ if (!valid && given[0] == '#')
1650
+ valid = IsValidColorCode(given + 1);
1651
+
1652
+ if (valid && given[0] == '#' && cfgBool(doc, TidyReplaceColor))
1653
+ {
1654
+ ctmbstr newName = GetColorName(given);
1655
+
1656
+ if (newName)
1657
+ {
1658
+ TidyDocFree(doc, attval->value);
1659
+ given = attval->value = TY_(tmbstrdup)(doc->allocator, newName);
1660
+ }
1661
+ }
1662
+
1663
+ /* if it is not a valid color code, it is a color name */
1664
+ if (!valid)
1665
+ valid = GetColorCode(given) != NULL;
1666
+
1667
+ if (valid && given[0] == '#')
1668
+ attval->value = TY_(tmbstrtoupper)(attval->value);
1669
+ else if (valid)
1670
+ attval->value = TY_(tmbstrtolower)(attval->value);
1671
+
1672
+ if (!valid)
1673
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1674
+ }
1675
+
1676
+ /* check valuetype attribute for element param */
1677
+ void CheckVType( TidyDocImpl* doc, Node *node, AttVal *attval)
1678
+ {
1679
+ ctmbstr const values[] = {"data", "object", "ref", NULL};
1680
+ CheckAttrValidity( doc, node, attval, values );
1681
+ }
1682
+
1683
+ /* checks scrolling attribute */
1684
+ void CheckScroll( TidyDocImpl* doc, Node *node, AttVal *attval)
1685
+ {
1686
+ ctmbstr const values[] = {"no", "auto", "yes", NULL};
1687
+ CheckAttrValidity( doc, node, attval, values );
1688
+ }
1689
+
1690
+ /* checks dir attribute */
1691
+ void CheckTextDir( TidyDocImpl* doc, Node *node, AttVal *attval)
1692
+ {
1693
+ ctmbstr const values[] = {"rtl", "ltr", NULL};
1694
+ CheckAttrValidity( doc, node, attval, values );
1695
+ }
1696
+
1697
+ /* checks lang and xml:lang attributes */
1698
+ void CheckLang( TidyDocImpl* doc, Node *node, AttVal *attval)
1699
+ {
1700
+ /* empty xml:lang is allowed through XML 1.0 SE errata */
1701
+ if (!AttrHasValue(attval) && !attrIsXML_LANG(attval))
1702
+ {
1703
+ if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
1704
+ {
1705
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE );
1706
+ }
1707
+ return;
1708
+ }
1709
+ }
1710
+
1711
+ /* checks type attribute */
1712
+ void CheckType( TidyDocImpl* doc, Node *node, AttVal *attval)
1713
+ {
1714
+ ctmbstr const valuesINPUT[] = {"text", "password", "checkbox", "radio",
1715
+ "submit", "reset", "file", "hidden",
1716
+ "image", "button", NULL};
1717
+ ctmbstr const valuesBUTTON[] = {"button", "submit", "reset", NULL};
1718
+ ctmbstr const valuesUL[] = {"disc", "square", "circle", NULL};
1719
+ ctmbstr const valuesOL[] = {"1", "a", "i", NULL};
1720
+
1721
+ if (nodeIsINPUT(node))
1722
+ CheckAttrValidity( doc, node, attval, valuesINPUT );
1723
+ else if (nodeIsBUTTON(node))
1724
+ CheckAttrValidity( doc, node, attval, valuesBUTTON );
1725
+ else if (nodeIsUL(node))
1726
+ CheckAttrValidity( doc, node, attval, valuesUL );
1727
+ else if (nodeIsOL(node))
1728
+ {
1729
+ if (!AttrHasValue(attval))
1730
+ {
1731
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1732
+ return;
1733
+ }
1734
+ if (!AttrValueIsAmong(attval, valuesOL))
1735
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1736
+ }
1737
+ else if (nodeIsLI(node))
1738
+ {
1739
+ if (!AttrHasValue(attval))
1740
+ {
1741
+ TY_(ReportAttrError)( doc, node, attval, MISSING_ATTR_VALUE);
1742
+ return;
1743
+ }
1744
+ if (AttrValueIsAmong(attval, valuesUL))
1745
+ CheckLowerCaseAttrValue( doc, node, attval );
1746
+ else if (!AttrValueIsAmong(attval, valuesOL))
1747
+ TY_(ReportAttrError)( doc, node, attval, BAD_ATTRIBUTE_VALUE);
1748
+ }
1749
+ return;
1750
+ }
1751
+
1752
+ static
1753
+ AttVal *SortAttVal( AttVal* list, TidyAttrSortStrategy strat );
1754
+
1755
+ void TY_(SortAttributes)(Node* node, TidyAttrSortStrategy strat)
1756
+ {
1757
+ while (node)
1758
+ {
1759
+ node->attributes = SortAttVal( node->attributes, strat );
1760
+ if (node->content)
1761
+ TY_(SortAttributes)(node->content, strat);
1762
+ node = node->next;
1763
+ }
1764
+ }
1765
+
1766
+ /**
1767
+ * Attribute sorting contributed by Adrian Wilkins, 2007
1768
+ *
1769
+ * Portions copyright Simon Tatham 2001.
1770
+ *
1771
+ * Merge sort algorithm adapted from listsort.c linked from
1772
+ * http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html
1773
+ *
1774
+ * Original copyright notice proceeds below.
1775
+ *
1776
+ * Permission is hereby granted, free of charge, to any person
1777
+ * obtaining a copy of this software and associated documentation
1778
+ * files (the "Software"), to deal in the Software without
1779
+ * restriction, including without limitation the rights to use,
1780
+ * copy, modify, merge, publish, distribute, sublicense, and/or
1781
+ * sell copies of the Software, and to permit persons to whom the
1782
+ * Software is furnished to do so, subject to the following
1783
+ * conditions:
1784
+ *
1785
+ * The above copyright notice and this permission notice shall be
1786
+ * included in all copies or substantial portions of the Software.
1787
+ *
1788
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1789
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
1790
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1791
+ * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
1792
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
1793
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1794
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1795
+ * SOFTWARE.
1796
+ */
1797
+
1798
+ typedef int(*ptAttValComparator)(AttVal *one, AttVal *two);
1799
+
1800
+ /* Comparison function for TidySortAttrAlpha */
1801
+ static
1802
+ int AlphaComparator(AttVal *one, AttVal *two)
1803
+ {
1804
+ return TY_(tmbstrcmp)(one->attribute, two->attribute);
1805
+ }
1806
+
1807
+
1808
+ /* The "factory method" that returns a pointer to the comparator function */
1809
+ static
1810
+ ptAttValComparator GetAttValComparator(TidyAttrSortStrategy strat)
1811
+ {
1812
+ switch (strat)
1813
+ {
1814
+ case TidySortAttrAlpha:
1815
+ return AlphaComparator;
1816
+ case TidySortAttrNone:
1817
+ break;
1818
+ }
1819
+ return 0;
1820
+ }
1821
+
1822
+ /* The sort routine */
1823
+ static
1824
+ AttVal *SortAttVal( AttVal *list, TidyAttrSortStrategy strat)
1825
+ {
1826
+ ptAttValComparator ptComparator = GetAttValComparator(strat);
1827
+ AttVal *p, *q, *e, *tail;
1828
+ int insize, nmerges, psize, qsize, i;
1829
+
1830
+ /*
1831
+ * Silly special case: if `list' was passed in as NULL, return
1832
+ * NULL immediately.
1833
+ */
1834
+ if (!list)
1835
+ return NULL;
1836
+
1837
+ insize = 1;
1838
+
1839
+ while (1) {
1840
+ p = list;
1841
+ list = NULL;
1842
+ tail = NULL;
1843
+
1844
+ nmerges = 0; /* count number of merges we do in this pass */
1845
+
1846
+ while (p) {
1847
+ nmerges++; /* there exists a merge to be done */
1848
+ /* step `insize' places along from p */
1849
+ q = p;
1850
+ psize = 0;
1851
+ for (i = 0; i < insize; i++) {
1852
+ psize++;
1853
+ q = q->next;
1854
+ if(!q) break;
1855
+ }
1856
+
1857
+ /* if q hasn't fallen off end, we have two lists to merge */
1858
+ qsize = insize;
1859
+
1860
+ /* now we have two lists; merge them */
1861
+ while (psize > 0 || (qsize > 0 && q)) {
1862
+
1863
+ /* decide whether next element of merge comes from p or q */
1864
+ if (psize == 0) {
1865
+ /* p is empty; e must come from q. */
1866
+ e = q; q = q->next; qsize--;
1867
+ } else if (qsize == 0 || !q) {
1868
+ /* q is empty; e must come from p. */
1869
+ e = p; p = p->next; psize--;
1870
+ } else if (ptComparator(p,q) <= 0) {
1871
+ /* First element of p is lower (or same);
1872
+ * e must come from p. */
1873
+ e = p; p = p->next; psize--;
1874
+ } else {
1875
+ /* First element of q is lower; e must come from q. */
1876
+ e = q; q = q->next; qsize--;
1877
+ }
1878
+
1879
+ /* add the next element to the merged list */
1880
+ if (tail) {
1881
+ tail->next = e;
1882
+ } else {
1883
+ list = e;
1884
+ }
1885
+
1886
+ tail = e;
1887
+ }
1888
+
1889
+ /* now p has stepped `insize' places along, and q has too */
1890
+ p = q;
1891
+ }
1892
+
1893
+ tail->next = NULL;
1894
+
1895
+ /* If we have done only one merge, we're finished. */
1896
+ if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
1897
+ return list;
1898
+
1899
+ /* Otherwise repeat, merging lists twice the size */
1900
+ insize *= 2;
1901
+ }
1902
+ }
1903
+
1904
+ /*
1905
+ * local variables:
1906
+ * mode: c
1907
+ * indent-tabs-mode: nil
1908
+ * c-basic-offset: 4
1909
+ * eval: (c-set-offset 'substatement-open 0)
1910
+ * end:
1911
+ */