tidy-ext 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/.gitignore +4 -0
  2. data/LICENSE +50 -0
  3. data/README +12 -0
  4. data/Rakefile +60 -0
  5. data/VERSION +1 -0
  6. data/ext/tidy/access.c +3310 -0
  7. data/ext/tidy/access.h +279 -0
  8. data/ext/tidy/alloc.c +107 -0
  9. data/ext/tidy/attrask.c +209 -0
  10. data/ext/tidy/attrdict.c +2398 -0
  11. data/ext/tidy/attrdict.h +122 -0
  12. data/ext/tidy/attrget.c +213 -0
  13. data/ext/tidy/attrs.c +1911 -0
  14. data/ext/tidy/attrs.h +374 -0
  15. data/ext/tidy/buffio.c +232 -0
  16. data/ext/tidy/buffio.h +118 -0
  17. data/ext/tidy/charsets.c +1032 -0
  18. data/ext/tidy/charsets.h +14 -0
  19. data/ext/tidy/clean.c +2674 -0
  20. data/ext/tidy/clean.h +87 -0
  21. data/ext/tidy/config.c +1746 -0
  22. data/ext/tidy/config.h +153 -0
  23. data/ext/tidy/entities.c +419 -0
  24. data/ext/tidy/entities.h +24 -0
  25. data/ext/tidy/extconf.rb +5 -0
  26. data/ext/tidy/fileio.c +106 -0
  27. data/ext/tidy/fileio.h +46 -0
  28. data/ext/tidy/forward.h +69 -0
  29. data/ext/tidy/iconvtc.c +105 -0
  30. data/ext/tidy/iconvtc.h +15 -0
  31. data/ext/tidy/istack.c +373 -0
  32. data/ext/tidy/lexer.c +3825 -0
  33. data/ext/tidy/lexer.h +617 -0
  34. data/ext/tidy/localize.c +1882 -0
  35. data/ext/tidy/mappedio.c +329 -0
  36. data/ext/tidy/mappedio.h +16 -0
  37. data/ext/tidy/message.h +207 -0
  38. data/ext/tidy/parser.c +4408 -0
  39. data/ext/tidy/parser.h +76 -0
  40. data/ext/tidy/platform.h +636 -0
  41. data/ext/tidy/pprint.c +2276 -0
  42. data/ext/tidy/pprint.h +93 -0
  43. data/ext/tidy/ruby-tidy.c +195 -0
  44. data/ext/tidy/streamio.c +1407 -0
  45. data/ext/tidy/streamio.h +222 -0
  46. data/ext/tidy/tagask.c +286 -0
  47. data/ext/tidy/tags.c +955 -0
  48. data/ext/tidy/tags.h +235 -0
  49. data/ext/tidy/tidy-int.h +129 -0
  50. data/ext/tidy/tidy.h +1097 -0
  51. data/ext/tidy/tidyenum.h +622 -0
  52. data/ext/tidy/tidylib.c +1751 -0
  53. data/ext/tidy/tmbstr.c +306 -0
  54. data/ext/tidy/tmbstr.h +92 -0
  55. data/ext/tidy/utf8.c +539 -0
  56. data/ext/tidy/utf8.h +52 -0
  57. data/ext/tidy/version.h +14 -0
  58. data/ext/tidy/win32tc.c +795 -0
  59. data/ext/tidy/win32tc.h +19 -0
  60. data/spec/spec_helper.rb +5 -0
  61. data/spec/tidy/compat_spec.rb +44 -0
  62. data/spec/tidy/remote_uri_spec.rb +14 -0
  63. data/spec/tidy/test1.html +5 -0
  64. data/spec/tidy/tidy_spec.rb +34 -0
  65. metadata +125 -0
data/ext/tidy/tags.c ADDED
@@ -0,0 +1,955 @@
1
+ /* tags.c -- recognize HTML tags
2
+
3
+ (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4
+ See tidy.h for the copyright notice.
5
+
6
+ CVS Info :
7
+
8
+ $Author: hoehrmann $
9
+ $Date: 2008/08/09 11:55:27 $
10
+ $Revision: 1.71 $
11
+
12
+ The HTML tags are stored as 8 bit ASCII strings.
13
+
14
+ */
15
+
16
+ #include "tidy-int.h"
17
+ #include "message.h"
18
+ #include "tmbstr.h"
19
+
20
+ /* Attribute checking methods: forward declarations of the per-element attribute checkers that the tag_defs table refers to by name. Their definitions are not part of this chunk. */
20
+ static CheckAttribs CheckIMG;
21
+ static CheckAttribs CheckLINK;
22
+ static CheckAttribs CheckAREA;
23
+ static CheckAttribs CheckTABLE;
24
+ static CheckAttribs CheckCaption;
25
+ static CheckAttribs CheckSCRIPT;
26
+ static CheckAttribs CheckSTYLE;
27
+ static CheckAttribs CheckHTML;
28
+ static CheckAttribs CheckFORM;
29
+ static CheckAttribs CheckMETA;
31
+
32
/*
 * Per-element version masks.  Each VERS_ELEM_<NAME> ORs together one flag per
 * column, always in the same column order:
 *
 *   HT20 HT32 H40T H41T X10T H40F H41F X10F H40S H41S X10S XH11 XB10
 *
 * The macro is used as the version-mask field of the matching tag_defs entry.
 * `xxxx` fills a column the element is not flagged for; its definition is not
 * in this chunk -- presumably 0, confirm in the header that defines it.
 */
+ #define VERS_ELEM_A (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
33
+ #define VERS_ELEM_ABBR (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
34
+ #define VERS_ELEM_ACRONYM (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
35
+ #define VERS_ELEM_ADDRESS (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
36
+ #define VERS_ELEM_APPLET (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
37
+ #define VERS_ELEM_AREA (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
38
+ #define VERS_ELEM_B (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
39
+ #define VERS_ELEM_BASE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
40
+ #define VERS_ELEM_BASEFONT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
41
+ #define VERS_ELEM_BDO (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
42
+ #define VERS_ELEM_BIG (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
43
+ #define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
44
+ #define VERS_ELEM_BODY (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
45
+ #define VERS_ELEM_BR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
46
+ #define VERS_ELEM_BUTTON (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
47
+ #define VERS_ELEM_CAPTION (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
48
+ #define VERS_ELEM_CENTER (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
49
+ #define VERS_ELEM_CITE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
50
+ #define VERS_ELEM_CODE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
51
+ #define VERS_ELEM_COL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
52
+ #define VERS_ELEM_COLGROUP (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
53
+ #define VERS_ELEM_DD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
54
+ #define VERS_ELEM_DEL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
55
+ #define VERS_ELEM_DFN (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
56
+ #define VERS_ELEM_DIR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
57
+ #define VERS_ELEM_DIV (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
58
+ #define VERS_ELEM_DL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
59
+ #define VERS_ELEM_DT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
60
+ #define VERS_ELEM_EM (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
61
+ #define VERS_ELEM_FIELDSET (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
62
+ #define VERS_ELEM_FONT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
63
+ #define VERS_ELEM_FORM (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
64
+ #define VERS_ELEM_FRAME (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
65
+ #define VERS_ELEM_FRAMESET (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
66
+ #define VERS_ELEM_H1 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
67
+ #define VERS_ELEM_H2 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
68
+ #define VERS_ELEM_H3 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
69
+ #define VERS_ELEM_H4 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
70
+ #define VERS_ELEM_H5 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
71
+ #define VERS_ELEM_H6 (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
72
+ #define VERS_ELEM_HEAD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
73
+ #define VERS_ELEM_HR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
74
+ #define VERS_ELEM_HTML (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
75
+ #define VERS_ELEM_I (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
76
+ #define VERS_ELEM_IFRAME (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
77
+ #define VERS_ELEM_IMG (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
78
+ #define VERS_ELEM_INPUT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
79
+ #define VERS_ELEM_INS (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
80
+ #define VERS_ELEM_ISINDEX (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
81
+ #define VERS_ELEM_KBD (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
82
+ #define VERS_ELEM_LABEL (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
83
+ #define VERS_ELEM_LEGEND (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
84
+ #define VERS_ELEM_LI (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
85
+ #define VERS_ELEM_LINK (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
86
+ #define VERS_ELEM_LISTING (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
87
+ #define VERS_ELEM_MAP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
88
+ #define VERS_ELEM_MENU (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
89
+ #define VERS_ELEM_META (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
90
+ #define VERS_ELEM_NEXTID (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
91
+ #define VERS_ELEM_NOFRAMES (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
92
+ #define VERS_ELEM_NOSCRIPT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
93
+ #define VERS_ELEM_OBJECT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
94
+ #define VERS_ELEM_OL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
95
+ #define VERS_ELEM_OPTGROUP (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
96
+ #define VERS_ELEM_OPTION (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
97
+ #define VERS_ELEM_P (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
98
+ #define VERS_ELEM_PARAM (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
99
+ #define VERS_ELEM_PLAINTEXT (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
100
+ #define VERS_ELEM_PRE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
101
+ #define VERS_ELEM_Q (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
102
+ #define VERS_ELEM_RB (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
103
+ #define VERS_ELEM_RBC (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
104
+ #define VERS_ELEM_RP (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
105
+ #define VERS_ELEM_RT (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
106
+ #define VERS_ELEM_RTC (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
107
+ #define VERS_ELEM_RUBY (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
108
+ #define VERS_ELEM_S (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
109
+ #define VERS_ELEM_SAMP (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
110
+ #define VERS_ELEM_SCRIPT (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
111
+ #define VERS_ELEM_SELECT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
112
+ #define VERS_ELEM_SMALL (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
113
+ #define VERS_ELEM_SPAN (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
114
+ #define VERS_ELEM_STRIKE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
115
+ #define VERS_ELEM_STRONG (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
116
+ #define VERS_ELEM_STYLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
117
+ #define VERS_ELEM_SUB (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
118
+ #define VERS_ELEM_SUP (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
119
+ #define VERS_ELEM_TABLE (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
120
+ #define VERS_ELEM_TBODY (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
121
+ #define VERS_ELEM_TD (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
122
+ #define VERS_ELEM_TEXTAREA (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
123
+ #define VERS_ELEM_TFOOT (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
124
+ #define VERS_ELEM_TH (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
125
+ #define VERS_ELEM_THEAD (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
126
+ #define VERS_ELEM_TITLE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
127
+ #define VERS_ELEM_TR (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
128
+ #define VERS_ELEM_TT (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
129
+ #define VERS_ELEM_U (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
130
+ #define VERS_ELEM_UL (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
131
+ #define VERS_ELEM_VAR (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
132
+ #define VERS_ELEM_XMP (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
133
+
134
/*
 * Built-in element dictionary.  Each initializer lists, in order: tag id,
 * element name, version mask, attribute-version table, content-model flags,
 * parser, attribute checker.  (The mapping of positions to Dict members is
 * inferred from the members NewDict() assigns -- confirm against the Dict
 * declaration.)
 *
 * Entry 0 is a placeholder for TidyTag_UNKNOWN.  The lookups visible in this
 * file start scanning at tag_defs + 1 and stop at tag_defs + N_TIDY_TAGS, so
 * they skip the placeholder and do not depend on the NULL-named terminator.
 */
+ static const Dict tag_defs[] =
135
+ {
136
+ { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
137
+
138
+ /* W3C defined elements */
139
+ { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL },
140
+ { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
141
+ { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL },
142
+ { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseInline), NULL },
143
+ { TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
144
+ { TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA },
145
+ { TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL },
146
+ { TidyTag_BASE, "base", VERS_ELEM_BASE, &TY_(W3CAttrsFor_BASE)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
147
+ { TidyTag_BASEFONT, "basefont", VERS_ELEM_BASEFONT, &TY_(W3CAttrsFor_BASEFONT)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
148
+ { TidyTag_BDO, "bdo", VERS_ELEM_BDO, &TY_(W3CAttrsFor_BDO)[0], (CM_INLINE), TY_(ParseInline), NULL },
149
+ { TidyTag_BIG, "big", VERS_ELEM_BIG, &TY_(W3CAttrsFor_BIG)[0], (CM_INLINE), TY_(ParseInline), NULL },
150
+ { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
151
+ { TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL },
152
+ { TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
153
+ { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL },
154
+ { TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseInline), CheckCaption },
155
+ { TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
156
+ { TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL },
157
+ { TidyTag_CODE, "code", VERS_ELEM_CODE, &TY_(W3CAttrsFor_CODE)[0], (CM_INLINE), TY_(ParseInline), NULL },
158
+ { TidyTag_COL, "col", VERS_ELEM_COL, &TY_(W3CAttrsFor_COL)[0], (CM_TABLE|CM_EMPTY), TY_(ParseEmpty), NULL },
159
+ { TidyTag_COLGROUP, "colgroup", VERS_ELEM_COLGROUP, &TY_(W3CAttrsFor_COLGROUP)[0], (CM_TABLE|CM_OPT), TY_(ParseColGroup), NULL },
160
+ { TidyTag_DD, "dd", VERS_ELEM_DD, &TY_(W3CAttrsFor_DD)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
161
+ { TidyTag_DEL, "del", VERS_ELEM_DEL, &TY_(W3CAttrsFor_DEL)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
162
+ { TidyTag_DFN, "dfn", VERS_ELEM_DFN, &TY_(W3CAttrsFor_DFN)[0], (CM_INLINE), TY_(ParseInline), NULL },
163
+ { TidyTag_DIR, "dir", VERS_ELEM_DIR, &TY_(W3CAttrsFor_DIR)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
164
+ { TidyTag_DIV, "div", VERS_ELEM_DIV, &TY_(W3CAttrsFor_DIV)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
165
+ { TidyTag_DL, "dl", VERS_ELEM_DL, &TY_(W3CAttrsFor_DL)[0], (CM_BLOCK), TY_(ParseDefList), NULL },
166
+ { TidyTag_DT, "dt", VERS_ELEM_DT, &TY_(W3CAttrsFor_DT)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseInline), NULL },
167
+ { TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL },
168
+ { TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
169
+ { TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL },
170
+ { TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), CheckFORM },
171
+ { TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL },
172
+ { TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL },
173
+ { TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
174
+ { TidyTag_H2, "h2", VERS_ELEM_H2, &TY_(W3CAttrsFor_H2)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
175
+ { TidyTag_H3, "h3", VERS_ELEM_H3, &TY_(W3CAttrsFor_H3)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
176
+ { TidyTag_H4, "h4", VERS_ELEM_H4, &TY_(W3CAttrsFor_H4)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
177
+ { TidyTag_H5, "h5", VERS_ELEM_H5, &TY_(W3CAttrsFor_H5)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
178
+ { TidyTag_H6, "h6", VERS_ELEM_H6, &TY_(W3CAttrsFor_H6)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
179
+ { TidyTag_HEAD, "head", VERS_ELEM_HEAD, &TY_(W3CAttrsFor_HEAD)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHead), NULL },
180
+ { TidyTag_HR, "hr", VERS_ELEM_HR, &TY_(W3CAttrsFor_HR)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
181
+ { TidyTag_HTML, "html", VERS_ELEM_HTML, &TY_(W3CAttrsFor_HTML)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHTML), CheckHTML },
182
+ { TidyTag_I, "i", VERS_ELEM_I, &TY_(W3CAttrsFor_I)[0], (CM_INLINE), TY_(ParseInline), NULL },
183
+ { TidyTag_IFRAME, "iframe", VERS_ELEM_IFRAME, &TY_(W3CAttrsFor_IFRAME)[0], (CM_INLINE), TY_(ParseBlock), NULL },
184
+ { TidyTag_IMG, "img", VERS_ELEM_IMG, &TY_(W3CAttrsFor_IMG)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), CheckIMG },
185
+ { TidyTag_INPUT, "input", VERS_ELEM_INPUT, &TY_(W3CAttrsFor_INPUT)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
186
+ { TidyTag_INS, "ins", VERS_ELEM_INS, &TY_(W3CAttrsFor_INS)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
187
+ { TidyTag_ISINDEX, "isindex", VERS_ELEM_ISINDEX, &TY_(W3CAttrsFor_ISINDEX)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
188
+ { TidyTag_KBD, "kbd", VERS_ELEM_KBD, &TY_(W3CAttrsFor_KBD)[0], (CM_INLINE), TY_(ParseInline), NULL },
189
+ { TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL },
190
+ { TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL },
191
+ { TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
192
+ { TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckLINK },
193
+ { TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
194
+ { TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL },
195
+ { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
196
+ { TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckMETA },
197
+ { TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL },
198
+ { TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
199
+ { TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
200
+ { TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL },
201
+ { TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL },
202
+ { TidyTag_OPTION, "option", VERS_ELEM_OPTION, &TY_(W3CAttrsFor_OPTION)[0], (CM_FIELD|CM_OPT), TY_(ParseText), NULL },
203
+ { TidyTag_P, "p", VERS_ELEM_P, &TY_(W3CAttrsFor_P)[0], (CM_BLOCK|CM_OPT), TY_(ParseInline), NULL },
204
+ { TidyTag_PARAM, "param", VERS_ELEM_PARAM, &TY_(W3CAttrsFor_PARAM)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
205
+ { TidyTag_PLAINTEXT, "plaintext", VERS_ELEM_PLAINTEXT, &TY_(W3CAttrsFor_PLAINTEXT)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
206
+ { TidyTag_PRE, "pre", VERS_ELEM_PRE, &TY_(W3CAttrsFor_PRE)[0], (CM_BLOCK), TY_(ParsePre), NULL },
207
+ { TidyTag_Q, "q", VERS_ELEM_Q, &TY_(W3CAttrsFor_Q)[0], (CM_INLINE), TY_(ParseInline), NULL },
208
+ { TidyTag_RB, "rb", VERS_ELEM_RB, &TY_(W3CAttrsFor_RB)[0], (CM_INLINE), TY_(ParseInline), NULL },
209
+ { TidyTag_RBC, "rbc", VERS_ELEM_RBC, &TY_(W3CAttrsFor_RBC)[0], (CM_INLINE), TY_(ParseInline), NULL },
210
+ { TidyTag_RP, "rp", VERS_ELEM_RP, &TY_(W3CAttrsFor_RP)[0], (CM_INLINE), TY_(ParseInline), NULL },
211
+ { TidyTag_RT, "rt", VERS_ELEM_RT, &TY_(W3CAttrsFor_RT)[0], (CM_INLINE), TY_(ParseInline), NULL },
212
+ { TidyTag_RTC, "rtc", VERS_ELEM_RTC, &TY_(W3CAttrsFor_RTC)[0], (CM_INLINE), TY_(ParseInline), NULL },
213
+ { TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL },
214
+ { TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL },
215
+ { TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL },
216
+ { TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), CheckSCRIPT },
217
+ { TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL },
218
+ { TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL },
219
+ { TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL },
220
+ { TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL },
221
+ { TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL },
222
+ { TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD), TY_(ParseScript), CheckSTYLE },
223
+ { TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL },
224
+ { TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL },
225
+ { TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE },
226
+ { TidyTag_TBODY, "tbody", VERS_ELEM_TBODY, &TY_(W3CAttrsFor_TBODY)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
227
+ { TidyTag_TD, "td", VERS_ELEM_TD, &TY_(W3CAttrsFor_TD)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
228
+ { TidyTag_TEXTAREA, "textarea", VERS_ELEM_TEXTAREA, &TY_(W3CAttrsFor_TEXTAREA)[0], (CM_INLINE|CM_FIELD), TY_(ParseText), NULL },
229
+ { TidyTag_TFOOT, "tfoot", VERS_ELEM_TFOOT, &TY_(W3CAttrsFor_TFOOT)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
230
+ { TidyTag_TH, "th", VERS_ELEM_TH, &TY_(W3CAttrsFor_TH)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
231
+ { TidyTag_THEAD, "thead", VERS_ELEM_THEAD, &TY_(W3CAttrsFor_THEAD)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
232
+ { TidyTag_TITLE, "title", VERS_ELEM_TITLE, &TY_(W3CAttrsFor_TITLE)[0], (CM_HEAD), TY_(ParseTitle), NULL },
233
+ { TidyTag_TR, "tr", VERS_ELEM_TR, &TY_(W3CAttrsFor_TR)[0], (CM_TABLE|CM_OPT), TY_(ParseRow), NULL },
234
+ { TidyTag_TT, "tt", VERS_ELEM_TT, &TY_(W3CAttrsFor_TT)[0], (CM_INLINE), TY_(ParseInline), NULL },
235
+ { TidyTag_U, "u", VERS_ELEM_U, &TY_(W3CAttrsFor_U)[0], (CM_INLINE), TY_(ParseInline), NULL },
236
+ { TidyTag_UL, "ul", VERS_ELEM_UL, &TY_(W3CAttrsFor_UL)[0], (CM_BLOCK), TY_(ParseList), NULL },
237
+ { TidyTag_VAR, "var", VERS_ELEM_VAR, &TY_(W3CAttrsFor_VAR)[0], (CM_INLINE), TY_(ParseInline), NULL },
238
+ { TidyTag_XMP, "xmp", VERS_ELEM_XMP, &TY_(W3CAttrsFor_XMP)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
239
+ { TidyTag_NEXTID, "nextid", VERS_ELEM_NEXTID, &TY_(W3CAttrsFor_NEXTID)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
240
+
241
+ /* proprietary elements */
242
+ { TidyTag_ALIGN, "align", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
243
+ { TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
244
+ { TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
245
+ { TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL },
246
+ { TidyTag_EMBED, "embed", VERS_NETSCAPE, NULL, (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
247
+ { TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
248
+ { TidyTag_KEYGEN, "keygen", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
249
+ { TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
250
+ { TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL },
251
+ { TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
252
+ { TidyTag_NOBR, "nobr", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
253
+ { TidyTag_NOEMBED, "noembed", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
254
+ { TidyTag_NOLAYER, "nolayer", VERS_NETSCAPE, NULL, (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
255
+ { TidyTag_NOSAVE, "nosave", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
256
+ { TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
257
+ { TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
258
+ { TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
259
+ { TidyTag_WBR, "wbr", VERS_PROPRIETARY, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
260
+
261
+ /* this must be the final entry */
262
+ { (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL }
263
+ };
264
+
265
+ #if ELEMENT_HASH_LOOKUP
266
+ static uint tagsHash(ctmbstr s)
267
+ {
268
+ uint hashval;
269
+
270
+ for (hashval = 0; *s != '\0'; s++)
271
+ hashval = *s + 31*hashval;
272
+
273
+ return hashval % ELEMENT_HASH_SIZE;
274
+ }
275
+
276
+ static const Dict *tagsInstall(TidyDocImpl* doc, TidyTagImpl* tags, const Dict* old)
277
+ {
278
+ DictHash *np;
279
+ uint hashval;
280
+
281
+ if (old)
282
+ {
283
+ np = (DictHash *)TidyDocAlloc(doc, sizeof(*np));
284
+ np->tag = old;
285
+
286
+ hashval = tagsHash(old->name);
287
+ np->next = tags->hashtab[hashval];
288
+ tags->hashtab[hashval] = np;
289
+ }
290
+
291
+ return old;
292
+ }
293
+
294
+ static void tagsRemoveFromHash( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
295
+ {
296
+ uint h = tagsHash(s);
297
+ DictHash *p, *prev = NULL;
298
+ for (p = tags->hashtab[h]; p && p->tag; p = p->next)
299
+ {
300
+ if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
301
+ {
302
+ DictHash* next = p->next;
303
+ if ( prev )
304
+ prev->next = next;
305
+ else
306
+ tags->hashtab[h] = next;
307
+ TidyDocFree(doc, p);
308
+ return;
309
+ }
310
+ prev = p;
311
+ }
312
+ }
313
+
314
+ static void tagsEmptyHash( TidyDocImpl* doc, TidyTagImpl* tags )
315
+ {
316
+ uint i;
317
+ DictHash *prev, *next;
318
+
319
+ for (i = 0; i < ELEMENT_HASH_SIZE; ++i)
320
+ {
321
+ prev = NULL;
322
+ next = tags->hashtab[i];
323
+
324
+ while(next)
325
+ {
326
+ prev = next->next;
327
+ TidyDocFree(doc, next);
328
+ next = prev;
329
+ }
330
+
331
+ tags->hashtab[i] = NULL;
332
+ }
333
+ }
334
+ #endif /* ELEMENT_HASH_LOOKUP */
335
+
336
+ static const Dict* tagsLookup( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
337
+ {
338
+ const Dict *np;
339
+ #if ELEMENT_HASH_LOOKUP
340
+ const DictHash* p;
341
+ #endif
342
+
343
+ if (!s)
344
+ return NULL;
345
+
346
+ #if ELEMENT_HASH_LOOKUP
347
+ /* this breaks if declared elements get changed between two */
348
+ /* parser runs since Tidy would use the cached version rather */
349
+ /* than the new one. */
350
+ /* However, as FreeDeclaredTags() correctly cleans the hash */
351
+ /* this should not be true anymore. */
352
+ for (p = tags->hashtab[tagsHash(s)]; p && p->tag; p = p->next)
353
+ if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
354
+ return p->tag;
355
+
356
+ for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
357
+ if (TY_(tmbstrcmp)(s, np->name) == 0)
358
+ return tagsInstall(doc, tags, np);
359
+
360
+ for (np = tags->declared_tag_list; np; np = np->next)
361
+ if (TY_(tmbstrcmp)(s, np->name) == 0)
362
+ return tagsInstall(doc, tags, np);
363
+ #else
364
+
365
+ for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
366
+ if (TY_(tmbstrcmp)(s, np->name) == 0)
367
+ return np;
368
+
369
+ for (np = tags->declared_tag_list; np; np = np->next)
370
+ if (TY_(tmbstrcmp)(s, np->name) == 0)
371
+ return np;
372
+
373
+ #endif /* ELEMENT_HASH_LOOKUP */
374
+
375
+ return NULL;
376
+ }
377
+
378
+ static Dict* NewDict( TidyDocImpl* doc, ctmbstr name )
379
+ {
380
+ Dict *np = (Dict*) TidyDocAlloc( doc, sizeof(Dict) );
381
+ np->id = TidyTag_UNKNOWN;
382
+ np->name = name ? TY_(tmbstrdup)( doc->allocator, name ) : NULL;
383
+ np->versions = VERS_UNKNOWN;
384
+ np->attrvers = NULL;
385
+ np->model = CM_UNKNOWN;
386
+ np->parser = 0;
387
+ np->chkattrs = 0;
388
+ np->next = NULL;
389
+ return np;
390
+ }
391
+
392
+ static void FreeDict( TidyDocImpl* doc, Dict *d )
393
+ {
394
+ if ( d )
395
+ TidyDocFree( doc, d->name );
396
+ TidyDocFree( doc, d );
397
+ }
398
+
399
+ static void declare( TidyDocImpl* doc, TidyTagImpl* tags,
400
+ ctmbstr name, uint versions, uint model,
401
+ Parser *parser, CheckAttribs *chkattrs )
402
+ {
403
+ if ( name )
404
+ {
405
+ Dict* np = (Dict*) tagsLookup( doc, tags, name );
406
+ if ( np == NULL )
407
+ {
408
+ np = NewDict( doc, name );
409
+ np->next = tags->declared_tag_list;
410
+ tags->declared_tag_list = np;
411
+ }
412
+
413
+ /* Make sure we are not over-writing predefined tags */
414
+ if ( np->id == TidyTag_UNKNOWN )
415
+ {
416
+ np->versions = versions;
417
+ np->model |= model;
418
+ np->parser = parser;
419
+ np->chkattrs = chkattrs;
420
+ np->attrvers = NULL;
421
+ }
422
+ }
423
+ }
424
+
425
+ /* public interface for finding tag by name */
426
+ Bool TY_(FindTag)( TidyDocImpl* doc, Node *node )
427
+ {
428
+ const Dict *np = NULL;
429
+ if ( cfgBool(doc, TidyXmlTags) )
430
+ {
431
+ node->tag = doc->tags.xml_tags;
432
+ return yes;
433
+ }
434
+
435
+ if ( node->element && (np = tagsLookup(doc, &doc->tags, node->element)) )
436
+ {
437
+ node->tag = np;
438
+ return yes;
439
+ }
440
+
441
+ return no;
442
+ }
443
+
444
+ const Dict* TY_(LookupTagDef)( TidyTagId tid )
445
+ {
446
+ const Dict *np;
447
+
448
+ for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np )
449
+ if (np->id == tid)
450
+ return np;
451
+
452
+ return NULL;
453
+ }
454
+
455
+ Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node )
456
+ {
457
+ const Dict* np = tagsLookup( doc, &doc->tags, node->element );
458
+ if ( np )
459
+ return np->parser;
460
+ return NULL;
461
+ }
462
+
463
+ void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name )
464
+ {
465
+ Parser* parser = 0;
466
+ uint cm = CM_UNKNOWN;
467
+ uint vers = VERS_PROPRIETARY;
468
+
469
+ switch (tagType)
470
+ {
471
+ case tagtype_empty:
472
+ cm = CM_EMPTY|CM_NO_INDENT|CM_NEW;
473
+ parser = TY_(ParseBlock);
474
+ break;
475
+
476
+ case tagtype_inline:
477
+ cm = CM_INLINE|CM_NO_INDENT|CM_NEW;
478
+ parser = TY_(ParseInline);
479
+ break;
480
+
481
+ case tagtype_block:
482
+ cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
483
+ parser = TY_(ParseBlock);
484
+ break;
485
+
486
+ case tagtype_pre:
487
+ cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
488
+ parser = TY_(ParsePre);
489
+ break;
490
+
491
+ case tagtype_null:
492
+ break;
493
+ }
494
+ if ( cm && parser )
495
+ declare( doc, &doc->tags, name, vers, cm, parser, 0 );
496
+ }
497
+
498
+ TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc )
499
+ {
500
+ return (TidyIterator) doc->tags.declared_tag_list;
501
+ }
502
+
503
+ ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* ARG_UNUSED(doc),
504
+ UserTagType tagType, TidyIterator* iter )
505
+ {
506
+ ctmbstr name = NULL;
507
+ Dict* curr;
508
+ for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next )
509
+ {
510
+ switch ( tagType )
511
+ {
512
+ case tagtype_empty:
513
+ if ( (curr->model & CM_EMPTY) != 0 )
514
+ name = curr->name;
515
+ break;
516
+
517
+ case tagtype_inline:
518
+ if ( (curr->model & CM_INLINE) != 0 )
519
+ name = curr->name;
520
+ break;
521
+
522
+ case tagtype_block:
523
+ if ( (curr->model & CM_BLOCK) != 0 &&
524
+ curr->parser == TY_(ParseBlock) )
525
+ name = curr->name;
526
+ break;
527
+
528
+ case tagtype_pre:
529
+ if ( (curr->model & CM_BLOCK) != 0 &&
530
+ curr->parser == TY_(ParsePre) )
531
+ name = curr->name;
532
+ break;
533
+
534
+ case tagtype_null:
535
+ break;
536
+ }
537
+ }
538
+ *iter = (TidyIterator) curr;
539
+ return name;
540
+ }
541
+
542
+ void TY_(InitTags)( TidyDocImpl* doc )
543
+ {
544
+ Dict* xml;
545
+ TidyTagImpl* tags = &doc->tags;
546
+
547
+ TidyClearMemory( tags, sizeof(TidyTagImpl) );
548
+
549
+ /* create dummy entry for all xml tags */
550
+ xml = NewDict( doc, NULL );
551
+ xml->versions = VERS_XML;
552
+ xml->model = CM_BLOCK;
553
+ xml->parser = 0;
554
+ xml->chkattrs = 0;
555
+ xml->attrvers = NULL;
556
+ tags->xml_tags = xml;
557
+ }
558
+
559
+ /* By default, zap all of them. But allow
560
+ ** an single type to be specified.
561
+ */
562
+ void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType )
563
+ {
564
+ TidyTagImpl* tags = &doc->tags;
565
+ Dict *curr, *next = NULL, *prev = NULL;
566
+
567
+ for ( curr=tags->declared_tag_list; curr; curr = next )
568
+ {
569
+ Bool deleteIt = yes;
570
+ next = curr->next;
571
+ switch ( tagType )
572
+ {
573
+ case tagtype_empty:
574
+ deleteIt = ( curr->model & CM_EMPTY ) != 0;
575
+ break;
576
+
577
+ case tagtype_inline:
578
+ deleteIt = ( curr->model & CM_INLINE ) != 0;
579
+ break;
580
+
581
+ case tagtype_block:
582
+ deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
583
+ curr->parser == TY_(ParseBlock) );
584
+ break;
585
+
586
+ case tagtype_pre:
587
+ deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
588
+ curr->parser == TY_(ParsePre) );
589
+ break;
590
+
591
+ case tagtype_null:
592
+ break;
593
+ }
594
+
595
+ if ( deleteIt )
596
+ {
597
+ #if ELEMENT_HASH_LOOKUP
598
+ tagsRemoveFromHash( doc, &doc->tags, curr->name );
599
+ #endif
600
+ FreeDict( doc, curr );
601
+ if ( prev )
602
+ prev->next = next;
603
+ else
604
+ tags->declared_tag_list = next;
605
+ }
606
+ else
607
+ prev = curr;
608
+ }
609
+ }
610
+
611
+ void TY_(FreeTags)( TidyDocImpl* doc )
612
+ {
613
+ TidyTagImpl* tags = &doc->tags;
614
+
615
+ #if ELEMENT_HASH_LOOKUP
616
+ tagsEmptyHash( doc, tags );
617
+ #endif
618
+ TY_(FreeDeclaredTags)( doc, tagtype_null );
619
+ FreeDict( doc, tags->xml_tags );
620
+
621
+ /* get rid of dangling tag references */
622
+ TidyClearMemory( tags, sizeof(TidyTagImpl) );
623
+ }
624
+
625
+
626
+ /* default method for checking an element's attributes */
627
+ void TY_(CheckAttributes)( TidyDocImpl* doc, Node *node )
628
+ {
629
+ AttVal *next, *attval = node->attributes;
630
+ while (attval)
631
+ {
632
+ next = attval->next;
633
+ TY_(CheckAttribute)( doc, node, attval );
634
+ attval = next;
635
+ }
636
+ }
637
+
638
+ /* methods for checking attributes for specific elements */
639
+
640
+ void CheckIMG( TidyDocImpl* doc, Node *node )
641
+ {
642
+ Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
643
+ Bool HasSrc = TY_(AttrGetById)(node, TidyAttr_SRC) != NULL;
644
+ Bool HasUseMap = TY_(AttrGetById)(node, TidyAttr_USEMAP) != NULL;
645
+ Bool HasIsMap = TY_(AttrGetById)(node, TidyAttr_ISMAP) != NULL;
646
+ Bool HasDataFld = TY_(AttrGetById)(node, TidyAttr_DATAFLD) != NULL;
647
+
648
+ TY_(CheckAttributes)(doc, node);
649
+
650
+ if ( !HasAlt )
651
+ {
652
+ if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
653
+ {
654
+ doc->badAccess |= BA_MISSING_IMAGE_ALT;
655
+ TY_(ReportMissingAttr)( doc, node, "alt" );
656
+ }
657
+
658
+ if ( cfgStr(doc, TidyAltText) )
659
+ TY_(AddAttribute)( doc, node, "alt", cfgStr(doc, TidyAltText) );
660
+ }
661
+
662
+ if ( !HasSrc && !HasDataFld )
663
+ TY_(ReportMissingAttr)( doc, node, "src" );
664
+
665
+ if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
666
+ {
667
+ if ( HasIsMap && !HasUseMap )
668
+ TY_(ReportAttrError)( doc, node, NULL, MISSING_IMAGEMAP);
669
+ }
670
+ }
671
+
672
+ void CheckCaption(TidyDocImpl* doc, Node *node)
673
+ {
674
+ AttVal *attval;
675
+
676
+ TY_(CheckAttributes)(doc, node);
677
+
678
+ attval = TY_(AttrGetById)(node, TidyAttr_ALIGN);
679
+
680
+ if (!AttrHasValue(attval))
681
+ return;
682
+
683
+ if (AttrValueIs(attval, "left") || AttrValueIs(attval, "right"))
684
+ TY_(ConstrainVersion)(doc, VERS_HTML40_LOOSE);
685
+ else if (AttrValueIs(attval, "top") || AttrValueIs(attval, "bottom"))
686
+ TY_(ConstrainVersion)(doc, ~(VERS_HTML20|VERS_HTML32));
687
+ else
688
+ TY_(ReportAttrError)(doc, node, attval, BAD_ATTRIBUTE_VALUE);
689
+ }
690
+
691
+ void CheckHTML( TidyDocImpl* doc, Node *node )
692
+ {
693
+ TY_(CheckAttributes)(doc, node);
694
+ }
695
+
696
+ void CheckAREA( TidyDocImpl* doc, Node *node )
697
+ {
698
+ Bool HasAlt = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
699
+ Bool HasHref = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
700
+ Bool HasNohref = TY_(AttrGetById)(node, TidyAttr_NOHREF) != NULL;
701
+
702
+ TY_(CheckAttributes)(doc, node);
703
+
704
+ if ( !HasAlt )
705
+ {
706
+ if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
707
+ {
708
+ doc->badAccess |= BA_MISSING_LINK_ALT;
709
+ TY_(ReportMissingAttr)( doc, node, "alt" );
710
+ }
711
+ }
712
+
713
+ if ( !HasHref && !HasNohref )
714
+ TY_(ReportMissingAttr)( doc, node, "href" );
715
+ }
716
+
717
+ void CheckTABLE( TidyDocImpl* doc, Node *node )
718
+ {
719
+ AttVal* attval;
720
+ Bool HasSummary = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;
721
+
722
+ TY_(CheckAttributes)(doc, node);
723
+
724
+ /* a missing summary attribute is bad accessibility, no matter
725
+ what HTML version is involved; a document without is valid */
726
+ if (cfg(doc, TidyAccessibilityCheckLevel) == 0)
727
+ {
728
+ if (!HasSummary)
729
+ {
730
+ doc->badAccess |= BA_MISSING_SUMMARY;
731
+ TY_(ReportMissingAttr)( doc, node, "summary");
732
+ }
733
+ }
734
+
735
+ /* convert <table border> to <table border="1"> */
736
+ if ( cfgBool(doc, TidyXmlOut) && (attval = TY_(AttrGetById)(node, TidyAttr_BORDER)) )
737
+ {
738
+ if (attval->value == NULL)
739
+ attval->value = TY_(tmbstrdup)(doc->allocator, "1");
740
+ }
741
+ }
742
+
743
+ /* add missing type attribute when appropriate */
744
+ void CheckSCRIPT( TidyDocImpl* doc, Node *node )
745
+ {
746
+ AttVal *lang, *type;
747
+ char buf[16];
748
+
749
+ TY_(CheckAttributes)(doc, node);
750
+
751
+ lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
752
+ type = TY_(AttrGetById)(node, TidyAttr_TYPE);
753
+
754
+ if (!type)
755
+ {
756
+ /* check for javascript */
757
+ if (lang)
758
+ {
759
+ /* Test #696799. lang->value can be NULL. */
760
+ buf[0] = '\0';
761
+ TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
762
+ buf[10] = '\0';
763
+
764
+ if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
765
+ TY_(tmbstrncasecmp)(buf, "jscript", 7) == 0)
766
+ {
767
+ TY_(AddAttribute)(doc, node, "type", "text/javascript");
768
+ }
769
+ else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
770
+ {
771
+ /* per Randy Waki 8/6/01 */
772
+ TY_(AddAttribute)(doc, node, "type", "text/vbscript");
773
+ }
774
+ }
775
+ else
776
+ {
777
+ TY_(AddAttribute)(doc, node, "type", "text/javascript");
778
+ }
779
+
780
+ type = TY_(AttrGetById)(node, TidyAttr_TYPE);
781
+
782
+ if (type != NULL)
783
+ {
784
+ TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
785
+ }
786
+ else
787
+ {
788
+ TY_(ReportMissingAttr)(doc, node, "type");
789
+ }
790
+ }
791
+ }
792
+
793
+
794
+ /* add missing type attribute when appropriate */
795
+ void CheckSTYLE( TidyDocImpl* doc, Node *node )
796
+ {
797
+ AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
798
+
799
+ TY_(CheckAttributes)( doc, node );
800
+
801
+ if ( !type || !type->value || !TY_(tmbstrlen)(type->value) )
802
+ {
803
+ type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
804
+ TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
805
+ }
806
+ }
807
+
808
+ /* add missing type attribute when appropriate */
809
+ void CheckLINK( TidyDocImpl* doc, Node *node )
810
+ {
811
+ AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL);
812
+
813
+ TY_(CheckAttributes)( doc, node );
814
+
815
+ /* todo: <link rel="alternate stylesheet"> */
816
+ if (AttrValueIs(rel, "stylesheet"))
817
+ {
818
+ AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
819
+ if (!type)
820
+ {
821
+ TY_(AddAttribute)( doc, node, "type", "text/css" );
822
+ type = TY_(AttrGetById)(node, TidyAttr_TYPE);
823
+ TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
824
+ }
825
+ }
826
+ }
827
+
828
+ /* reports missing action attribute */
829
+ void CheckFORM( TidyDocImpl* doc, Node *node )
830
+ {
831
+ AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION);
832
+
833
+ TY_(CheckAttributes)(doc, node);
834
+
835
+ if (!action)
836
+ TY_(ReportMissingAttr)(doc, node, "action");
837
+ }
838
+
839
+ /* reports missing content attribute */
840
+ void CheckMETA( TidyDocImpl* doc, Node *node )
841
+ {
842
+ AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT);
843
+
844
+ TY_(CheckAttributes)(doc, node);
845
+
846
+ if (!content)
847
+ TY_(ReportMissingAttr)( doc, node, "content" );
848
+ /* name or http-equiv attribute must also be set */
849
+ }
850
+
851
+
852
+ Bool TY_(nodeIsText)( Node* node )
853
+ {
854
+ return ( node && node->type == TextNode );
855
+ }
856
+
857
+ Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node )
858
+ {
859
+ if ( doc && node )
860
+ {
861
+ uint ix;
862
+ Lexer* lexer = doc->lexer;
863
+ for ( ix = node->start; ix < node->end; ++ix )
864
+ {
865
+ /* whitespace */
866
+ if ( !TY_(IsWhite)( lexer->lexbuf[ix] ) )
867
+ return yes;
868
+ }
869
+ }
870
+ return no;
871
+ }
872
+
873
+ Bool TY_(nodeIsElement)( Node* node )
874
+ {
875
+ return ( node &&
876
+ (node->type == StartTag || node->type == StartEndTag) );
877
+ }
878
+
879
#if 0
/* True only when every content-model bit requested in contentModel
** is set on the node's tag, i.e. the masked model equals the request.
** A NULL node or a node without a tag yields no.
*/
Bool nodeMatchCM( Node* node, uint contentModel )
{
    if ( node == NULL || node->tag == NULL )
        return no;

    return ( (node->tag->model & contentModel) == contentModel );
}
#endif
889
+
890
+ /* True if any of the bits requested are set.
891
+ */
892
+ Bool TY_(nodeHasCM)( Node* node, uint contentModel )
893
+ {
894
+ return ( node && node->tag &&
895
+ (node->tag->model & contentModel) != 0 );
896
+ }
897
+
898
+ Bool TY_(nodeCMIsBlock)( Node* node )
899
+ {
900
+ return TY_(nodeHasCM)( node, CM_BLOCK );
901
+ }
902
+ Bool TY_(nodeCMIsInline)( Node* node )
903
+ {
904
+ return TY_(nodeHasCM)( node, CM_INLINE );
905
+ }
906
+ Bool TY_(nodeCMIsEmpty)( Node* node )
907
+ {
908
+ return TY_(nodeHasCM)( node, CM_EMPTY );
909
+ }
910
+
911
+ Bool TY_(nodeIsHeader)( Node* node )
912
+ {
913
+ TidyTagId tid = TagId( node );
914
+ return ( tid && (
915
+ tid == TidyTag_H1 ||
916
+ tid == TidyTag_H2 ||
917
+ tid == TidyTag_H3 ||
918
+ tid == TidyTag_H4 ||
919
+ tid == TidyTag_H5 ||
920
+ tid == TidyTag_H6 ));
921
+ }
922
+
923
+ uint TY_(nodeHeaderLevel)( Node* node )
924
+ {
925
+ TidyTagId tid = TagId( node );
926
+ switch ( tid )
927
+ {
928
+ case TidyTag_H1:
929
+ return 1;
930
+ case TidyTag_H2:
931
+ return 2;
932
+ case TidyTag_H3:
933
+ return 3;
934
+ case TidyTag_H4:
935
+ return 4;
936
+ case TidyTag_H5:
937
+ return 5;
938
+ case TidyTag_H6:
939
+ return 6;
940
+ default:
941
+ {
942
+ /* fall through */
943
+ }
944
+ }
945
+ return 0;
946
+ }
947
+
948
+ /*
949
+ * local variables:
950
+ * mode: c
951
+ * indent-tabs-mode: nil
952
+ * c-basic-offset: 4
953
+ * eval: (c-set-offset 'substatement-open 0)
954
+ * end:
955
+ */