mandoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +7 -0
  3. data/COPYING +674 -0
  4. data/README.md +117 -0
  5. data/ext/mandoc/extconf.rb +59 -0
  6. data/ext/mandoc/rb_mandoc.c +548 -0
  7. data/ext/mandoc/rb_mandoc.h +22 -0
  8. data/lib/mandoc/version.rb +19 -0
  9. data/lib/mandoc.rb +26 -0
  10. data/mandoc-1.14.6/LICENSE +55 -0
  11. data/mandoc-1.14.6/arch.c +54 -0
  12. data/mandoc-1.14.6/att.c +49 -0
  13. data/mandoc-1.14.6/catman.c +260 -0
  14. data/mandoc-1.14.6/cgi.c +1279 -0
  15. data/mandoc-1.14.6/chars.c +507 -0
  16. data/mandoc-1.14.6/compat_err.c +103 -0
  17. data/mandoc-1.14.6/compat_fts.c +696 -0
  18. data/mandoc-1.14.6/compat_fts.h +106 -0
  19. data/mandoc-1.14.6/compat_getline.c +59 -0
  20. data/mandoc-1.14.6/compat_getsubopt.c +87 -0
  21. data/mandoc-1.14.6/compat_isblank.c +23 -0
  22. data/mandoc-1.14.6/compat_mkdtemp.c +50 -0
  23. data/mandoc-1.14.6/compat_mkstemps.c +63 -0
  24. data/mandoc-1.14.6/compat_ohash.c +330 -0
  25. data/mandoc-1.14.6/compat_ohash.h +72 -0
  26. data/mandoc-1.14.6/compat_progname.c +31 -0
  27. data/mandoc-1.14.6/compat_reallocarray.c +40 -0
  28. data/mandoc-1.14.6/compat_recallocarray.c +99 -0
  29. data/mandoc-1.14.6/compat_strcasestr.c +64 -0
  30. data/mandoc-1.14.6/compat_stringlist.c +135 -0
  31. data/mandoc-1.14.6/compat_stringlist.h +48 -0
  32. data/mandoc-1.14.6/compat_strlcat.c +57 -0
  33. data/mandoc-1.14.6/compat_strlcpy.c +52 -0
  34. data/mandoc-1.14.6/compat_strndup.c +42 -0
  35. data/mandoc-1.14.6/compat_strsep.c +70 -0
  36. data/mandoc-1.14.6/compat_strtonum.c +67 -0
  37. data/mandoc-1.14.6/compat_vasprintf.c +47 -0
  38. data/mandoc-1.14.6/config.h +52 -0
  39. data/mandoc-1.14.6/dba.c +508 -0
  40. data/mandoc-1.14.6/dba.h +50 -0
  41. data/mandoc-1.14.6/dba_array.c +190 -0
  42. data/mandoc-1.14.6/dba_array.h +47 -0
  43. data/mandoc-1.14.6/dba_read.c +74 -0
  44. data/mandoc-1.14.6/dba_write.c +127 -0
  45. data/mandoc-1.14.6/dba_write.h +30 -0
  46. data/mandoc-1.14.6/dbm.c +480 -0
  47. data/mandoc-1.14.6/dbm.h +68 -0
  48. data/mandoc-1.14.6/dbm_map.c +194 -0
  49. data/mandoc-1.14.6/dbm_map.h +29 -0
  50. data/mandoc-1.14.6/demandoc.c +260 -0
  51. data/mandoc-1.14.6/eqn.c +1132 -0
  52. data/mandoc-1.14.6/eqn.h +72 -0
  53. data/mandoc-1.14.6/eqn_html.c +246 -0
  54. data/mandoc-1.14.6/eqn_parse.h +48 -0
  55. data/mandoc-1.14.6/eqn_term.c +174 -0
  56. data/mandoc-1.14.6/html.c +1102 -0
  57. data/mandoc-1.14.6/html.h +142 -0
  58. data/mandoc-1.14.6/lib.c +35 -0
  59. data/mandoc-1.14.6/libman.h +42 -0
  60. data/mandoc-1.14.6/libmandoc.h +85 -0
  61. data/mandoc-1.14.6/libmdoc.h +87 -0
  62. data/mandoc-1.14.6/main.c +1375 -0
  63. data/mandoc-1.14.6/main.h +53 -0
  64. data/mandoc-1.14.6/man.c +345 -0
  65. data/mandoc-1.14.6/man.h +21 -0
  66. data/mandoc-1.14.6/man_html.c +640 -0
  67. data/mandoc-1.14.6/man_macro.c +470 -0
  68. data/mandoc-1.14.6/man_term.c +1143 -0
  69. data/mandoc-1.14.6/man_validate.c +660 -0
  70. data/mandoc-1.14.6/manconf.h +58 -0
  71. data/mandoc-1.14.6/mandoc.c +669 -0
  72. data/mandoc-1.14.6/mandoc.h +329 -0
  73. data/mandoc-1.14.6/mandoc_aux.c +118 -0
  74. data/mandoc-1.14.6/mandoc_aux.h +27 -0
  75. data/mandoc-1.14.6/mandoc_msg.c +375 -0
  76. data/mandoc-1.14.6/mandoc_ohash.c +65 -0
  77. data/mandoc-1.14.6/mandoc_ohash.h +23 -0
  78. data/mandoc-1.14.6/mandoc_parse.h +44 -0
  79. data/mandoc-1.14.6/mandoc_xr.c +123 -0
  80. data/mandoc-1.14.6/mandoc_xr.h +31 -0
  81. data/mandoc-1.14.6/mandocd.c +282 -0
  82. data/mandoc-1.14.6/mandocdb.c +2448 -0
  83. data/mandoc-1.14.6/manpath.c +363 -0
  84. data/mandoc-1.14.6/mansearch.c +851 -0
  85. data/mandoc-1.14.6/mansearch.h +118 -0
  86. data/mandoc-1.14.6/mdoc.c +433 -0
  87. data/mandoc-1.14.6/mdoc.h +158 -0
  88. data/mandoc-1.14.6/mdoc_argv.c +682 -0
  89. data/mandoc-1.14.6/mdoc_html.c +1762 -0
  90. data/mandoc-1.14.6/mdoc_macro.c +1600 -0
  91. data/mandoc-1.14.6/mdoc_man.c +1850 -0
  92. data/mandoc-1.14.6/mdoc_markdown.c +1610 -0
  93. data/mandoc-1.14.6/mdoc_state.c +256 -0
  94. data/mandoc-1.14.6/mdoc_term.c +1964 -0
  95. data/mandoc-1.14.6/mdoc_validate.c +3062 -0
  96. data/mandoc-1.14.6/msec.c +37 -0
  97. data/mandoc-1.14.6/out.c +544 -0
  98. data/mandoc-1.14.6/out.h +70 -0
  99. data/mandoc-1.14.6/preconv.c +179 -0
  100. data/mandoc-1.14.6/read.c +732 -0
  101. data/mandoc-1.14.6/roff.c +4390 -0
  102. data/mandoc-1.14.6/roff.h +561 -0
  103. data/mandoc-1.14.6/roff_html.c +119 -0
  104. data/mandoc-1.14.6/roff_int.h +94 -0
  105. data/mandoc-1.14.6/roff_term.c +266 -0
  106. data/mandoc-1.14.6/roff_validate.c +151 -0
  107. data/mandoc-1.14.6/soelim.c +182 -0
  108. data/mandoc-1.14.6/st.c +82 -0
  109. data/mandoc-1.14.6/tag.c +327 -0
  110. data/mandoc-1.14.6/tag.h +35 -0
  111. data/mandoc-1.14.6/tbl.c +183 -0
  112. data/mandoc-1.14.6/tbl.h +121 -0
  113. data/mandoc-1.14.6/tbl_data.c +323 -0
  114. data/mandoc-1.14.6/tbl_html.c +293 -0
  115. data/mandoc-1.14.6/tbl_int.h +47 -0
  116. data/mandoc-1.14.6/tbl_layout.c +376 -0
  117. data/mandoc-1.14.6/tbl_opts.c +173 -0
  118. data/mandoc-1.14.6/tbl_parse.h +30 -0
  119. data/mandoc-1.14.6/tbl_term.c +948 -0
  120. data/mandoc-1.14.6/term.c +1113 -0
  121. data/mandoc-1.14.6/term.h +158 -0
  122. data/mandoc-1.14.6/term_ascii.c +424 -0
  123. data/mandoc-1.14.6/term_ps.c +1362 -0
  124. data/mandoc-1.14.6/term_tab.c +130 -0
  125. data/mandoc-1.14.6/term_tag.c +227 -0
  126. data/mandoc-1.14.6/term_tag.h +34 -0
  127. data/mandoc-1.14.6/tree.c +536 -0
  128. metadata +170 -0
@@ -0,0 +1,1102 @@
1
+ /* $Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp $ */
2
+ /*
3
+ * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4
+ * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
5
+ *
6
+ * Permission to use, copy, modify, and distribute this software for any
7
+ * purpose with or without fee is hereby granted, provided that the above
8
+ * copyright notice and this permission notice appear in all copies.
9
+ *
10
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
+ *
18
+ * Common functions for mandoc(1) HTML formatters.
19
+ * For use by individual formatters and by the main program.
20
+ */
21
+ #include "config.h"
22
+
23
+ #include <sys/types.h>
24
+ #include <sys/stat.h>
25
+
26
+ #include <assert.h>
27
+ #include <ctype.h>
28
+ #include <stdarg.h>
29
+ #include <stddef.h>
30
+ #include <stdio.h>
31
+ #include <stdint.h>
32
+ #include <stdlib.h>
33
+ #include <string.h>
34
+ #include <unistd.h>
35
+
36
+ #include "mandoc_aux.h"
37
+ #include "mandoc_ohash.h"
38
+ #include "mandoc.h"
39
+ #include "roff.h"
40
+ #include "out.h"
41
+ #include "html.h"
42
+ #include "manconf.h"
43
+ #include "main.h"
44
+
45
/*
 * Static description of one HTML element type: the element name
 * as written into the output and a bit mask of HTML_* properties.
 */
struct	htmldata {
	const char	 *name;		/* Name printed inside the tag. */
	int		  flags;	/* Bitwise OR of the bits below. */
};

/* Content model of the element. */
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */

/* Where to break output lines relative to the tags. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)

/* Indentation of the element content. */
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
61
+
62
+ static const struct htmldata htmltags[TAG_MAX] = {
63
+ {"html", HTML_NLALL},
64
+ {"head", HTML_NLALL | HTML_INDENT},
65
+ {"meta", HTML_NOSTACK | HTML_NLALL},
66
+ {"link", HTML_NOSTACK | HTML_NLALL},
67
+ {"style", HTML_NLALL | HTML_INDENT},
68
+ {"title", HTML_NLAROUND},
69
+ {"body", HTML_NLALL},
70
+ {"div", HTML_NLAROUND},
71
+ {"section", HTML_NLALL},
72
+ {"table", HTML_NLALL | HTML_INDENT},
73
+ {"tr", HTML_NLALL | HTML_INDENT},
74
+ {"td", HTML_NLAROUND},
75
+ {"li", HTML_NLAROUND | HTML_INDENT},
76
+ {"ul", HTML_NLALL | HTML_INDENT},
77
+ {"ol", HTML_NLALL | HTML_INDENT},
78
+ {"dl", HTML_NLALL | HTML_INDENT},
79
+ {"dt", HTML_NLAROUND},
80
+ {"dd", HTML_NLAROUND | HTML_INDENT},
81
+ {"h1", HTML_TOPHRASE | HTML_NLAROUND},
82
+ {"h2", HTML_TOPHRASE | HTML_NLAROUND},
83
+ {"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
84
+ {"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
85
+ {"a", HTML_INPHRASE | HTML_TOPHRASE},
86
+ {"b", HTML_INPHRASE | HTML_TOPHRASE},
87
+ {"cite", HTML_INPHRASE | HTML_TOPHRASE},
88
+ {"code", HTML_INPHRASE | HTML_TOPHRASE},
89
+ {"i", HTML_INPHRASE | HTML_TOPHRASE},
90
+ {"small", HTML_INPHRASE | HTML_TOPHRASE},
91
+ {"span", HTML_INPHRASE | HTML_TOPHRASE},
92
+ {"var", HTML_INPHRASE | HTML_TOPHRASE},
93
+ {"br", HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
94
+ {"hr", HTML_INPHRASE | HTML_NOSTACK},
95
+ {"mark", HTML_INPHRASE },
96
+ {"math", HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
97
+ {"mrow", 0},
98
+ {"mi", 0},
99
+ {"mn", 0},
100
+ {"mo", 0},
101
+ {"msup", 0},
102
+ {"msub", 0},
103
+ {"msubsup", 0},
104
+ {"mfrac", 0},
105
+ {"msqrt", 0},
106
+ {"mfenced", 0},
107
+ {"mtable", 0},
108
+ {"mtr", 0},
109
+ {"mtd", 0},
110
+ {"munderover", 0},
111
+ {"munder", 0},
112
+ {"mover", 0},
113
+ };
114
+
115
+ /* Avoid duplicate HTML id= attributes. */
116
+
117
+ struct id_entry {
118
+ int ord; /* Ordinal number of the latest occurrence. */
119
+ char id[]; /* The id= attribute without any ordinal suffix. */
120
+ };
121
+ static struct ohash id_unique;
122
+
123
/* Shared cleanup used by html_reset() and html_free(). */
static	void	 html_reset_internal(struct html *h);

/* Low level output functions, defined at the end of this file. */
static	void	 print_byte(struct html *h, char c);
static	void	 print_endword(struct html *h);
static	void	 print_indent(struct html *h);
static	void	 print_word(struct html *h, const char *cp);

/* Element, attribute, and text encoding helpers. */
static	void	 print_ctag(struct html *h, struct tag *tag);
static	int	 print_escape(struct html *h, char c);
static	int	 print_encode(struct html *h, const char *p,
			const char *pend, int norecurse);
static	void	 print_href(struct html *h, const char *name,
			const char *sec, int man);
static	void	 print_metaf(struct html *h);
134
+
135
+
136
+ void *
137
+ html_alloc(const struct manoutput *outopts)
138
+ {
139
+ struct html *h;
140
+
141
+ h = mandoc_calloc(1, sizeof(struct html));
142
+
143
+ h->tag = NULL;
144
+ h->metac = h->metal = ESCAPE_FONTROMAN;
145
+ h->style = outopts->style;
146
+ if ((h->base_man1 = outopts->man) == NULL)
147
+ h->base_man2 = NULL;
148
+ else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
149
+ *h->base_man2++ = '\0';
150
+ h->base_includes = outopts->includes;
151
+ if (outopts->fragment)
152
+ h->oflags |= HTML_FRAGMENT;
153
+ if (outopts->toc)
154
+ h->oflags |= HTML_TOC;
155
+
156
+ mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
157
+
158
+ return h;
159
+ }
160
+
161
+ static void
162
+ html_reset_internal(struct html *h)
163
+ {
164
+ struct tag *tag;
165
+ struct id_entry *entry;
166
+ unsigned int slot;
167
+
168
+ while ((tag = h->tag) != NULL) {
169
+ h->tag = tag->next;
170
+ free(tag);
171
+ }
172
+ entry = ohash_first(&id_unique, &slot);
173
+ while (entry != NULL) {
174
+ free(entry);
175
+ entry = ohash_next(&id_unique, &slot);
176
+ }
177
+ ohash_delete(&id_unique);
178
+ }
179
+
180
+ void
181
+ html_reset(void *p)
182
+ {
183
+ html_reset_internal(p);
184
+ mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
185
+ }
186
+
187
/*
 * Release the formatter state allocated by html_alloc(), including
 * the open element stack and the contents of the id_unique table.
 * The argument is a struct html pointer and is invalid afterwards.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
193
+
194
+ void
195
+ print_gen_head(struct html *h)
196
+ {
197
+ struct tag *t;
198
+
199
+ print_otag(h, TAG_META, "?", "charset", "utf-8");
200
+ print_otag(h, TAG_META, "??", "name", "viewport",
201
+ "content", "width=device-width, initial-scale=1.0");
202
+ if (h->style != NULL) {
203
+ print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
204
+ h->style, "type", "text/css", "media", "all");
205
+ return;
206
+ }
207
+
208
+ /*
209
+ * Print a minimal embedded style sheet.
210
+ */
211
+
212
+ t = print_otag(h, TAG_STYLE, "");
213
+ print_text(h, "table.head, table.foot { width: 100%; }");
214
+ print_endline(h);
215
+ print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
216
+ print_endline(h);
217
+ print_text(h, "td.head-vol { text-align: center; }");
218
+ print_endline(h);
219
+ print_text(h, ".Nd, .Bf, .Op { display: inline; }");
220
+ print_endline(h);
221
+ print_text(h, ".Pa, .Ad { font-style: italic; }");
222
+ print_endline(h);
223
+ print_text(h, ".Ms { font-weight: bold; }");
224
+ print_endline(h);
225
+ print_text(h, ".Bl-diag ");
226
+ print_byte(h, '>');
227
+ print_text(h, " dt { font-weight: bold; }");
228
+ print_endline(h);
229
+ print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
230
+ "{ font-weight: bold; font-family: inherit; }");
231
+ print_tagq(h, t);
232
+ }
233
+
234
+ int
235
+ html_setfont(struct html *h, enum mandoc_esc font)
236
+ {
237
+ switch (font) {
238
+ case ESCAPE_FONTPREV:
239
+ font = h->metal;
240
+ break;
241
+ case ESCAPE_FONTITALIC:
242
+ case ESCAPE_FONTBOLD:
243
+ case ESCAPE_FONTBI:
244
+ case ESCAPE_FONTROMAN:
245
+ case ESCAPE_FONTCR:
246
+ case ESCAPE_FONTCB:
247
+ case ESCAPE_FONTCI:
248
+ break;
249
+ case ESCAPE_FONT:
250
+ font = ESCAPE_FONTROMAN;
251
+ break;
252
+ default:
253
+ return 0;
254
+ }
255
+ h->metal = h->metac;
256
+ h->metac = font;
257
+ return 1;
258
+ }
259
+
260
+ static void
261
+ print_metaf(struct html *h)
262
+ {
263
+ if (h->metaf) {
264
+ print_tagq(h, h->metaf);
265
+ h->metaf = NULL;
266
+ }
267
+ switch (h->metac) {
268
+ case ESCAPE_FONTITALIC:
269
+ h->metaf = print_otag(h, TAG_I, "");
270
+ break;
271
+ case ESCAPE_FONTBOLD:
272
+ h->metaf = print_otag(h, TAG_B, "");
273
+ break;
274
+ case ESCAPE_FONTBI:
275
+ h->metaf = print_otag(h, TAG_B, "");
276
+ print_otag(h, TAG_I, "");
277
+ break;
278
+ case ESCAPE_FONTCR:
279
+ h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
280
+ break;
281
+ case ESCAPE_FONTCB:
282
+ h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
283
+ print_otag(h, TAG_B, "");
284
+ break;
285
+ case ESCAPE_FONTCI:
286
+ h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
287
+ print_otag(h, TAG_I, "");
288
+ break;
289
+ default:
290
+ break;
291
+ }
292
+ }
293
+
294
+ void
295
+ html_close_paragraph(struct html *h)
296
+ {
297
+ struct tag *this, *next;
298
+ int flags;
299
+
300
+ this = h->tag;
301
+ for (;;) {
302
+ next = this->next;
303
+ flags = htmltags[this->tag].flags;
304
+ if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
305
+ print_ctag(h, this);
306
+ if ((flags & HTML_INPHRASE) == 0)
307
+ break;
308
+ this = next;
309
+ }
310
+ }
311
+
312
+ /*
313
+ * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
314
+ * TOKEN_NONE does not switch. The old mode is returned.
315
+ */
316
+ enum roff_tok
317
+ html_fillmode(struct html *h, enum roff_tok want)
318
+ {
319
+ struct tag *t;
320
+ enum roff_tok had;
321
+
322
+ for (t = h->tag; t != NULL; t = t->next)
323
+ if (t->tag == TAG_PRE)
324
+ break;
325
+
326
+ had = t == NULL ? ROFF_fi : ROFF_nf;
327
+
328
+ if (want != had) {
329
+ switch (want) {
330
+ case ROFF_fi:
331
+ print_tagq(h, t);
332
+ break;
333
+ case ROFF_nf:
334
+ html_close_paragraph(h);
335
+ print_otag(h, TAG_PRE, "");
336
+ break;
337
+ case TOKEN_NONE:
338
+ break;
339
+ default:
340
+ abort();
341
+ }
342
+ }
343
+ return had;
344
+ }
345
+
346
+ /*
347
+ * Allocate a string to be used for the "id=" attribute of an HTML
348
+ * element and/or as a segment identifier for a URI in an <a> element.
349
+ * The function may fail and return NULL if the node lacks text data
350
+ * to create the attribute from.
351
+ * The caller is responsible for free(3)ing the returned string.
352
+ *
353
+ * If the "unique" argument is non-zero, the "id_unique" ohash table
354
+ * is used for de-duplication. If the "unique" argument is 1,
355
+ * it is the first time the function is called for this tag and
356
+ * location, so if an ordinal suffix is needed, it is incremented.
357
+ * If the "unique" argument is 2, it is the second time the function
358
+ * is called for this tag and location, so the ordinal suffix
359
+ * remains unchanged.
360
+ */
361
+ char *
362
+ html_make_id(const struct roff_node *n, int unique)
363
+ {
364
+ const struct roff_node *nch;
365
+ struct id_entry *entry;
366
+ char *buf, *cp;
367
+ size_t len;
368
+ unsigned int slot;
369
+
370
+ if (n->tag != NULL)
371
+ buf = mandoc_strdup(n->tag);
372
+ else {
373
+ switch (n->tok) {
374
+ case MDOC_Sh:
375
+ case MDOC_Ss:
376
+ case MDOC_Sx:
377
+ case MAN_SH:
378
+ case MAN_SS:
379
+ for (nch = n->child; nch != NULL; nch = nch->next)
380
+ if (nch->type != ROFFT_TEXT)
381
+ return NULL;
382
+ buf = NULL;
383
+ deroff(&buf, n);
384
+ if (buf == NULL)
385
+ return NULL;
386
+ break;
387
+ default:
388
+ if (n->child == NULL || n->child->type != ROFFT_TEXT)
389
+ return NULL;
390
+ buf = mandoc_strdup(n->child->string);
391
+ break;
392
+ }
393
+ }
394
+
395
+ /*
396
+ * In ID attributes, only use ASCII characters that are
397
+ * permitted in URL-fragment strings according to the
398
+ * explicit list at:
399
+ * https://url.spec.whatwg.org/#url-fragment-string
400
+ * In addition, reserve '~' for ordinal suffixes.
401
+ */
402
+
403
+ for (cp = buf; *cp != '\0'; cp++)
404
+ if (isalnum((unsigned char)*cp) == 0 &&
405
+ strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
406
+ *cp = '_';
407
+
408
+ if (unique == 0)
409
+ return buf;
410
+
411
+ /* Avoid duplicate HTML id= attributes. */
412
+
413
+ slot = ohash_qlookup(&id_unique, buf);
414
+ if ((entry = ohash_find(&id_unique, slot)) == NULL) {
415
+ len = strlen(buf) + 1;
416
+ entry = mandoc_malloc(sizeof(*entry) + len);
417
+ entry->ord = 1;
418
+ memcpy(entry->id, buf, len);
419
+ ohash_insert(&id_unique, slot, entry);
420
+ } else if (unique == 1)
421
+ entry->ord++;
422
+
423
+ if (entry->ord > 1) {
424
+ cp = buf;
425
+ mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
426
+ free(cp);
427
+ }
428
+ return buf;
429
+ }
430
+
431
+ static int
432
+ print_escape(struct html *h, char c)
433
+ {
434
+
435
+ switch (c) {
436
+ case '<':
437
+ print_word(h, "&lt;");
438
+ break;
439
+ case '>':
440
+ print_word(h, "&gt;");
441
+ break;
442
+ case '&':
443
+ print_word(h, "&amp;");
444
+ break;
445
+ case '"':
446
+ print_word(h, "&quot;");
447
+ break;
448
+ case ASCII_NBRSP:
449
+ print_word(h, "&nbsp;");
450
+ break;
451
+ case ASCII_HYPH:
452
+ print_byte(h, '-');
453
+ break;
454
+ case ASCII_BREAK:
455
+ break;
456
+ default:
457
+ return 0;
458
+ }
459
+ return 1;
460
+ }
461
+
462
+ static int
463
+ print_encode(struct html *h, const char *p, const char *pend, int norecurse)
464
+ {
465
+ char numbuf[16];
466
+ const char *seq;
467
+ size_t sz;
468
+ int c, len, breakline, nospace;
469
+ enum mandoc_esc esc;
470
+ static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
471
+ ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
472
+
473
+ if (pend == NULL)
474
+ pend = strchr(p, '\0');
475
+
476
+ breakline = 0;
477
+ nospace = 0;
478
+
479
+ while (p < pend) {
480
+ if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
481
+ h->flags &= ~HTML_SKIPCHAR;
482
+ p++;
483
+ continue;
484
+ }
485
+
486
+ for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
487
+ print_byte(h, *p);
488
+
489
+ if (breakline &&
490
+ (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
491
+ print_otag(h, TAG_BR, "");
492
+ breakline = 0;
493
+ while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
494
+ p++;
495
+ continue;
496
+ }
497
+
498
+ if (p >= pend)
499
+ break;
500
+
501
+ if (*p == ' ') {
502
+ print_endword(h);
503
+ p++;
504
+ continue;
505
+ }
506
+
507
+ if (print_escape(h, *p++))
508
+ continue;
509
+
510
+ esc = mandoc_escape(&p, &seq, &len);
511
+ switch (esc) {
512
+ case ESCAPE_FONT:
513
+ case ESCAPE_FONTPREV:
514
+ case ESCAPE_FONTBOLD:
515
+ case ESCAPE_FONTITALIC:
516
+ case ESCAPE_FONTBI:
517
+ case ESCAPE_FONTROMAN:
518
+ case ESCAPE_FONTCR:
519
+ case ESCAPE_FONTCB:
520
+ case ESCAPE_FONTCI:
521
+ if (0 == norecurse) {
522
+ h->flags |= HTML_NOSPACE;
523
+ if (html_setfont(h, esc))
524
+ print_metaf(h);
525
+ h->flags &= ~HTML_NOSPACE;
526
+ }
527
+ continue;
528
+ case ESCAPE_SKIPCHAR:
529
+ h->flags |= HTML_SKIPCHAR;
530
+ continue;
531
+ case ESCAPE_ERROR:
532
+ continue;
533
+ default:
534
+ break;
535
+ }
536
+
537
+ if (h->flags & HTML_SKIPCHAR) {
538
+ h->flags &= ~HTML_SKIPCHAR;
539
+ continue;
540
+ }
541
+
542
+ switch (esc) {
543
+ case ESCAPE_UNICODE:
544
+ /* Skip past "u" header. */
545
+ c = mchars_num2uc(seq + 1, len - 1);
546
+ break;
547
+ case ESCAPE_NUMBERED:
548
+ c = mchars_num2char(seq, len);
549
+ if (c < 0)
550
+ continue;
551
+ break;
552
+ case ESCAPE_SPECIAL:
553
+ c = mchars_spec2cp(seq, len);
554
+ if (c <= 0)
555
+ continue;
556
+ break;
557
+ case ESCAPE_UNDEF:
558
+ c = *seq;
559
+ break;
560
+ case ESCAPE_DEVICE:
561
+ print_word(h, "html");
562
+ continue;
563
+ case ESCAPE_BREAK:
564
+ breakline = 1;
565
+ continue;
566
+ case ESCAPE_NOSPACE:
567
+ if ('\0' == *p)
568
+ nospace = 1;
569
+ continue;
570
+ case ESCAPE_OVERSTRIKE:
571
+ if (len == 0)
572
+ continue;
573
+ c = seq[len - 1];
574
+ break;
575
+ default:
576
+ continue;
577
+ }
578
+ if ((c < 0x20 && c != 0x09) ||
579
+ (c > 0x7E && c < 0xA0))
580
+ c = 0xFFFD;
581
+ if (c > 0x7E) {
582
+ (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
583
+ print_word(h, numbuf);
584
+ } else if (print_escape(h, c) == 0)
585
+ print_byte(h, c);
586
+ }
587
+
588
+ return nospace;
589
+ }
590
+
591
+ static void
592
+ print_href(struct html *h, const char *name, const char *sec, int man)
593
+ {
594
+ struct stat sb;
595
+ const char *p, *pp;
596
+ char *filename;
597
+
598
+ if (man) {
599
+ pp = h->base_man1;
600
+ if (h->base_man2 != NULL) {
601
+ mandoc_asprintf(&filename, "%s.%s", name, sec);
602
+ if (stat(filename, &sb) == -1)
603
+ pp = h->base_man2;
604
+ free(filename);
605
+ }
606
+ } else
607
+ pp = h->base_includes;
608
+
609
+ while ((p = strchr(pp, '%')) != NULL) {
610
+ print_encode(h, pp, p, 1);
611
+ if (man && p[1] == 'S') {
612
+ if (sec == NULL)
613
+ print_byte(h, '1');
614
+ else
615
+ print_encode(h, sec, NULL, 1);
616
+ } else if ((man && p[1] == 'N') ||
617
+ (man == 0 && p[1] == 'I'))
618
+ print_encode(h, name, NULL, 1);
619
+ else
620
+ print_encode(h, p, p + 2, 1);
621
+ pp = p + 2;
622
+ }
623
+ if (*pp != '\0')
624
+ print_encode(h, pp, NULL, 1);
625
+ }
626
+
627
+ struct tag *
628
+ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
629
+ {
630
+ va_list ap;
631
+ struct tag *t;
632
+ const char *attr;
633
+ char *arg1, *arg2;
634
+ int style_written, tflags;
635
+
636
+ tflags = htmltags[tag].flags;
637
+
638
+ /* Flow content is not allowed in phrasing context. */
639
+
640
+ if ((tflags & HTML_INPHRASE) == 0) {
641
+ for (t = h->tag; t != NULL; t = t->next) {
642
+ if (t->closed)
643
+ continue;
644
+ assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
645
+ break;
646
+ }
647
+
648
+ /*
649
+ * Always wrap phrasing elements in a paragraph
650
+ * unless already contained in some flow container;
651
+ * never put them directly into a section.
652
+ */
653
+
654
+ } else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
655
+ print_otag(h, TAG_P, "c", "Pp");
656
+
657
+ /* Push this tag onto the stack of open scopes. */
658
+
659
+ if ((tflags & HTML_NOSTACK) == 0) {
660
+ t = mandoc_malloc(sizeof(struct tag));
661
+ t->tag = tag;
662
+ t->next = h->tag;
663
+ t->refcnt = 0;
664
+ t->closed = 0;
665
+ h->tag = t;
666
+ } else
667
+ t = NULL;
668
+
669
+ if (tflags & HTML_NLBEFORE)
670
+ print_endline(h);
671
+ if (h->col == 0)
672
+ print_indent(h);
673
+ else if ((h->flags & HTML_NOSPACE) == 0) {
674
+ if (h->flags & HTML_KEEP)
675
+ print_word(h, "&#x00A0;");
676
+ else {
677
+ if (h->flags & HTML_PREKEEP)
678
+ h->flags |= HTML_KEEP;
679
+ print_endword(h);
680
+ }
681
+ }
682
+
683
+ if ( ! (h->flags & HTML_NONOSPACE))
684
+ h->flags &= ~HTML_NOSPACE;
685
+ else
686
+ h->flags |= HTML_NOSPACE;
687
+
688
+ /* Print out the tag name and attributes. */
689
+
690
+ print_byte(h, '<');
691
+ print_word(h, htmltags[tag].name);
692
+
693
+ va_start(ap, fmt);
694
+
695
+ while (*fmt != '\0' && *fmt != 's') {
696
+
697
+ /* Parse attributes and arguments. */
698
+
699
+ arg1 = va_arg(ap, char *);
700
+ arg2 = NULL;
701
+ switch (*fmt++) {
702
+ case 'c':
703
+ attr = "class";
704
+ break;
705
+ case 'h':
706
+ attr = "href";
707
+ break;
708
+ case 'i':
709
+ attr = "id";
710
+ break;
711
+ case '?':
712
+ attr = arg1;
713
+ arg1 = va_arg(ap, char *);
714
+ break;
715
+ default:
716
+ abort();
717
+ }
718
+ if (*fmt == 'M')
719
+ arg2 = va_arg(ap, char *);
720
+ if (arg1 == NULL)
721
+ continue;
722
+
723
+ /* Print the attributes. */
724
+
725
+ print_byte(h, ' ');
726
+ print_word(h, attr);
727
+ print_byte(h, '=');
728
+ print_byte(h, '"');
729
+ switch (*fmt) {
730
+ case 'I':
731
+ print_href(h, arg1, NULL, 0);
732
+ fmt++;
733
+ break;
734
+ case 'M':
735
+ print_href(h, arg1, arg2, 1);
736
+ fmt++;
737
+ break;
738
+ case 'R':
739
+ print_byte(h, '#');
740
+ print_encode(h, arg1, NULL, 1);
741
+ fmt++;
742
+ break;
743
+ default:
744
+ print_encode(h, arg1, NULL, 1);
745
+ break;
746
+ }
747
+ print_byte(h, '"');
748
+ }
749
+
750
+ style_written = 0;
751
+ while (*fmt++ == 's') {
752
+ arg1 = va_arg(ap, char *);
753
+ arg2 = va_arg(ap, char *);
754
+ if (arg2 == NULL)
755
+ continue;
756
+ print_byte(h, ' ');
757
+ if (style_written == 0) {
758
+ print_word(h, "style=\"");
759
+ style_written = 1;
760
+ }
761
+ print_word(h, arg1);
762
+ print_byte(h, ':');
763
+ print_byte(h, ' ');
764
+ print_word(h, arg2);
765
+ print_byte(h, ';');
766
+ }
767
+ if (style_written)
768
+ print_byte(h, '"');
769
+
770
+ va_end(ap);
771
+
772
+ /* Accommodate for "well-formed" singleton escaping. */
773
+
774
+ if (htmltags[tag].flags & HTML_NOSTACK)
775
+ print_byte(h, '/');
776
+
777
+ print_byte(h, '>');
778
+
779
+ if (tflags & HTML_NLBEGIN)
780
+ print_endline(h);
781
+ else
782
+ h->flags |= HTML_NOSPACE;
783
+
784
+ if (tflags & HTML_INDENT)
785
+ h->indent++;
786
+ if (tflags & HTML_NOINDENT)
787
+ h->noindent++;
788
+
789
+ return t;
790
+ }
791
+
792
+ /*
793
+ * Print an element with an optional "id=" attribute.
794
+ * If the element has phrasing content and an "id=" attribute,
795
+ * also add a permalink: outside if it can be in phrasing context,
796
+ * inside otherwise.
797
+ */
798
+ struct tag *
799
+ print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
800
+ struct roff_node *n)
801
+ {
802
+ struct roff_node *nch;
803
+ struct tag *ret, *t;
804
+ char *id, *href;
805
+
806
+ ret = NULL;
807
+ id = href = NULL;
808
+ if (n->flags & NODE_ID)
809
+ id = html_make_id(n, 1);
810
+ if (n->flags & NODE_HREF)
811
+ href = id == NULL ? html_make_id(n, 2) : id;
812
+ if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
813
+ ret = print_otag(h, TAG_A, "chR", "permalink", href);
814
+ t = print_otag(h, elemtype, "ci", cattr, id);
815
+ if (ret == NULL) {
816
+ ret = t;
817
+ if (href != NULL && (nch = n->child) != NULL) {
818
+ /* man(7) is safe, it tags phrasing content only. */
819
+ if (n->tok > MDOC_MAX ||
820
+ htmltags[elemtype].flags & HTML_TOPHRASE)
821
+ nch = NULL;
822
+ else /* For mdoc(7), beware of nested blocks. */
823
+ while (nch != NULL && nch->type == ROFFT_TEXT)
824
+ nch = nch->next;
825
+ if (nch == NULL)
826
+ print_otag(h, TAG_A, "chR", "permalink", href);
827
+ }
828
+ }
829
+ free(id);
830
+ if (id == NULL)
831
+ free(href);
832
+ return ret;
833
+ }
834
+
835
+ static void
836
+ print_ctag(struct html *h, struct tag *tag)
837
+ {
838
+ int tflags;
839
+
840
+ if (tag->closed == 0) {
841
+ tag->closed = 1;
842
+ if (tag == h->metaf)
843
+ h->metaf = NULL;
844
+ if (tag == h->tblt)
845
+ h->tblt = NULL;
846
+
847
+ tflags = htmltags[tag->tag].flags;
848
+ if (tflags & HTML_INDENT)
849
+ h->indent--;
850
+ if (tflags & HTML_NOINDENT)
851
+ h->noindent--;
852
+ if (tflags & HTML_NLEND)
853
+ print_endline(h);
854
+ print_indent(h);
855
+ print_byte(h, '<');
856
+ print_byte(h, '/');
857
+ print_word(h, htmltags[tag->tag].name);
858
+ print_byte(h, '>');
859
+ if (tflags & HTML_NLAFTER)
860
+ print_endline(h);
861
+ }
862
+ if (tag->refcnt == 0) {
863
+ h->tag = tag->next;
864
+ free(tag);
865
+ }
866
+ }
867
+
868
/*
 * Print the document type declaration on an output line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
874
+
875
+ void
876
+ print_gen_comment(struct html *h, struct roff_node *n)
877
+ {
878
+ int wantblank;
879
+
880
+ print_word(h, "<!-- This is an automatically generated file."
881
+ " Do not edit.");
882
+ h->indent = 1;
883
+ wantblank = 0;
884
+ while (n != NULL && n->type == ROFFT_COMMENT) {
885
+ if (strstr(n->string, "-->") == NULL &&
886
+ (wantblank || *n->string != '\0')) {
887
+ print_endline(h);
888
+ print_indent(h);
889
+ print_word(h, n->string);
890
+ wantblank = *n->string != '\0';
891
+ }
892
+ n = n->next;
893
+ }
894
+ if (wantblank)
895
+ print_endline(h);
896
+ print_word(h, " -->");
897
+ print_endline(h);
898
+ h->indent = 0;
899
+ }
900
+
901
/*
 * Print text that is not associated with any syntax tree node
 * and hence never gets a permalink.
 */
void
print_text(struct html *h, const char *word)
{
	struct roff_node	*no_node = NULL;

	print_tagged_text(h, word, no_node);
}
906
+
907
+ void
908
+ print_tagged_text(struct html *h, const char *word, struct roff_node *n)
909
+ {
910
+ struct tag *t;
911
+ char *href;
912
+
913
+ /*
914
+ * Always wrap text in a paragraph unless already contained in
915
+ * some flow container; never put it directly into a section.
916
+ */
917
+
918
+ if (h->tag->tag == TAG_SECTION)
919
+ print_otag(h, TAG_P, "c", "Pp");
920
+
921
+ /* Output whitespace before this text? */
922
+
923
+ if (h->col && (h->flags & HTML_NOSPACE) == 0) {
924
+ if ( ! (HTML_KEEP & h->flags)) {
925
+ if (HTML_PREKEEP & h->flags)
926
+ h->flags |= HTML_KEEP;
927
+ print_endword(h);
928
+ } else
929
+ print_word(h, "&#x00A0;");
930
+ }
931
+
932
+ /*
933
+ * Optionally switch fonts, optionally write a permalink, then
934
+ * print the text, optionally surrounded by HTML whitespace.
935
+ */
936
+
937
+ assert(h->metaf == NULL);
938
+ print_metaf(h);
939
+ print_indent(h);
940
+
941
+ if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
942
+ t = print_otag(h, TAG_A, "chR", "permalink", href);
943
+ free(href);
944
+ } else
945
+ t = NULL;
946
+
947
+ if ( ! print_encode(h, word, NULL, 0)) {
948
+ if ( ! (h->flags & HTML_NONOSPACE))
949
+ h->flags &= ~HTML_NOSPACE;
950
+ h->flags &= ~HTML_NONEWLINE;
951
+ } else
952
+ h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
953
+
954
+ if (h->metaf != NULL) {
955
+ print_tagq(h, h->metaf);
956
+ h->metaf = NULL;
957
+ } else if (t != NULL)
958
+ print_tagq(h, t);
959
+
960
+ h->flags &= ~HTML_IGNDELIM;
961
+ }
962
+
963
+ void
964
+ print_tagq(struct html *h, const struct tag *until)
965
+ {
966
+ struct tag *this, *next;
967
+
968
+ for (this = h->tag; this != NULL; this = next) {
969
+ next = this == until ? NULL : this->next;
970
+ print_ctag(h, this);
971
+ }
972
+ }
973
+
974
+ /*
975
+ * Close out all open elements up to but excluding suntil.
976
+ * Note that a paragraph just inside stays open together with it
977
+ * because paragraphs include subsequent phrasing content.
978
+ */
979
+ void
980
+ print_stagq(struct html *h, const struct tag *suntil)
981
+ {
982
+ struct tag *this, *next;
983
+
984
+ for (this = h->tag; this != NULL; this = next) {
985
+ next = this->next;
986
+ if (this == suntil || (next == suntil &&
987
+ (this->tag == TAG_P || this->tag == TAG_PRE)))
988
+ break;
989
+ print_ctag(h, this);
990
+ }
991
+ }
992
+
993
+
994
+ /***********************************************************************
995
+ * Low level output functions.
996
+ * They implement line breaking using a short static buffer.
997
+ ***********************************************************************/
998
+
999
+ /*
1000
+ * Buffer one HTML output byte.
1001
+ * If the buffer is full, flush and deactivate it and start a new line.
1002
+ * If the buffer is inactive, print directly.
1003
+ */
1004
+ static void
1005
+ print_byte(struct html *h, char c)
1006
+ {
1007
+ if ((h->flags & HTML_BUFFER) == 0) {
1008
+ putchar(c);
1009
+ h->col++;
1010
+ return;
1011
+ }
1012
+
1013
+ if (h->col + h->bufcol < sizeof(h->buf)) {
1014
+ h->buf[h->bufcol++] = c;
1015
+ return;
1016
+ }
1017
+
1018
+ putchar('\n');
1019
+ h->col = 0;
1020
+ print_indent(h);
1021
+ putchar(' ');
1022
+ putchar(' ');
1023
+ fwrite(h->buf, h->bufcol, 1, stdout);
1024
+ putchar(c);
1025
+ h->col = (h->indent + 1) * 2 + h->bufcol + 1;
1026
+ h->bufcol = 0;
1027
+ h->flags &= ~HTML_BUFFER;
1028
+ }
1029
+
1030
+ /*
1031
+ * If something was printed on the current output line, end it.
1032
+ * Not to be called right after print_indent().
1033
+ */
1034
+ void
1035
+ print_endline(struct html *h)
1036
+ {
1037
+ if (h->col == 0)
1038
+ return;
1039
+
1040
+ if (h->bufcol) {
1041
+ putchar(' ');
1042
+ fwrite(h->buf, h->bufcol, 1, stdout);
1043
+ h->bufcol = 0;
1044
+ }
1045
+ putchar('\n');
1046
+ h->col = 0;
1047
+ h->flags |= HTML_NOSPACE;
1048
+ h->flags &= ~HTML_BUFFER;
1049
+ }
1050
+
1051
+ /*
1052
+ * Flush the HTML output buffer.
1053
+ * If it is inactive, activate it.
1054
+ */
1055
+ static void
1056
+ print_endword(struct html *h)
1057
+ {
1058
+ if (h->noindent) {
1059
+ print_byte(h, ' ');
1060
+ return;
1061
+ }
1062
+
1063
+ if ((h->flags & HTML_BUFFER) == 0) {
1064
+ h->col++;
1065
+ h->flags |= HTML_BUFFER;
1066
+ } else if (h->bufcol) {
1067
+ putchar(' ');
1068
+ fwrite(h->buf, h->bufcol, 1, stdout);
1069
+ h->col += h->bufcol + 1;
1070
+ }
1071
+ h->bufcol = 0;
1072
+ }
1073
+
1074
+ /*
1075
+ * If at the beginning of a new output line,
1076
+ * perform indentation and mark the line as containing output.
1077
+ * Make sure to really produce some output right afterwards,
1078
+ * but do not use print_otag() for producing it.
1079
+ */
1080
+ static void
1081
+ print_indent(struct html *h)
1082
+ {
1083
+ size_t i;
1084
+
1085
+ if (h->col || h->noindent)
1086
+ return;
1087
+
1088
+ h->col = h->indent * 2;
1089
+ for (i = 0; i < h->col; i++)
1090
+ putchar(' ');
1091
+ }
1092
+
1093
/*
 * Pass a NUL-terminated string to print_byte() one byte at a time,
 * such that it is printed or buffered depending on the current
 * state of the HTML output buffer.  No escaping takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}