mandoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +7 -0
  3. data/COPYING +674 -0
  4. data/README.md +117 -0
  5. data/ext/mandoc/extconf.rb +59 -0
  6. data/ext/mandoc/rb_mandoc.c +548 -0
  7. data/ext/mandoc/rb_mandoc.h +22 -0
  8. data/lib/mandoc/version.rb +19 -0
  9. data/lib/mandoc.rb +26 -0
  10. data/mandoc-1.14.6/LICENSE +55 -0
  11. data/mandoc-1.14.6/arch.c +54 -0
  12. data/mandoc-1.14.6/att.c +49 -0
  13. data/mandoc-1.14.6/catman.c +260 -0
  14. data/mandoc-1.14.6/cgi.c +1279 -0
  15. data/mandoc-1.14.6/chars.c +507 -0
  16. data/mandoc-1.14.6/compat_err.c +103 -0
  17. data/mandoc-1.14.6/compat_fts.c +696 -0
  18. data/mandoc-1.14.6/compat_fts.h +106 -0
  19. data/mandoc-1.14.6/compat_getline.c +59 -0
  20. data/mandoc-1.14.6/compat_getsubopt.c +87 -0
  21. data/mandoc-1.14.6/compat_isblank.c +23 -0
  22. data/mandoc-1.14.6/compat_mkdtemp.c +50 -0
  23. data/mandoc-1.14.6/compat_mkstemps.c +63 -0
  24. data/mandoc-1.14.6/compat_ohash.c +330 -0
  25. data/mandoc-1.14.6/compat_ohash.h +72 -0
  26. data/mandoc-1.14.6/compat_progname.c +31 -0
  27. data/mandoc-1.14.6/compat_reallocarray.c +40 -0
  28. data/mandoc-1.14.6/compat_recallocarray.c +99 -0
  29. data/mandoc-1.14.6/compat_strcasestr.c +64 -0
  30. data/mandoc-1.14.6/compat_stringlist.c +135 -0
  31. data/mandoc-1.14.6/compat_stringlist.h +48 -0
  32. data/mandoc-1.14.6/compat_strlcat.c +57 -0
  33. data/mandoc-1.14.6/compat_strlcpy.c +52 -0
  34. data/mandoc-1.14.6/compat_strndup.c +42 -0
  35. data/mandoc-1.14.6/compat_strsep.c +70 -0
  36. data/mandoc-1.14.6/compat_strtonum.c +67 -0
  37. data/mandoc-1.14.6/compat_vasprintf.c +47 -0
  38. data/mandoc-1.14.6/config.h +52 -0
  39. data/mandoc-1.14.6/dba.c +508 -0
  40. data/mandoc-1.14.6/dba.h +50 -0
  41. data/mandoc-1.14.6/dba_array.c +190 -0
  42. data/mandoc-1.14.6/dba_array.h +47 -0
  43. data/mandoc-1.14.6/dba_read.c +74 -0
  44. data/mandoc-1.14.6/dba_write.c +127 -0
  45. data/mandoc-1.14.6/dba_write.h +30 -0
  46. data/mandoc-1.14.6/dbm.c +480 -0
  47. data/mandoc-1.14.6/dbm.h +68 -0
  48. data/mandoc-1.14.6/dbm_map.c +194 -0
  49. data/mandoc-1.14.6/dbm_map.h +29 -0
  50. data/mandoc-1.14.6/demandoc.c +260 -0
  51. data/mandoc-1.14.6/eqn.c +1132 -0
  52. data/mandoc-1.14.6/eqn.h +72 -0
  53. data/mandoc-1.14.6/eqn_html.c +246 -0
  54. data/mandoc-1.14.6/eqn_parse.h +48 -0
  55. data/mandoc-1.14.6/eqn_term.c +174 -0
  56. data/mandoc-1.14.6/html.c +1102 -0
  57. data/mandoc-1.14.6/html.h +142 -0
  58. data/mandoc-1.14.6/lib.c +35 -0
  59. data/mandoc-1.14.6/libman.h +42 -0
  60. data/mandoc-1.14.6/libmandoc.h +85 -0
  61. data/mandoc-1.14.6/libmdoc.h +87 -0
  62. data/mandoc-1.14.6/main.c +1375 -0
  63. data/mandoc-1.14.6/main.h +53 -0
  64. data/mandoc-1.14.6/man.c +345 -0
  65. data/mandoc-1.14.6/man.h +21 -0
  66. data/mandoc-1.14.6/man_html.c +640 -0
  67. data/mandoc-1.14.6/man_macro.c +470 -0
  68. data/mandoc-1.14.6/man_term.c +1143 -0
  69. data/mandoc-1.14.6/man_validate.c +660 -0
  70. data/mandoc-1.14.6/manconf.h +58 -0
  71. data/mandoc-1.14.6/mandoc.c +669 -0
  72. data/mandoc-1.14.6/mandoc.h +329 -0
  73. data/mandoc-1.14.6/mandoc_aux.c +118 -0
  74. data/mandoc-1.14.6/mandoc_aux.h +27 -0
  75. data/mandoc-1.14.6/mandoc_msg.c +375 -0
  76. data/mandoc-1.14.6/mandoc_ohash.c +65 -0
  77. data/mandoc-1.14.6/mandoc_ohash.h +23 -0
  78. data/mandoc-1.14.6/mandoc_parse.h +44 -0
  79. data/mandoc-1.14.6/mandoc_xr.c +123 -0
  80. data/mandoc-1.14.6/mandoc_xr.h +31 -0
  81. data/mandoc-1.14.6/mandocd.c +282 -0
  82. data/mandoc-1.14.6/mandocdb.c +2448 -0
  83. data/mandoc-1.14.6/manpath.c +363 -0
  84. data/mandoc-1.14.6/mansearch.c +851 -0
  85. data/mandoc-1.14.6/mansearch.h +118 -0
  86. data/mandoc-1.14.6/mdoc.c +433 -0
  87. data/mandoc-1.14.6/mdoc.h +158 -0
  88. data/mandoc-1.14.6/mdoc_argv.c +682 -0
  89. data/mandoc-1.14.6/mdoc_html.c +1762 -0
  90. data/mandoc-1.14.6/mdoc_macro.c +1600 -0
  91. data/mandoc-1.14.6/mdoc_man.c +1850 -0
  92. data/mandoc-1.14.6/mdoc_markdown.c +1610 -0
  93. data/mandoc-1.14.6/mdoc_state.c +256 -0
  94. data/mandoc-1.14.6/mdoc_term.c +1964 -0
  95. data/mandoc-1.14.6/mdoc_validate.c +3062 -0
  96. data/mandoc-1.14.6/msec.c +37 -0
  97. data/mandoc-1.14.6/out.c +544 -0
  98. data/mandoc-1.14.6/out.h +70 -0
  99. data/mandoc-1.14.6/preconv.c +179 -0
  100. data/mandoc-1.14.6/read.c +732 -0
  101. data/mandoc-1.14.6/roff.c +4390 -0
  102. data/mandoc-1.14.6/roff.h +561 -0
  103. data/mandoc-1.14.6/roff_html.c +119 -0
  104. data/mandoc-1.14.6/roff_int.h +94 -0
  105. data/mandoc-1.14.6/roff_term.c +266 -0
  106. data/mandoc-1.14.6/roff_validate.c +151 -0
  107. data/mandoc-1.14.6/soelim.c +182 -0
  108. data/mandoc-1.14.6/st.c +82 -0
  109. data/mandoc-1.14.6/tag.c +327 -0
  110. data/mandoc-1.14.6/tag.h +35 -0
  111. data/mandoc-1.14.6/tbl.c +183 -0
  112. data/mandoc-1.14.6/tbl.h +121 -0
  113. data/mandoc-1.14.6/tbl_data.c +323 -0
  114. data/mandoc-1.14.6/tbl_html.c +293 -0
  115. data/mandoc-1.14.6/tbl_int.h +47 -0
  116. data/mandoc-1.14.6/tbl_layout.c +376 -0
  117. data/mandoc-1.14.6/tbl_opts.c +173 -0
  118. data/mandoc-1.14.6/tbl_parse.h +30 -0
  119. data/mandoc-1.14.6/tbl_term.c +948 -0
  120. data/mandoc-1.14.6/term.c +1113 -0
  121. data/mandoc-1.14.6/term.h +158 -0
  122. data/mandoc-1.14.6/term_ascii.c +424 -0
  123. data/mandoc-1.14.6/term_ps.c +1362 -0
  124. data/mandoc-1.14.6/term_tab.c +130 -0
  125. data/mandoc-1.14.6/term_tag.c +227 -0
  126. data/mandoc-1.14.6/term_tag.h +34 -0
  127. data/mandoc-1.14.6/tree.c +536 -0
  128. metadata +170 -0
@@ -0,0 +1,732 @@
1
+ /* $Id: read.c,v 1.220 2021/06/27 17:57:54 schwarze Exp $ */
2
+ /*
3
+ * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
4
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5
+ * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ *
19
+ * Top-level functions of the mandoc(3) parser:
20
+ * Parser and input encoding selection, decompression,
21
+ * handling of input bytes, characters, lines, and files,
22
+ * handling of roff(7) loops and file inclusion,
23
+ * and steering of the various parsers.
24
+ */
25
+ #include "config.h"
26
+
27
+ #include <sys/types.h>
28
+ #include <sys/mman.h>
29
+ #include <sys/stat.h>
30
+
31
+ #include <assert.h>
32
+ #include <ctype.h>
33
+ #include <errno.h>
34
+ #include <fcntl.h>
35
+ #include <stdarg.h>
36
+ #include <stdio.h>
37
+ #include <stdlib.h>
38
+ #include <string.h>
39
+ #include <unistd.h>
40
+ #include <zlib.h>
41
+
42
+ #include "mandoc_aux.h"
43
+ #include "mandoc.h"
44
+ #include "roff.h"
45
+ #include "mdoc.h"
46
+ #include "man.h"
47
+ #include "mandoc_parse.h"
48
+ #include "libmandoc.h"
49
+ #include "roff_int.h"
50
+ #include "tag.h"
51
+
52
+ #define REPARSE_LIMIT 1000
53
+
54
+ struct mparse { /* State of one parser instance; passed to every function in this file. */
55
+ struct roff *roff; /* roff parser (!NULL); created by roff_alloc() in mparse_alloc() */
56
+ struct roff_man *man; /* man/mdoc parser state; created by roff_man_alloc() in mparse_alloc() */
57
+ struct buf *primary; /* buffer currently being parsed; mparse_readfd() points it at the file contents */
58
+ struct buf *secondary; /* copy of top level input: list of line buffers, printed by mparse_copy() */
59
+ struct buf *loop; /* open .while request line; NULL when no loop is open */
60
+ const char *os_s; /* default operating system; handed to roff_man_alloc() */
61
+ int options; /* parser options (MPARSE_* bits) */
62
+ int gzip; /* current input file is gzipped; set by mparse_open() */
63
+ int filenc; /* encoding of the current file (MPARSE_UTF8 and/or MPARSE_LATIN1 bits) */
64
+ int reparse_count; /* finite interp. stack: reparse counter compared against REPARSE_LIMIT */
65
+ int line; /* line number in the file */
66
+ };
67
+
68
+ static void choose_parser(struct mparse *);
69
+ static void free_buf_list(struct buf *);
70
+ static void resize_buf(struct buf *, size_t);
71
+ static int mparse_buf_r(struct mparse *, struct buf, size_t, int);
72
+ static int read_whole_file(struct mparse *, int, struct buf *, int *);
73
+ static void mparse_end(struct mparse *);
74
+
75
+
76
+ static void
77
+ resize_buf(struct buf *buf, size_t initial)
78
+ {
79
+
80
+ buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
81
+ buf->buf = mandoc_realloc(buf->buf, buf->sz);
82
+ }
83
+
84
+ static void
85
+ free_buf_list(struct buf *buf)
86
+ {
87
+ struct buf *tmp;
88
+
89
+ while (buf != NULL) {
90
+ tmp = buf;
91
+ buf = tmp->next;
92
+ free(tmp->buf);
93
+ free(tmp);
94
+ }
95
+ }
96
+
97
+ static void
98
+ choose_parser(struct mparse *curp)
99
+ {
100
+ char *cp, *ep;
101
+ int format;
102
+
103
+ /*
104
+ * If neither command line arguments -mdoc or -man select
105
+ * a parser nor the roff parser found a .Dd or .TH macro
106
+ * yet, look ahead in the main input buffer.
107
+ */
108
+
109
+ if ((format = roff_getformat(curp->roff)) == 0) {
110
+ cp = curp->primary->buf;
111
+ ep = cp + curp->primary->sz;
112
+ while (cp < ep) {
113
+ if (*cp == '.' || *cp == '\'') {
114
+ cp++;
115
+ if (cp[0] == 'D' && cp[1] == 'd') {
116
+ format = MPARSE_MDOC;
117
+ break;
118
+ }
119
+ if (cp[0] == 'T' && cp[1] == 'H') {
120
+ format = MPARSE_MAN;
121
+ break;
122
+ }
123
+ }
124
+ cp = memchr(cp, '\n', ep - cp);
125
+ if (cp == NULL)
126
+ break;
127
+ cp++;
128
+ }
129
+ }
130
+
131
+ if (format == MPARSE_MDOC) {
132
+ curp->man->meta.macroset = MACROSET_MDOC;
133
+ if (curp->man->mdocmac == NULL)
134
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
135
+ } else {
136
+ curp->man->meta.macroset = MACROSET_MAN;
137
+ if (curp->man->manmac == NULL)
138
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
139
+ }
140
+ curp->man->meta.first->tok = TOKEN_NONE;
141
+ }
142
+
143
+ /*
144
+ * Main parse routine for a buffer.
145
+ * It assumes encoding and line numbering are already set up.
146
+ * It can recurse directly (for invocations of user-defined
147
+ * macros, inline equations, and input line traps)
148
+ * and indirectly (for .so file inclusion).
149
+ */
150
+ static int
151
+ mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
152
+ {
153
+ struct buf ln;
154
+ struct buf *firstln, *lastln, *thisln, *loop;
155
+ char *cp;
156
+ size_t pos; /* byte number in the ln buffer */
157
+ size_t spos; /* at the start of the current line parse */
158
+ int line_result, result;
159
+ int of;
160
+ int lnn; /* line number in the real file */
161
+ int fd;
162
+ int inloop; /* Saw .while on this level. */
163
+ unsigned char c;
164
+
165
+ ln.sz = 256;
166
+ ln.buf = mandoc_malloc(ln.sz);
167
+ ln.next = NULL;
168
+ firstln = lastln = loop = NULL;
169
+ lnn = curp->line;
170
+ pos = 0;
171
+ inloop = 0;
172
+ result = ROFF_CONT;
173
+
174
+ while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
175
+ if (start) {
176
+ curp->line = lnn;
177
+ curp->reparse_count = 0;
178
+
179
+ if (lnn < 3 &&
180
+ curp->filenc & MPARSE_UTF8 &&
181
+ curp->filenc & MPARSE_LATIN1)
182
+ curp->filenc = preconv_cue(&blk, i);
183
+ }
184
+ spos = pos;
185
+
186
+ while (i < blk.sz && (start || blk.buf[i] != '\0')) {
187
+
188
+ /*
189
+ * When finding an unescaped newline character,
190
+ * leave the character loop to process the line.
191
+ * Skip a preceding carriage return, if any.
192
+ */
193
+
194
+ if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
195
+ '\n' == blk.buf[i + 1])
196
+ ++i;
197
+ if ('\n' == blk.buf[i]) {
198
+ ++i;
199
+ ++lnn;
200
+ break;
201
+ }
202
+
203
+ /*
204
+ * Make sure we have space for the worst
205
+ * case of 12 bytes: "\\[u10ffff]\n\0"
206
+ */
207
+
208
+ if (pos + 12 > ln.sz)
209
+ resize_buf(&ln, 256);
210
+
211
+ /*
212
+ * Encode 8-bit input.
213
+ */
214
+
215
+ c = blk.buf[i];
216
+ if (c & 0x80) {
217
+ if ( ! (curp->filenc && preconv_encode(
218
+ &blk, &i, &ln, &pos, &curp->filenc))) {
219
+ mandoc_msg(MANDOCERR_CHAR_BAD,
220
+ curp->line, pos, "0x%x", c);
221
+ ln.buf[pos++] = '?';
222
+ i++;
223
+ }
224
+ continue;
225
+ }
226
+
227
+ /*
228
+ * Exclude control characters.
229
+ */
230
+
231
+ if (c == 0x7f || (c < 0x20 && c != 0x09)) {
232
+ mandoc_msg(c == 0x00 || c == 0x04 ||
233
+ c > 0x0a ? MANDOCERR_CHAR_BAD :
234
+ MANDOCERR_CHAR_UNSUPP,
235
+ curp->line, pos, "0x%x", c);
236
+ i++;
237
+ if (c != '\r')
238
+ ln.buf[pos++] = '?';
239
+ continue;
240
+ }
241
+
242
+ ln.buf[pos++] = blk.buf[i++];
243
+ }
244
+ ln.buf[pos] = '\0';
245
+
246
+ /*
247
+ * Maintain a lookaside buffer of all lines.
248
+ * parsed from this input source.
249
+ */
250
+
251
+ thisln = mandoc_malloc(sizeof(*thisln));
252
+ thisln->buf = mandoc_strdup(ln.buf);
253
+ thisln->sz = strlen(ln.buf) + 1;
254
+ thisln->next = NULL;
255
+ if (firstln == NULL) {
256
+ firstln = lastln = thisln;
257
+ if (curp->secondary == NULL)
258
+ curp->secondary = firstln;
259
+ } else {
260
+ lastln->next = thisln;
261
+ lastln = thisln;
262
+ }
263
+
264
+ /* XXX Ugly hack to mark the end of the input. */
265
+
266
+ if (i == blk.sz || blk.buf[i] == '\0') {
267
+ if (pos + 2 > ln.sz)
268
+ resize_buf(&ln, 256);
269
+ ln.buf[pos++] = '\n';
270
+ ln.buf[pos] = '\0';
271
+ }
272
+
273
+ /*
274
+ * A significant amount of complexity is contained by
275
+ * the roff preprocessor. It's line-oriented but can be
276
+ * expressed on one line, so we need at times to
277
+ * readjust our starting point and re-run it. The roff
278
+ * preprocessor can also readjust the buffers with new
279
+ * data, so we pass them in wholesale.
280
+ */
281
+
282
+ of = 0;
283
+ rerun:
284
+ line_result = roff_parseln(curp->roff, curp->line,
285
+ &ln, &of, start && spos == 0 ? pos : 0);
286
+
287
+ /* Process options. */
288
+
289
+ if (line_result & ROFF_APPEND)
290
+ assert(line_result == (ROFF_IGN | ROFF_APPEND));
291
+
292
+ if (line_result & ROFF_USERCALL)
293
+ assert((line_result & ROFF_MASK) == ROFF_REPARSE);
294
+
295
+ if (line_result & ROFF_USERRET) {
296
+ assert(line_result == (ROFF_IGN | ROFF_USERRET));
297
+ if (start == 0) {
298
+ /* Return from the current macro. */
299
+ result = ROFF_USERRET;
300
+ goto out;
301
+ }
302
+ }
303
+
304
+ switch (line_result & ROFF_LOOPMASK) {
305
+ case ROFF_IGN:
306
+ break;
307
+ case ROFF_WHILE:
308
+ if (curp->loop != NULL) {
309
+ if (loop == curp->loop)
310
+ break;
311
+ mandoc_msg(MANDOCERR_WHILE_NEST,
312
+ curp->line, pos, NULL);
313
+ }
314
+ curp->loop = thisln;
315
+ loop = NULL;
316
+ inloop = 1;
317
+ break;
318
+ case ROFF_LOOPCONT:
319
+ case ROFF_LOOPEXIT:
320
+ if (curp->loop == NULL) {
321
+ mandoc_msg(MANDOCERR_WHILE_FAIL,
322
+ curp->line, pos, NULL);
323
+ break;
324
+ }
325
+ if (inloop == 0) {
326
+ mandoc_msg(MANDOCERR_WHILE_INTO,
327
+ curp->line, pos, NULL);
328
+ curp->loop = loop = NULL;
329
+ break;
330
+ }
331
+ if (line_result & ROFF_LOOPCONT)
332
+ loop = curp->loop;
333
+ else {
334
+ curp->loop = loop = NULL;
335
+ inloop = 0;
336
+ }
337
+ break;
338
+ default:
339
+ abort();
340
+ }
341
+
342
+ /* Process the main instruction from the roff parser. */
343
+
344
+ switch (line_result & ROFF_MASK) {
345
+ case ROFF_IGN:
346
+ break;
347
+ case ROFF_CONT:
348
+ if (curp->man->meta.macroset == MACROSET_NONE)
349
+ choose_parser(curp);
350
+ if ((curp->man->meta.macroset == MACROSET_MDOC ?
351
+ mdoc_parseln(curp->man, curp->line, ln.buf, of) :
352
+ man_parseln(curp->man, curp->line, ln.buf, of)
353
+ ) == 2)
354
+ goto out;
355
+ break;
356
+ case ROFF_RERUN:
357
+ goto rerun;
358
+ case ROFF_REPARSE:
359
+ if (++curp->reparse_count > REPARSE_LIMIT) {
360
+ /* Abort and return to the top level. */
361
+ result = ROFF_IGN;
362
+ mandoc_msg(MANDOCERR_ROFFLOOP,
363
+ curp->line, pos, NULL);
364
+ goto out;
365
+ }
366
+ result = mparse_buf_r(curp, ln, of, 0);
367
+ if (line_result & ROFF_USERCALL) {
368
+ roff_userret(curp->roff);
369
+ /* Continue normally. */
370
+ if (result & ROFF_USERRET)
371
+ result = ROFF_CONT;
372
+ }
373
+ if (start == 0 && result != ROFF_CONT)
374
+ goto out;
375
+ break;
376
+ case ROFF_SO:
377
+ if ( ! (curp->options & MPARSE_SO) &&
378
+ (i >= blk.sz || blk.buf[i] == '\0')) {
379
+ curp->man->meta.sodest =
380
+ mandoc_strdup(ln.buf + of);
381
+ goto out;
382
+ }
383
+ if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
384
+ mparse_readfd(curp, fd, ln.buf + of);
385
+ close(fd);
386
+ } else {
387
+ mandoc_msg(MANDOCERR_SO_FAIL,
388
+ curp->line, of, ".so %s: %s",
389
+ ln.buf + of, strerror(errno));
390
+ ln.sz = mandoc_asprintf(&cp,
391
+ ".sp\nSee the file %s.\n.sp",
392
+ ln.buf + of);
393
+ free(ln.buf);
394
+ ln.buf = cp;
395
+ of = 0;
396
+ mparse_buf_r(curp, ln, of, 0);
397
+ }
398
+ break;
399
+ default:
400
+ abort();
401
+ }
402
+
403
+ /* Start the next input line. */
404
+
405
+ if (loop != NULL &&
406
+ (line_result & ROFF_LOOPMASK) == ROFF_IGN)
407
+ loop = loop->next;
408
+
409
+ if (loop != NULL) {
410
+ if ((line_result & ROFF_APPEND) == 0)
411
+ *ln.buf = '\0';
412
+ if (ln.sz < loop->sz)
413
+ resize_buf(&ln, loop->sz);
414
+ (void)strlcat(ln.buf, loop->buf, ln.sz);
415
+ of = 0;
416
+ goto rerun;
417
+ }
418
+
419
+ pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
420
+ }
421
+ out:
422
+ if (inloop) {
423
+ if (result != ROFF_USERRET)
424
+ mandoc_msg(MANDOCERR_WHILE_OUTOF,
425
+ curp->line, pos, NULL);
426
+ curp->loop = NULL;
427
+ }
428
+ free(ln.buf);
429
+ if (firstln != curp->secondary)
430
+ free_buf_list(firstln);
431
+ return result;
432
+ }
433
+
434
+ static int
435
+ read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
436
+ {
437
+ struct stat st;
438
+ gzFile gz;
439
+ size_t off;
440
+ ssize_t ssz;
441
+ int gzerrnum, retval;
442
+
443
+ if (fstat(fd, &st) == -1) {
444
+ mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno));
445
+ return -1;
446
+ }
447
+
448
+ /*
449
+ * If we're a regular file, try just reading in the whole entry
450
+ * via mmap(). This is faster than reading it into blocks, and
451
+ * since each file is only a few bytes to begin with, I'm not
452
+ * concerned that this is going to tank any machines.
453
+ */
454
+
455
+ if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
456
+ if (st.st_size > 0x7fffffff) {
457
+ mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
458
+ return -1;
459
+ }
460
+ *with_mmap = 1;
461
+ fb->sz = (size_t)st.st_size;
462
+ fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
463
+ if (fb->buf != MAP_FAILED)
464
+ return 0;
465
+ }
466
+
467
+ if (curp->gzip) {
468
+ /*
469
+ * Duplicating the file descriptor is required
470
+ * because we will have to call gzclose(3)
471
+ * to free memory used internally by zlib,
472
+ * but that will also close the file descriptor,
473
+ * which this function must not do.
474
+ */
475
+ if ((fd = dup(fd)) == -1) {
476
+ mandoc_msg(MANDOCERR_DUP, 0, 0,
477
+ "%s", strerror(errno));
478
+ return -1;
479
+ }
480
+ if ((gz = gzdopen(fd, "rb")) == NULL) {
481
+ mandoc_msg(MANDOCERR_GZDOPEN, 0, 0,
482
+ "%s", strerror(errno));
483
+ close(fd);
484
+ return -1;
485
+ }
486
+ } else
487
+ gz = NULL;
488
+
489
+ /*
490
+ * If this isn't a regular file (like, say, stdin), then we must
491
+ * go the old way and just read things in bit by bit.
492
+ */
493
+
494
+ *with_mmap = 0;
495
+ off = 0;
496
+ retval = -1;
497
+ fb->sz = 0;
498
+ fb->buf = NULL;
499
+ for (;;) {
500
+ if (off == fb->sz) {
501
+ if (fb->sz == (1U << 31)) {
502
+ mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
503
+ break;
504
+ }
505
+ resize_buf(fb, 65536);
506
+ }
507
+ ssz = curp->gzip ?
508
+ gzread(gz, fb->buf + (int)off, fb->sz - off) :
509
+ read(fd, fb->buf + (int)off, fb->sz - off);
510
+ if (ssz == 0) {
511
+ fb->sz = off;
512
+ retval = 0;
513
+ break;
514
+ }
515
+ if (ssz == -1) {
516
+ if (curp->gzip)
517
+ (void)gzerror(gz, &gzerrnum);
518
+ mandoc_msg(MANDOCERR_READ, 0, 0, "%s",
519
+ curp->gzip && gzerrnum != Z_ERRNO ?
520
+ zError(gzerrnum) : strerror(errno));
521
+ break;
522
+ }
523
+ off += (size_t)ssz;
524
+ }
525
+
526
+ if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
527
+ mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s",
528
+ gzerrnum == Z_ERRNO ? strerror(errno) :
529
+ zError(gzerrnum));
530
+ if (retval == -1) {
531
+ free(fb->buf);
532
+ fb->buf = NULL;
533
+ }
534
+ return retval;
535
+ }
536
+
537
+ static void
538
+ mparse_end(struct mparse *curp)
539
+ {
540
+ if (curp->man->meta.macroset == MACROSET_NONE)
541
+ curp->man->meta.macroset = MACROSET_MAN;
542
+ if (curp->man->meta.macroset == MACROSET_MDOC)
543
+ mdoc_endparse(curp->man);
544
+ else
545
+ man_endparse(curp->man);
546
+ roff_endparse(curp->roff);
547
+ }
548
+
549
+ /*
550
+ * Read the whole file into memory and call the parsers.
551
+ * Called recursively when an .so request is encountered.
552
+ */
553
+ void
554
+ mparse_readfd(struct mparse *curp, int fd, const char *filename)
555
+ {
556
+ static int recursion_depth;
557
+
558
+ struct buf blk;
559
+ struct buf *save_primary;
560
+ const char *save_filename, *cp;
561
+ size_t offset;
562
+ int save_filenc, save_lineno;
563
+ int with_mmap;
564
+
565
+ if (recursion_depth > 64) {
566
+ mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL);
567
+ return;
568
+ } else if (recursion_depth == 0 &&
569
+ (cp = strrchr(filename, '.')) != NULL &&
570
+ cp[1] >= '1' && cp[1] <= '9')
571
+ curp->man->filesec = cp[1];
572
+ else
573
+ curp->man->filesec = '\0';
574
+
575
+ if (read_whole_file(curp, fd, &blk, &with_mmap) == -1)
576
+ return;
577
+
578
+ /*
579
+ * Save some properties of the parent file.
580
+ */
581
+
582
+ save_primary = curp->primary;
583
+ save_filenc = curp->filenc;
584
+ save_lineno = curp->line;
585
+ save_filename = mandoc_msg_getinfilename();
586
+
587
+ curp->primary = &blk;
588
+ curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1);
589
+ curp->line = 1;
590
+ mandoc_msg_setinfilename(filename);
591
+
592
+ /* Skip an UTF-8 byte order mark. */
593
+ if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
594
+ (unsigned char)blk.buf[0] == 0xef &&
595
+ (unsigned char)blk.buf[1] == 0xbb &&
596
+ (unsigned char)blk.buf[2] == 0xbf) {
597
+ offset = 3;
598
+ curp->filenc &= ~MPARSE_LATIN1;
599
+ } else
600
+ offset = 0;
601
+
602
+ recursion_depth++;
603
+ mparse_buf_r(curp, blk, offset, 1);
604
+ if (--recursion_depth == 0)
605
+ mparse_end(curp);
606
+
607
+ /*
608
+ * Clean up and restore saved parent properties.
609
+ */
610
+
611
+ if (with_mmap)
612
+ munmap(blk.buf, blk.sz);
613
+ else
614
+ free(blk.buf);
615
+
616
+ curp->primary = save_primary;
617
+ curp->filenc = save_filenc;
618
+ curp->line = save_lineno;
619
+ if (save_filename != NULL)
620
+ mandoc_msg_setinfilename(save_filename);
621
+ }
622
+
623
+ int
624
+ mparse_open(struct mparse *curp, const char *file)
625
+ {
626
+ char *cp;
627
+ int fd, save_errno;
628
+
629
+ cp = strrchr(file, '.');
630
+ curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
631
+
632
+ /* First try to use the filename as it is. */
633
+
634
+ if ((fd = open(file, O_RDONLY)) != -1)
635
+ return fd;
636
+
637
+ /*
638
+ * If that doesn't work and the filename doesn't
639
+ * already end in .gz, try appending .gz.
640
+ */
641
+
642
+ if ( ! curp->gzip) {
643
+ save_errno = errno;
644
+ mandoc_asprintf(&cp, "%s.gz", file);
645
+ fd = open(cp, O_RDONLY);
646
+ free(cp);
647
+ errno = save_errno;
648
+ if (fd != -1) {
649
+ curp->gzip = 1;
650
+ return fd;
651
+ }
652
+ }
653
+
654
+ /* Neither worked, give up. */
655
+
656
+ return -1;
657
+ }
658
+
659
+ struct mparse *
660
+ mparse_alloc(int options, enum mandoc_os os_e, const char *os_s)
661
+ {
662
+ struct mparse *curp;
663
+
664
+ curp = mandoc_calloc(1, sizeof(struct mparse));
665
+
666
+ curp->options = options;
667
+ curp->os_s = os_s;
668
+
669
+ curp->roff = roff_alloc(options);
670
+ curp->man = roff_man_alloc(curp->roff, curp->os_s,
671
+ curp->options & MPARSE_QUICK ? 1 : 0);
672
+ if (curp->options & MPARSE_MDOC) {
673
+ curp->man->meta.macroset = MACROSET_MDOC;
674
+ if (curp->man->mdocmac == NULL)
675
+ curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
676
+ } else if (curp->options & MPARSE_MAN) {
677
+ curp->man->meta.macroset = MACROSET_MAN;
678
+ if (curp->man->manmac == NULL)
679
+ curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
680
+ }
681
+ curp->man->meta.first->tok = TOKEN_NONE;
682
+ curp->man->meta.os_e = os_e;
683
+ tag_alloc();
684
+ return curp;
685
+ }
686
+
687
+ void
688
+ mparse_reset(struct mparse *curp)
689
+ {
690
+ tag_free();
691
+ roff_reset(curp->roff);
692
+ roff_man_reset(curp->man);
693
+ free_buf_list(curp->secondary);
694
+ curp->secondary = NULL;
695
+ curp->gzip = 0;
696
+ tag_alloc();
697
+ }
698
+
699
+ void
700
+ mparse_free(struct mparse *curp)
701
+ {
702
+ tag_free();
703
+ roffhash_free(curp->man->mdocmac);
704
+ roffhash_free(curp->man->manmac);
705
+ roff_man_free(curp->man);
706
+ roff_free(curp->roff);
707
+ free_buf_list(curp->secondary);
708
+ free(curp);
709
+ }
710
+
711
+ struct roff_meta *
712
+ mparse_result(struct mparse *curp)
713
+ {
714
+ roff_state_reset(curp->man);
715
+ if (curp->options & MPARSE_VALIDATE) {
716
+ if (curp->man->meta.macroset == MACROSET_MDOC)
717
+ mdoc_validate(curp->man);
718
+ else
719
+ man_validate(curp->man);
720
+ tag_postprocess(curp->man, curp->man->meta.first);
721
+ }
722
+ return &curp->man->meta;
723
+ }
724
+
725
+ void
726
+ mparse_copy(const struct mparse *p)
727
+ {
728
+ struct buf *buf;
729
+
730
+ for (buf = p->secondary; buf != NULL; buf = buf->next)
731
+ puts(buf->buf);
732
+ }