mandoc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/COPYING +674 -0
- data/README.md +117 -0
- data/ext/mandoc/extconf.rb +59 -0
- data/ext/mandoc/rb_mandoc.c +548 -0
- data/ext/mandoc/rb_mandoc.h +22 -0
- data/lib/mandoc/version.rb +19 -0
- data/lib/mandoc.rb +26 -0
- data/mandoc-1.14.6/LICENSE +55 -0
- data/mandoc-1.14.6/arch.c +54 -0
- data/mandoc-1.14.6/att.c +49 -0
- data/mandoc-1.14.6/catman.c +260 -0
- data/mandoc-1.14.6/cgi.c +1279 -0
- data/mandoc-1.14.6/chars.c +507 -0
- data/mandoc-1.14.6/compat_err.c +103 -0
- data/mandoc-1.14.6/compat_fts.c +696 -0
- data/mandoc-1.14.6/compat_fts.h +106 -0
- data/mandoc-1.14.6/compat_getline.c +59 -0
- data/mandoc-1.14.6/compat_getsubopt.c +87 -0
- data/mandoc-1.14.6/compat_isblank.c +23 -0
- data/mandoc-1.14.6/compat_mkdtemp.c +50 -0
- data/mandoc-1.14.6/compat_mkstemps.c +63 -0
- data/mandoc-1.14.6/compat_ohash.c +330 -0
- data/mandoc-1.14.6/compat_ohash.h +72 -0
- data/mandoc-1.14.6/compat_progname.c +31 -0
- data/mandoc-1.14.6/compat_reallocarray.c +40 -0
- data/mandoc-1.14.6/compat_recallocarray.c +99 -0
- data/mandoc-1.14.6/compat_strcasestr.c +64 -0
- data/mandoc-1.14.6/compat_stringlist.c +135 -0
- data/mandoc-1.14.6/compat_stringlist.h +48 -0
- data/mandoc-1.14.6/compat_strlcat.c +57 -0
- data/mandoc-1.14.6/compat_strlcpy.c +52 -0
- data/mandoc-1.14.6/compat_strndup.c +42 -0
- data/mandoc-1.14.6/compat_strsep.c +70 -0
- data/mandoc-1.14.6/compat_strtonum.c +67 -0
- data/mandoc-1.14.6/compat_vasprintf.c +47 -0
- data/mandoc-1.14.6/config.h +52 -0
- data/mandoc-1.14.6/dba.c +508 -0
- data/mandoc-1.14.6/dba.h +50 -0
- data/mandoc-1.14.6/dba_array.c +190 -0
- data/mandoc-1.14.6/dba_array.h +47 -0
- data/mandoc-1.14.6/dba_read.c +74 -0
- data/mandoc-1.14.6/dba_write.c +127 -0
- data/mandoc-1.14.6/dba_write.h +30 -0
- data/mandoc-1.14.6/dbm.c +480 -0
- data/mandoc-1.14.6/dbm.h +68 -0
- data/mandoc-1.14.6/dbm_map.c +194 -0
- data/mandoc-1.14.6/dbm_map.h +29 -0
- data/mandoc-1.14.6/demandoc.c +260 -0
- data/mandoc-1.14.6/eqn.c +1132 -0
- data/mandoc-1.14.6/eqn.h +72 -0
- data/mandoc-1.14.6/eqn_html.c +246 -0
- data/mandoc-1.14.6/eqn_parse.h +48 -0
- data/mandoc-1.14.6/eqn_term.c +174 -0
- data/mandoc-1.14.6/html.c +1102 -0
- data/mandoc-1.14.6/html.h +142 -0
- data/mandoc-1.14.6/lib.c +35 -0
- data/mandoc-1.14.6/libman.h +42 -0
- data/mandoc-1.14.6/libmandoc.h +85 -0
- data/mandoc-1.14.6/libmdoc.h +87 -0
- data/mandoc-1.14.6/main.c +1375 -0
- data/mandoc-1.14.6/main.h +53 -0
- data/mandoc-1.14.6/man.c +345 -0
- data/mandoc-1.14.6/man.h +21 -0
- data/mandoc-1.14.6/man_html.c +640 -0
- data/mandoc-1.14.6/man_macro.c +470 -0
- data/mandoc-1.14.6/man_term.c +1143 -0
- data/mandoc-1.14.6/man_validate.c +660 -0
- data/mandoc-1.14.6/manconf.h +58 -0
- data/mandoc-1.14.6/mandoc.c +669 -0
- data/mandoc-1.14.6/mandoc.h +329 -0
- data/mandoc-1.14.6/mandoc_aux.c +118 -0
- data/mandoc-1.14.6/mandoc_aux.h +27 -0
- data/mandoc-1.14.6/mandoc_msg.c +375 -0
- data/mandoc-1.14.6/mandoc_ohash.c +65 -0
- data/mandoc-1.14.6/mandoc_ohash.h +23 -0
- data/mandoc-1.14.6/mandoc_parse.h +44 -0
- data/mandoc-1.14.6/mandoc_xr.c +123 -0
- data/mandoc-1.14.6/mandoc_xr.h +31 -0
- data/mandoc-1.14.6/mandocd.c +282 -0
- data/mandoc-1.14.6/mandocdb.c +2448 -0
- data/mandoc-1.14.6/manpath.c +363 -0
- data/mandoc-1.14.6/mansearch.c +851 -0
- data/mandoc-1.14.6/mansearch.h +118 -0
- data/mandoc-1.14.6/mdoc.c +433 -0
- data/mandoc-1.14.6/mdoc.h +158 -0
- data/mandoc-1.14.6/mdoc_argv.c +682 -0
- data/mandoc-1.14.6/mdoc_html.c +1762 -0
- data/mandoc-1.14.6/mdoc_macro.c +1600 -0
- data/mandoc-1.14.6/mdoc_man.c +1850 -0
- data/mandoc-1.14.6/mdoc_markdown.c +1610 -0
- data/mandoc-1.14.6/mdoc_state.c +256 -0
- data/mandoc-1.14.6/mdoc_term.c +1964 -0
- data/mandoc-1.14.6/mdoc_validate.c +3062 -0
- data/mandoc-1.14.6/msec.c +37 -0
- data/mandoc-1.14.6/out.c +544 -0
- data/mandoc-1.14.6/out.h +70 -0
- data/mandoc-1.14.6/preconv.c +179 -0
- data/mandoc-1.14.6/read.c +732 -0
- data/mandoc-1.14.6/roff.c +4390 -0
- data/mandoc-1.14.6/roff.h +561 -0
- data/mandoc-1.14.6/roff_html.c +119 -0
- data/mandoc-1.14.6/roff_int.h +94 -0
- data/mandoc-1.14.6/roff_term.c +266 -0
- data/mandoc-1.14.6/roff_validate.c +151 -0
- data/mandoc-1.14.6/soelim.c +182 -0
- data/mandoc-1.14.6/st.c +82 -0
- data/mandoc-1.14.6/tag.c +327 -0
- data/mandoc-1.14.6/tag.h +35 -0
- data/mandoc-1.14.6/tbl.c +183 -0
- data/mandoc-1.14.6/tbl.h +121 -0
- data/mandoc-1.14.6/tbl_data.c +323 -0
- data/mandoc-1.14.6/tbl_html.c +293 -0
- data/mandoc-1.14.6/tbl_int.h +47 -0
- data/mandoc-1.14.6/tbl_layout.c +376 -0
- data/mandoc-1.14.6/tbl_opts.c +173 -0
- data/mandoc-1.14.6/tbl_parse.h +30 -0
- data/mandoc-1.14.6/tbl_term.c +948 -0
- data/mandoc-1.14.6/term.c +1113 -0
- data/mandoc-1.14.6/term.h +158 -0
- data/mandoc-1.14.6/term_ascii.c +424 -0
- data/mandoc-1.14.6/term_ps.c +1362 -0
- data/mandoc-1.14.6/term_tab.c +130 -0
- data/mandoc-1.14.6/term_tag.c +227 -0
- data/mandoc-1.14.6/term_tag.h +34 -0
- data/mandoc-1.14.6/tree.c +536 -0
- metadata +170 -0
|
@@ -0,0 +1,732 @@
|
|
|
1
|
+
/* $Id: read.c,v 1.220 2021/06/27 17:57:54 schwarze Exp $ */
|
|
2
|
+
/*
|
|
3
|
+
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
|
|
4
|
+
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
|
|
5
|
+
* Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
|
|
6
|
+
*
|
|
7
|
+
* Permission to use, copy, modify, and distribute this software for any
|
|
8
|
+
* purpose with or without fee is hereby granted, provided that the above
|
|
9
|
+
* copyright notice and this permission notice appear in all copies.
|
|
10
|
+
*
|
|
11
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
|
|
12
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
13
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
|
|
14
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
15
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
16
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
17
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
18
|
+
*
|
|
19
|
+
* Top-level functions of the mandoc(3) parser:
|
|
20
|
+
* Parser and input encoding selection, decompression,
|
|
21
|
+
* handling of input bytes, characters, lines, and files,
|
|
22
|
+
* handling of roff(7) loops and file inclusion,
|
|
23
|
+
* and steering of the various parsers.
|
|
24
|
+
*/
|
|
25
|
+
#include "config.h"
|
|
26
|
+
|
|
27
|
+
#include <sys/types.h>
|
|
28
|
+
#include <sys/mman.h>
|
|
29
|
+
#include <sys/stat.h>
|
|
30
|
+
|
|
31
|
+
#include <assert.h>
|
|
32
|
+
#include <ctype.h>
|
|
33
|
+
#include <errno.h>
|
|
34
|
+
#include <fcntl.h>
|
|
35
|
+
#include <stdarg.h>
|
|
36
|
+
#include <stdio.h>
|
|
37
|
+
#include <stdlib.h>
|
|
38
|
+
#include <string.h>
|
|
39
|
+
#include <unistd.h>
|
|
40
|
+
#include <zlib.h>
|
|
41
|
+
|
|
42
|
+
#include "mandoc_aux.h"
|
|
43
|
+
#include "mandoc.h"
|
|
44
|
+
#include "roff.h"
|
|
45
|
+
#include "mdoc.h"
|
|
46
|
+
#include "man.h"
|
|
47
|
+
#include "mandoc_parse.h"
|
|
48
|
+
#include "libmandoc.h"
|
|
49
|
+
#include "roff_int.h"
|
|
50
|
+
#include "tag.h"
|
|
51
|
+
|
|
52
|
+
/*
 * Upper bound for the per-line reparse counter (reparse_count in
 * struct mparse); exceeding it is reported as MANDOCERR_ROFFLOOP.
 */
#define REPARSE_LIMIT 1000
|
|
53
|
+
|
|
54
|
+
/*
 * State of one parser instance, shared by all functions in this file.
 */
struct mparse {
	struct roff	 *roff;		/* roff parser, never NULL */
	struct roff_man	 *man;		/* mdoc/man parser state */
	struct buf	 *primary;	/* buffer being parsed right now */
	struct buf	 *secondary;	/* saved copy of the top level input */
	struct buf	 *loop;		/* line of the open .while request */
	const char	 *os_s;		/* default operating system name */
	int		  options;	/* parser option bits */
	int		  gzip;		/* non-zero: current file is gzipped */
	int		  filenc;	/* encoding of the current file */
	int		  reparse_count; /* bounds the reparse recursion */
	int		  line;		/* current line number in the file */
};
|
|
67
|
+
|
|
68
|
+
/* Forward declarations of the file-local helper functions. */
static	void	 choose_parser(struct mparse *curp);
static	void	 free_buf_list(struct buf *buf);
static	void	 resize_buf(struct buf *buf, size_t initial);
static	int	 mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	int	 read_whole_file(struct mparse *curp, int fd,
			struct buf *fb, int *with_mmap);
static	void	 mparse_end(struct mparse *curp);
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
static void
|
|
77
|
+
resize_buf(struct buf *buf, size_t initial)
|
|
78
|
+
{
|
|
79
|
+
|
|
80
|
+
buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
|
|
81
|
+
buf->buf = mandoc_realloc(buf->buf, buf->sz);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
static void
|
|
85
|
+
free_buf_list(struct buf *buf)
|
|
86
|
+
{
|
|
87
|
+
struct buf *tmp;
|
|
88
|
+
|
|
89
|
+
while (buf != NULL) {
|
|
90
|
+
tmp = buf;
|
|
91
|
+
buf = tmp->next;
|
|
92
|
+
free(tmp->buf);
|
|
93
|
+
free(tmp);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
static void
|
|
98
|
+
choose_parser(struct mparse *curp)
|
|
99
|
+
{
|
|
100
|
+
char *cp, *ep;
|
|
101
|
+
int format;
|
|
102
|
+
|
|
103
|
+
/*
|
|
104
|
+
* If neither command line arguments -mdoc or -man select
|
|
105
|
+
* a parser nor the roff parser found a .Dd or .TH macro
|
|
106
|
+
* yet, look ahead in the main input buffer.
|
|
107
|
+
*/
|
|
108
|
+
|
|
109
|
+
if ((format = roff_getformat(curp->roff)) == 0) {
|
|
110
|
+
cp = curp->primary->buf;
|
|
111
|
+
ep = cp + curp->primary->sz;
|
|
112
|
+
while (cp < ep) {
|
|
113
|
+
if (*cp == '.' || *cp == '\'') {
|
|
114
|
+
cp++;
|
|
115
|
+
if (cp[0] == 'D' && cp[1] == 'd') {
|
|
116
|
+
format = MPARSE_MDOC;
|
|
117
|
+
break;
|
|
118
|
+
}
|
|
119
|
+
if (cp[0] == 'T' && cp[1] == 'H') {
|
|
120
|
+
format = MPARSE_MAN;
|
|
121
|
+
break;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
cp = memchr(cp, '\n', ep - cp);
|
|
125
|
+
if (cp == NULL)
|
|
126
|
+
break;
|
|
127
|
+
cp++;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
if (format == MPARSE_MDOC) {
|
|
132
|
+
curp->man->meta.macroset = MACROSET_MDOC;
|
|
133
|
+
if (curp->man->mdocmac == NULL)
|
|
134
|
+
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
|
|
135
|
+
} else {
|
|
136
|
+
curp->man->meta.macroset = MACROSET_MAN;
|
|
137
|
+
if (curp->man->manmac == NULL)
|
|
138
|
+
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
|
|
139
|
+
}
|
|
140
|
+
curp->man->meta.first->tok = TOKEN_NONE;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/*
|
|
144
|
+
* Main parse routine for a buffer.
|
|
145
|
+
* It assumes encoding and line numbering are already set up.
|
|
146
|
+
* It can recurse directly (for invocations of user-defined
|
|
147
|
+
* macros, inline equations, and input line traps)
|
|
148
|
+
* and indirectly (for .so file inclusion).
|
|
149
|
+
*/
|
|
150
|
+
static int
|
|
151
|
+
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
|
|
152
|
+
{
|
|
153
|
+
struct buf ln;
|
|
154
|
+
struct buf *firstln, *lastln, *thisln, *loop;
|
|
155
|
+
char *cp;
|
|
156
|
+
size_t pos; /* byte number in the ln buffer */
|
|
157
|
+
size_t spos; /* at the start of the current line parse */
|
|
158
|
+
int line_result, result;
|
|
159
|
+
int of;
|
|
160
|
+
int lnn; /* line number in the real file */
|
|
161
|
+
int fd;
|
|
162
|
+
int inloop; /* Saw .while on this level. */
|
|
163
|
+
unsigned char c;
|
|
164
|
+
|
|
165
|
+
ln.sz = 256;
|
|
166
|
+
ln.buf = mandoc_malloc(ln.sz);
|
|
167
|
+
ln.next = NULL;
|
|
168
|
+
firstln = lastln = loop = NULL;
|
|
169
|
+
lnn = curp->line;
|
|
170
|
+
pos = 0;
|
|
171
|
+
inloop = 0;
|
|
172
|
+
result = ROFF_CONT;
|
|
173
|
+
|
|
174
|
+
while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
|
|
175
|
+
if (start) {
|
|
176
|
+
curp->line = lnn;
|
|
177
|
+
curp->reparse_count = 0;
|
|
178
|
+
|
|
179
|
+
if (lnn < 3 &&
|
|
180
|
+
curp->filenc & MPARSE_UTF8 &&
|
|
181
|
+
curp->filenc & MPARSE_LATIN1)
|
|
182
|
+
curp->filenc = preconv_cue(&blk, i);
|
|
183
|
+
}
|
|
184
|
+
spos = pos;
|
|
185
|
+
|
|
186
|
+
while (i < blk.sz && (start || blk.buf[i] != '\0')) {
|
|
187
|
+
|
|
188
|
+
/*
|
|
189
|
+
* When finding an unescaped newline character,
|
|
190
|
+
* leave the character loop to process the line.
|
|
191
|
+
* Skip a preceding carriage return, if any.
|
|
192
|
+
*/
|
|
193
|
+
|
|
194
|
+
if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
|
|
195
|
+
'\n' == blk.buf[i + 1])
|
|
196
|
+
++i;
|
|
197
|
+
if ('\n' == blk.buf[i]) {
|
|
198
|
+
++i;
|
|
199
|
+
++lnn;
|
|
200
|
+
break;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/*
|
|
204
|
+
* Make sure we have space for the worst
|
|
205
|
+
* case of 12 bytes: "\\[u10ffff]\n\0"
|
|
206
|
+
*/
|
|
207
|
+
|
|
208
|
+
if (pos + 12 > ln.sz)
|
|
209
|
+
resize_buf(&ln, 256);
|
|
210
|
+
|
|
211
|
+
/*
|
|
212
|
+
* Encode 8-bit input.
|
|
213
|
+
*/
|
|
214
|
+
|
|
215
|
+
c = blk.buf[i];
|
|
216
|
+
if (c & 0x80) {
|
|
217
|
+
if ( ! (curp->filenc && preconv_encode(
|
|
218
|
+
&blk, &i, &ln, &pos, &curp->filenc))) {
|
|
219
|
+
mandoc_msg(MANDOCERR_CHAR_BAD,
|
|
220
|
+
curp->line, pos, "0x%x", c);
|
|
221
|
+
ln.buf[pos++] = '?';
|
|
222
|
+
i++;
|
|
223
|
+
}
|
|
224
|
+
continue;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/*
|
|
228
|
+
* Exclude control characters.
|
|
229
|
+
*/
|
|
230
|
+
|
|
231
|
+
if (c == 0x7f || (c < 0x20 && c != 0x09)) {
|
|
232
|
+
mandoc_msg(c == 0x00 || c == 0x04 ||
|
|
233
|
+
c > 0x0a ? MANDOCERR_CHAR_BAD :
|
|
234
|
+
MANDOCERR_CHAR_UNSUPP,
|
|
235
|
+
curp->line, pos, "0x%x", c);
|
|
236
|
+
i++;
|
|
237
|
+
if (c != '\r')
|
|
238
|
+
ln.buf[pos++] = '?';
|
|
239
|
+
continue;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
ln.buf[pos++] = blk.buf[i++];
|
|
243
|
+
}
|
|
244
|
+
ln.buf[pos] = '\0';
|
|
245
|
+
|
|
246
|
+
/*
|
|
247
|
+
* Maintain a lookaside buffer of all lines.
|
|
248
|
+
* parsed from this input source.
|
|
249
|
+
*/
|
|
250
|
+
|
|
251
|
+
thisln = mandoc_malloc(sizeof(*thisln));
|
|
252
|
+
thisln->buf = mandoc_strdup(ln.buf);
|
|
253
|
+
thisln->sz = strlen(ln.buf) + 1;
|
|
254
|
+
thisln->next = NULL;
|
|
255
|
+
if (firstln == NULL) {
|
|
256
|
+
firstln = lastln = thisln;
|
|
257
|
+
if (curp->secondary == NULL)
|
|
258
|
+
curp->secondary = firstln;
|
|
259
|
+
} else {
|
|
260
|
+
lastln->next = thisln;
|
|
261
|
+
lastln = thisln;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/* XXX Ugly hack to mark the end of the input. */
|
|
265
|
+
|
|
266
|
+
if (i == blk.sz || blk.buf[i] == '\0') {
|
|
267
|
+
if (pos + 2 > ln.sz)
|
|
268
|
+
resize_buf(&ln, 256);
|
|
269
|
+
ln.buf[pos++] = '\n';
|
|
270
|
+
ln.buf[pos] = '\0';
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
/*
|
|
274
|
+
* A significant amount of complexity is contained by
|
|
275
|
+
* the roff preprocessor. It's line-oriented but can be
|
|
276
|
+
* expressed on one line, so we need at times to
|
|
277
|
+
* readjust our starting point and re-run it. The roff
|
|
278
|
+
* preprocessor can also readjust the buffers with new
|
|
279
|
+
* data, so we pass them in wholesale.
|
|
280
|
+
*/
|
|
281
|
+
|
|
282
|
+
of = 0;
|
|
283
|
+
rerun:
|
|
284
|
+
line_result = roff_parseln(curp->roff, curp->line,
|
|
285
|
+
&ln, &of, start && spos == 0 ? pos : 0);
|
|
286
|
+
|
|
287
|
+
/* Process options. */
|
|
288
|
+
|
|
289
|
+
if (line_result & ROFF_APPEND)
|
|
290
|
+
assert(line_result == (ROFF_IGN | ROFF_APPEND));
|
|
291
|
+
|
|
292
|
+
if (line_result & ROFF_USERCALL)
|
|
293
|
+
assert((line_result & ROFF_MASK) == ROFF_REPARSE);
|
|
294
|
+
|
|
295
|
+
if (line_result & ROFF_USERRET) {
|
|
296
|
+
assert(line_result == (ROFF_IGN | ROFF_USERRET));
|
|
297
|
+
if (start == 0) {
|
|
298
|
+
/* Return from the current macro. */
|
|
299
|
+
result = ROFF_USERRET;
|
|
300
|
+
goto out;
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
switch (line_result & ROFF_LOOPMASK) {
|
|
305
|
+
case ROFF_IGN:
|
|
306
|
+
break;
|
|
307
|
+
case ROFF_WHILE:
|
|
308
|
+
if (curp->loop != NULL) {
|
|
309
|
+
if (loop == curp->loop)
|
|
310
|
+
break;
|
|
311
|
+
mandoc_msg(MANDOCERR_WHILE_NEST,
|
|
312
|
+
curp->line, pos, NULL);
|
|
313
|
+
}
|
|
314
|
+
curp->loop = thisln;
|
|
315
|
+
loop = NULL;
|
|
316
|
+
inloop = 1;
|
|
317
|
+
break;
|
|
318
|
+
case ROFF_LOOPCONT:
|
|
319
|
+
case ROFF_LOOPEXIT:
|
|
320
|
+
if (curp->loop == NULL) {
|
|
321
|
+
mandoc_msg(MANDOCERR_WHILE_FAIL,
|
|
322
|
+
curp->line, pos, NULL);
|
|
323
|
+
break;
|
|
324
|
+
}
|
|
325
|
+
if (inloop == 0) {
|
|
326
|
+
mandoc_msg(MANDOCERR_WHILE_INTO,
|
|
327
|
+
curp->line, pos, NULL);
|
|
328
|
+
curp->loop = loop = NULL;
|
|
329
|
+
break;
|
|
330
|
+
}
|
|
331
|
+
if (line_result & ROFF_LOOPCONT)
|
|
332
|
+
loop = curp->loop;
|
|
333
|
+
else {
|
|
334
|
+
curp->loop = loop = NULL;
|
|
335
|
+
inloop = 0;
|
|
336
|
+
}
|
|
337
|
+
break;
|
|
338
|
+
default:
|
|
339
|
+
abort();
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
/* Process the main instruction from the roff parser. */
|
|
343
|
+
|
|
344
|
+
switch (line_result & ROFF_MASK) {
|
|
345
|
+
case ROFF_IGN:
|
|
346
|
+
break;
|
|
347
|
+
case ROFF_CONT:
|
|
348
|
+
if (curp->man->meta.macroset == MACROSET_NONE)
|
|
349
|
+
choose_parser(curp);
|
|
350
|
+
if ((curp->man->meta.macroset == MACROSET_MDOC ?
|
|
351
|
+
mdoc_parseln(curp->man, curp->line, ln.buf, of) :
|
|
352
|
+
man_parseln(curp->man, curp->line, ln.buf, of)
|
|
353
|
+
) == 2)
|
|
354
|
+
goto out;
|
|
355
|
+
break;
|
|
356
|
+
case ROFF_RERUN:
|
|
357
|
+
goto rerun;
|
|
358
|
+
case ROFF_REPARSE:
|
|
359
|
+
if (++curp->reparse_count > REPARSE_LIMIT) {
|
|
360
|
+
/* Abort and return to the top level. */
|
|
361
|
+
result = ROFF_IGN;
|
|
362
|
+
mandoc_msg(MANDOCERR_ROFFLOOP,
|
|
363
|
+
curp->line, pos, NULL);
|
|
364
|
+
goto out;
|
|
365
|
+
}
|
|
366
|
+
result = mparse_buf_r(curp, ln, of, 0);
|
|
367
|
+
if (line_result & ROFF_USERCALL) {
|
|
368
|
+
roff_userret(curp->roff);
|
|
369
|
+
/* Continue normally. */
|
|
370
|
+
if (result & ROFF_USERRET)
|
|
371
|
+
result = ROFF_CONT;
|
|
372
|
+
}
|
|
373
|
+
if (start == 0 && result != ROFF_CONT)
|
|
374
|
+
goto out;
|
|
375
|
+
break;
|
|
376
|
+
case ROFF_SO:
|
|
377
|
+
if ( ! (curp->options & MPARSE_SO) &&
|
|
378
|
+
(i >= blk.sz || blk.buf[i] == '\0')) {
|
|
379
|
+
curp->man->meta.sodest =
|
|
380
|
+
mandoc_strdup(ln.buf + of);
|
|
381
|
+
goto out;
|
|
382
|
+
}
|
|
383
|
+
if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
|
|
384
|
+
mparse_readfd(curp, fd, ln.buf + of);
|
|
385
|
+
close(fd);
|
|
386
|
+
} else {
|
|
387
|
+
mandoc_msg(MANDOCERR_SO_FAIL,
|
|
388
|
+
curp->line, of, ".so %s: %s",
|
|
389
|
+
ln.buf + of, strerror(errno));
|
|
390
|
+
ln.sz = mandoc_asprintf(&cp,
|
|
391
|
+
".sp\nSee the file %s.\n.sp",
|
|
392
|
+
ln.buf + of);
|
|
393
|
+
free(ln.buf);
|
|
394
|
+
ln.buf = cp;
|
|
395
|
+
of = 0;
|
|
396
|
+
mparse_buf_r(curp, ln, of, 0);
|
|
397
|
+
}
|
|
398
|
+
break;
|
|
399
|
+
default:
|
|
400
|
+
abort();
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
/* Start the next input line. */
|
|
404
|
+
|
|
405
|
+
if (loop != NULL &&
|
|
406
|
+
(line_result & ROFF_LOOPMASK) == ROFF_IGN)
|
|
407
|
+
loop = loop->next;
|
|
408
|
+
|
|
409
|
+
if (loop != NULL) {
|
|
410
|
+
if ((line_result & ROFF_APPEND) == 0)
|
|
411
|
+
*ln.buf = '\0';
|
|
412
|
+
if (ln.sz < loop->sz)
|
|
413
|
+
resize_buf(&ln, loop->sz);
|
|
414
|
+
(void)strlcat(ln.buf, loop->buf, ln.sz);
|
|
415
|
+
of = 0;
|
|
416
|
+
goto rerun;
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
|
|
420
|
+
}
|
|
421
|
+
out:
|
|
422
|
+
if (inloop) {
|
|
423
|
+
if (result != ROFF_USERRET)
|
|
424
|
+
mandoc_msg(MANDOCERR_WHILE_OUTOF,
|
|
425
|
+
curp->line, pos, NULL);
|
|
426
|
+
curp->loop = NULL;
|
|
427
|
+
}
|
|
428
|
+
free(ln.buf);
|
|
429
|
+
if (firstln != curp->secondary)
|
|
430
|
+
free_buf_list(firstln);
|
|
431
|
+
return result;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
static int
|
|
435
|
+
read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
|
|
436
|
+
{
|
|
437
|
+
struct stat st;
|
|
438
|
+
gzFile gz;
|
|
439
|
+
size_t off;
|
|
440
|
+
ssize_t ssz;
|
|
441
|
+
int gzerrnum, retval;
|
|
442
|
+
|
|
443
|
+
if (fstat(fd, &st) == -1) {
|
|
444
|
+
mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno));
|
|
445
|
+
return -1;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
/*
|
|
449
|
+
* If we're a regular file, try just reading in the whole entry
|
|
450
|
+
* via mmap(). This is faster than reading it into blocks, and
|
|
451
|
+
* since each file is only a few bytes to begin with, I'm not
|
|
452
|
+
* concerned that this is going to tank any machines.
|
|
453
|
+
*/
|
|
454
|
+
|
|
455
|
+
if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
|
|
456
|
+
if (st.st_size > 0x7fffffff) {
|
|
457
|
+
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
|
|
458
|
+
return -1;
|
|
459
|
+
}
|
|
460
|
+
*with_mmap = 1;
|
|
461
|
+
fb->sz = (size_t)st.st_size;
|
|
462
|
+
fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
|
|
463
|
+
if (fb->buf != MAP_FAILED)
|
|
464
|
+
return 0;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
if (curp->gzip) {
|
|
468
|
+
/*
|
|
469
|
+
* Duplicating the file descriptor is required
|
|
470
|
+
* because we will have to call gzclose(3)
|
|
471
|
+
* to free memory used internally by zlib,
|
|
472
|
+
* but that will also close the file descriptor,
|
|
473
|
+
* which this function must not do.
|
|
474
|
+
*/
|
|
475
|
+
if ((fd = dup(fd)) == -1) {
|
|
476
|
+
mandoc_msg(MANDOCERR_DUP, 0, 0,
|
|
477
|
+
"%s", strerror(errno));
|
|
478
|
+
return -1;
|
|
479
|
+
}
|
|
480
|
+
if ((gz = gzdopen(fd, "rb")) == NULL) {
|
|
481
|
+
mandoc_msg(MANDOCERR_GZDOPEN, 0, 0,
|
|
482
|
+
"%s", strerror(errno));
|
|
483
|
+
close(fd);
|
|
484
|
+
return -1;
|
|
485
|
+
}
|
|
486
|
+
} else
|
|
487
|
+
gz = NULL;
|
|
488
|
+
|
|
489
|
+
/*
|
|
490
|
+
* If this isn't a regular file (like, say, stdin), then we must
|
|
491
|
+
* go the old way and just read things in bit by bit.
|
|
492
|
+
*/
|
|
493
|
+
|
|
494
|
+
*with_mmap = 0;
|
|
495
|
+
off = 0;
|
|
496
|
+
retval = -1;
|
|
497
|
+
fb->sz = 0;
|
|
498
|
+
fb->buf = NULL;
|
|
499
|
+
for (;;) {
|
|
500
|
+
if (off == fb->sz) {
|
|
501
|
+
if (fb->sz == (1U << 31)) {
|
|
502
|
+
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
|
|
503
|
+
break;
|
|
504
|
+
}
|
|
505
|
+
resize_buf(fb, 65536);
|
|
506
|
+
}
|
|
507
|
+
ssz = curp->gzip ?
|
|
508
|
+
gzread(gz, fb->buf + (int)off, fb->sz - off) :
|
|
509
|
+
read(fd, fb->buf + (int)off, fb->sz - off);
|
|
510
|
+
if (ssz == 0) {
|
|
511
|
+
fb->sz = off;
|
|
512
|
+
retval = 0;
|
|
513
|
+
break;
|
|
514
|
+
}
|
|
515
|
+
if (ssz == -1) {
|
|
516
|
+
if (curp->gzip)
|
|
517
|
+
(void)gzerror(gz, &gzerrnum);
|
|
518
|
+
mandoc_msg(MANDOCERR_READ, 0, 0, "%s",
|
|
519
|
+
curp->gzip && gzerrnum != Z_ERRNO ?
|
|
520
|
+
zError(gzerrnum) : strerror(errno));
|
|
521
|
+
break;
|
|
522
|
+
}
|
|
523
|
+
off += (size_t)ssz;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
|
|
527
|
+
mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s",
|
|
528
|
+
gzerrnum == Z_ERRNO ? strerror(errno) :
|
|
529
|
+
zError(gzerrnum));
|
|
530
|
+
if (retval == -1) {
|
|
531
|
+
free(fb->buf);
|
|
532
|
+
fb->buf = NULL;
|
|
533
|
+
}
|
|
534
|
+
return retval;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
static void
|
|
538
|
+
mparse_end(struct mparse *curp)
|
|
539
|
+
{
|
|
540
|
+
if (curp->man->meta.macroset == MACROSET_NONE)
|
|
541
|
+
curp->man->meta.macroset = MACROSET_MAN;
|
|
542
|
+
if (curp->man->meta.macroset == MACROSET_MDOC)
|
|
543
|
+
mdoc_endparse(curp->man);
|
|
544
|
+
else
|
|
545
|
+
man_endparse(curp->man);
|
|
546
|
+
roff_endparse(curp->roff);
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
/*
|
|
550
|
+
* Read the whole file into memory and call the parsers.
|
|
551
|
+
* Called recursively when an .so request is encountered.
|
|
552
|
+
*/
|
|
553
|
+
void
|
|
554
|
+
mparse_readfd(struct mparse *curp, int fd, const char *filename)
|
|
555
|
+
{
|
|
556
|
+
static int recursion_depth;
|
|
557
|
+
|
|
558
|
+
struct buf blk;
|
|
559
|
+
struct buf *save_primary;
|
|
560
|
+
const char *save_filename, *cp;
|
|
561
|
+
size_t offset;
|
|
562
|
+
int save_filenc, save_lineno;
|
|
563
|
+
int with_mmap;
|
|
564
|
+
|
|
565
|
+
if (recursion_depth > 64) {
|
|
566
|
+
mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL);
|
|
567
|
+
return;
|
|
568
|
+
} else if (recursion_depth == 0 &&
|
|
569
|
+
(cp = strrchr(filename, '.')) != NULL &&
|
|
570
|
+
cp[1] >= '1' && cp[1] <= '9')
|
|
571
|
+
curp->man->filesec = cp[1];
|
|
572
|
+
else
|
|
573
|
+
curp->man->filesec = '\0';
|
|
574
|
+
|
|
575
|
+
if (read_whole_file(curp, fd, &blk, &with_mmap) == -1)
|
|
576
|
+
return;
|
|
577
|
+
|
|
578
|
+
/*
|
|
579
|
+
* Save some properties of the parent file.
|
|
580
|
+
*/
|
|
581
|
+
|
|
582
|
+
save_primary = curp->primary;
|
|
583
|
+
save_filenc = curp->filenc;
|
|
584
|
+
save_lineno = curp->line;
|
|
585
|
+
save_filename = mandoc_msg_getinfilename();
|
|
586
|
+
|
|
587
|
+
curp->primary = &blk;
|
|
588
|
+
curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1);
|
|
589
|
+
curp->line = 1;
|
|
590
|
+
mandoc_msg_setinfilename(filename);
|
|
591
|
+
|
|
592
|
+
/* Skip an UTF-8 byte order mark. */
|
|
593
|
+
if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
|
|
594
|
+
(unsigned char)blk.buf[0] == 0xef &&
|
|
595
|
+
(unsigned char)blk.buf[1] == 0xbb &&
|
|
596
|
+
(unsigned char)blk.buf[2] == 0xbf) {
|
|
597
|
+
offset = 3;
|
|
598
|
+
curp->filenc &= ~MPARSE_LATIN1;
|
|
599
|
+
} else
|
|
600
|
+
offset = 0;
|
|
601
|
+
|
|
602
|
+
recursion_depth++;
|
|
603
|
+
mparse_buf_r(curp, blk, offset, 1);
|
|
604
|
+
if (--recursion_depth == 0)
|
|
605
|
+
mparse_end(curp);
|
|
606
|
+
|
|
607
|
+
/*
|
|
608
|
+
* Clean up and restore saved parent properties.
|
|
609
|
+
*/
|
|
610
|
+
|
|
611
|
+
if (with_mmap)
|
|
612
|
+
munmap(blk.buf, blk.sz);
|
|
613
|
+
else
|
|
614
|
+
free(blk.buf);
|
|
615
|
+
|
|
616
|
+
curp->primary = save_primary;
|
|
617
|
+
curp->filenc = save_filenc;
|
|
618
|
+
curp->line = save_lineno;
|
|
619
|
+
if (save_filename != NULL)
|
|
620
|
+
mandoc_msg_setinfilename(save_filename);
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
int
|
|
624
|
+
mparse_open(struct mparse *curp, const char *file)
|
|
625
|
+
{
|
|
626
|
+
char *cp;
|
|
627
|
+
int fd, save_errno;
|
|
628
|
+
|
|
629
|
+
cp = strrchr(file, '.');
|
|
630
|
+
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
|
|
631
|
+
|
|
632
|
+
/* First try to use the filename as it is. */
|
|
633
|
+
|
|
634
|
+
if ((fd = open(file, O_RDONLY)) != -1)
|
|
635
|
+
return fd;
|
|
636
|
+
|
|
637
|
+
/*
|
|
638
|
+
* If that doesn't work and the filename doesn't
|
|
639
|
+
* already end in .gz, try appending .gz.
|
|
640
|
+
*/
|
|
641
|
+
|
|
642
|
+
if ( ! curp->gzip) {
|
|
643
|
+
save_errno = errno;
|
|
644
|
+
mandoc_asprintf(&cp, "%s.gz", file);
|
|
645
|
+
fd = open(cp, O_RDONLY);
|
|
646
|
+
free(cp);
|
|
647
|
+
errno = save_errno;
|
|
648
|
+
if (fd != -1) {
|
|
649
|
+
curp->gzip = 1;
|
|
650
|
+
return fd;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
/* Neither worked, give up. */
|
|
655
|
+
|
|
656
|
+
return -1;
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
struct mparse *
|
|
660
|
+
mparse_alloc(int options, enum mandoc_os os_e, const char *os_s)
|
|
661
|
+
{
|
|
662
|
+
struct mparse *curp;
|
|
663
|
+
|
|
664
|
+
curp = mandoc_calloc(1, sizeof(struct mparse));
|
|
665
|
+
|
|
666
|
+
curp->options = options;
|
|
667
|
+
curp->os_s = os_s;
|
|
668
|
+
|
|
669
|
+
curp->roff = roff_alloc(options);
|
|
670
|
+
curp->man = roff_man_alloc(curp->roff, curp->os_s,
|
|
671
|
+
curp->options & MPARSE_QUICK ? 1 : 0);
|
|
672
|
+
if (curp->options & MPARSE_MDOC) {
|
|
673
|
+
curp->man->meta.macroset = MACROSET_MDOC;
|
|
674
|
+
if (curp->man->mdocmac == NULL)
|
|
675
|
+
curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
|
|
676
|
+
} else if (curp->options & MPARSE_MAN) {
|
|
677
|
+
curp->man->meta.macroset = MACROSET_MAN;
|
|
678
|
+
if (curp->man->manmac == NULL)
|
|
679
|
+
curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
|
|
680
|
+
}
|
|
681
|
+
curp->man->meta.first->tok = TOKEN_NONE;
|
|
682
|
+
curp->man->meta.os_e = os_e;
|
|
683
|
+
tag_alloc();
|
|
684
|
+
return curp;
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
void
|
|
688
|
+
mparse_reset(struct mparse *curp)
|
|
689
|
+
{
|
|
690
|
+
tag_free();
|
|
691
|
+
roff_reset(curp->roff);
|
|
692
|
+
roff_man_reset(curp->man);
|
|
693
|
+
free_buf_list(curp->secondary);
|
|
694
|
+
curp->secondary = NULL;
|
|
695
|
+
curp->gzip = 0;
|
|
696
|
+
tag_alloc();
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
void
|
|
700
|
+
mparse_free(struct mparse *curp)
|
|
701
|
+
{
|
|
702
|
+
tag_free();
|
|
703
|
+
roffhash_free(curp->man->mdocmac);
|
|
704
|
+
roffhash_free(curp->man->manmac);
|
|
705
|
+
roff_man_free(curp->man);
|
|
706
|
+
roff_free(curp->roff);
|
|
707
|
+
free_buf_list(curp->secondary);
|
|
708
|
+
free(curp);
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
struct roff_meta *
|
|
712
|
+
mparse_result(struct mparse *curp)
|
|
713
|
+
{
|
|
714
|
+
roff_state_reset(curp->man);
|
|
715
|
+
if (curp->options & MPARSE_VALIDATE) {
|
|
716
|
+
if (curp->man->meta.macroset == MACROSET_MDOC)
|
|
717
|
+
mdoc_validate(curp->man);
|
|
718
|
+
else
|
|
719
|
+
man_validate(curp->man);
|
|
720
|
+
tag_postprocess(curp->man, curp->man->meta.first);
|
|
721
|
+
}
|
|
722
|
+
return &curp->man->meta;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
void
|
|
726
|
+
mparse_copy(const struct mparse *p)
|
|
727
|
+
{
|
|
728
|
+
struct buf *buf;
|
|
729
|
+
|
|
730
|
+
for (buf = p->secondary; buf != NULL; buf = buf->next)
|
|
731
|
+
puts(buf->buf);
|
|
732
|
+
}
|